Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-09 19:58:29 +00:00)

Commit eb1c5e86fe: 389 changed files with 10041 additions and 7739 deletions
2  .github/CODEOWNERS (vendored)

@@ -2,4 +2,4 @@
# These owners will be the default owners for everything in
# the repo. Unless a later match takes precedence,
* @ashwinb @yanxi0830 @hardikjshah @dltn @raghotham @dineshyv @vladimirivic @sixianyi0721
* @ashwinb @yanxi0830 @hardikjshah @dltn @raghotham @dineshyv @vladimirivic @sixianyi0721 @ehhuang @terrytangyuan
2  .github/ISSUE_TEMPLATE/bug.yml (vendored)

@@ -1,6 +1,6 @@
name: 🐛 Bug Report
description: Create a report to help us reproduce and fix the bug

labels: ["bug"]
body:
  - type: markdown
    attributes:
12  .github/ISSUE_TEMPLATE/config.yml (vendored, new file)

@@ -0,0 +1,12 @@
blank_issues_enabled: false

contact_links:
  - name: Have you read the docs?
    url: https://llama-stack.readthedocs.io/en/latest/index.html
    about: Much help can be found in the docs
  - name: Start a discussion
    url: https://github.com/meta-llama/llama-stack/discussions/new
    about: Start a discussion on a topic
  - name: Chat on Discord
    url: https://discord.gg/llama-stack
    about: Maybe chatting with the community can help
2  .github/ISSUE_TEMPLATE/feature-request.yml (vendored)

@@ -1,6 +1,6 @@
name: 🚀 Feature request
description: Request a new llama-stack feature

labels: ["enhancement"]
body:
  - type: textarea
    id: feature-pitch
27  .github/PULL_REQUEST_TEMPLATE.md (vendored)

@@ -1,27 +1,10 @@
# What does this PR do?
[Provide a short summary of what this PR does and why. Link to relevant issues if applicable.]

In short, provide a summary of what this PR does and why. Usually, the relevant context should be present in a linked issue.

- [ ] Addresses issue (#issue)

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
[Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*]

Please describe:
- tests you ran to verify your changes with result summaries.
- provide instructions so it can be reproduced.

## Sources

Please link relevant resources if necessary.

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
      Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.

[//]: # (## Documentation)
10  .github/workflows/pre-commit.yml (vendored)

@@ -11,10 +11,10 @@ jobs:
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: pip
@@ -22,4 +22,8 @@ jobs:
            **/requirements*.txt
            .pre-commit-config.yaml

      - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd #v3.0.1
      - uses: pre-commit/action@v3.0.1

      - name: Verify if there are any diff files after pre-commit
        run: |
          git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
148  .github/workflows/publish-to-docker.yml (vendored, deleted)

@@ -1,148 +0,0 @@
|||
name: Docker Build and Publish
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'TestPyPI or PyPI version to build (e.g., 0.0.63.dev20250114)'
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
||||
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
|
||||
TAVILY_SEARCH_API_KEY: ${{ secrets.TAVILY_SEARCH_API_KEY }}
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to the Container registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Set version
|
||||
id: version
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "push" ]; then
|
||||
echo "VERSION=0.0.63.dev51206766" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "VERSION=${{ inputs.version }}" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Check package version availability
|
||||
run: |
|
||||
# Function to check if version exists in a repository
|
||||
check_version() {
|
||||
local repo=$1
|
||||
local VERSION_TO_CHECK=${{ steps.version.outputs.version }}
|
||||
echo "Checking version $VERSION_TO_CHECK in $repo"
|
||||
result=$(curl -s "https://$repo.org/pypi/llama-stack/json" | jq --arg v "$VERSION_TO_CHECK" '.releases | has($v)')
|
||||
echo "Result: $result"
|
||||
return $([ "$result" = "true" ])
|
||||
}
|
||||
|
||||
# Check TestPyPI first, then PyPI
|
||||
if check_version "test.pypi"; then
|
||||
echo "Version ${{ steps.version.outputs.version }} found in TestPyPI"
|
||||
echo "PYPI_SOURCE=testpypi" >> $GITHUB_ENV
|
||||
elif check_version "pypi"; then
|
||||
echo "Version ${{ steps.version.outputs.version }} found in PyPI"
|
||||
echo "PYPI_SOURCE=pypi" >> $GITHUB_ENV
|
||||
else
|
||||
echo "Error: Version ${{ steps.version.outputs.version }} not found in either TestPyPI or PyPI"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install llama-stack
|
||||
run: |
|
||||
echo "PYPI_SOURCE=${PYPI_SOURCE}"
|
||||
if [ "${{ github.event_name }}" = "push" ]; then
|
||||
pip install -e .
|
||||
else
|
||||
if [ "$PYPI_SOURCE" = "testpypi" ]; then
|
||||
pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple llama-stack==${{ steps.version.outputs.version }}
|
||||
else
|
||||
pip install llama-stack==${{ steps.version.outputs.version }}
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Build docker image
|
||||
run: |
|
||||
echo "PYPI_SOURCE=${PYPI_SOURCE}"
|
||||
echo "VERSION=${{ steps.version.outputs.version }}"
|
||||
TEMPLATES=("ollama" "bedrock" "remote-vllm" "fireworks" "together" "tgi" "meta-reference-gpu")
|
||||
for template in "${TEMPLATES[@]}"; do
|
||||
if [ "$PYPI_SOURCE" = "testpypi" ]; then
|
||||
TEST_PYPI_VERSION=${{ steps.version.outputs.version }} llama stack build --template $template --image-type container
|
||||
else
|
||||
PYPI_VERSION=${{ steps.version.outputs.version }} llama stack build --template $template --image-type container
|
||||
fi
|
||||
done
|
||||
|
||||
- name: List docker images
|
||||
run: |
|
||||
docker images
|
||||
|
||||
# TODO (xiyan): make the following 2 steps into a matrix and test all templates other than fireworks
|
||||
- name: Start up built docker image
|
||||
run: |
|
||||
cd distributions/fireworks
|
||||
if [ "$PYPI_SOURCE" = "testpypi" ]; then
|
||||
sed -i 's|image: llamastack/distribution-fireworks|image: distribution-fireworks:test-${{ steps.version.outputs.version }}|' ./compose.yaml
|
||||
else
|
||||
sed -i 's|image: llamastack/distribution-fireworks|image: distribution-fireworks:${{ steps.version.outputs.version }}|' ./compose.yaml
|
||||
fi
|
||||
docker compose up -d
|
||||
cd ..
|
||||
# Wait for the container to start
|
||||
timeout=300
|
||||
while ! curl -s -f http://localhost:8321/v1/version > /dev/null && [ $timeout -gt 0 ]; do
|
||||
echo "Waiting for endpoint to be available..."
|
||||
sleep 5
|
||||
timeout=$((timeout - 5))
|
||||
done
|
||||
|
||||
if [ $timeout -le 0 ]; then
|
||||
echo "Timeout waiting for endpoint to become available"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Run simple models list test on docker server
|
||||
run: |
|
||||
curl http://localhost:8321/v1/models
|
||||
|
||||
# TODO (xiyan): figure out why client cannot find server but curl works
|
||||
# - name: Run pytest on docker server
|
||||
# run: |
|
||||
# pip install pytest pytest-md-report
|
||||
# export LLAMA_STACK_BASE_URL="http://localhost:8321"
|
||||
# LLAMA_STACK_BASE_URL="http://localhost:8321" pytest -v tests/client-sdk/inference/test_inference.py --md-report --md-report-verbose=1
|
||||
|
||||
- name: Push to dockerhub
|
||||
run: |
|
||||
echo "PYPI_SOURCE=${PYPI_SOURCE}"
|
||||
echo "VERSION=${{ steps.version.outputs.version }}"
|
||||
TEMPLATES=("ollama" "bedrock" "remote-vllm" "fireworks" "together" "tgi" "meta-reference-gpu")
|
||||
for template in "${TEMPLATES[@]}"; do
|
||||
if [ "$PYPI_SOURCE" = "testpypi" ]; then
|
||||
docker tag distribution-$template:test-${{ steps.version.outputs.version }} llamastack/distribution-$template:test-${{ steps.version.outputs.version }}
|
||||
docker push llamastack/distribution-$template:test-${{ steps.version.outputs.version }}
|
||||
else
|
||||
docker tag distribution-$template:${{ steps.version.outputs.version }} llamastack/distribution-$template:${{ steps.version.outputs.version }}
|
||||
docker tag distribution-$template:${{ steps.version.outputs.version }} llamastack/distribution-$template:latest
|
||||
docker push llamastack/distribution-$template:${{ steps.version.outputs.version }}
|
||||
docker push llamastack/distribution-$template:latest
|
||||
fi
|
||||
done
|
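The deleted Docker publish workflow above resolves which package index to install from by querying the PyPI JSON API with curl and jq (its `check_version` shell function). As a minimal Python sketch of the same availability check, assuming only the `requests` library, it might look like this; the helper is illustrative and not part of the repository:

```python
import requests


def version_exists(host: str, version: str) -> bool:
    """Return True if the given llama-stack version is published on the index host."""
    # host is "test.pypi.org" or "pypi.org", matching the workflow's check_version()
    resp = requests.get(f"https://{host}/pypi/llama-stack/json", timeout=30)
    resp.raise_for_status()
    return version in resp.json().get("releases", {})


def resolve_pypi_source(version: str) -> str:
    # Check TestPyPI first, then PyPI, in the same order as the workflow
    if version_exists("test.pypi.org", version):
        return "testpypi"
    if version_exists("pypi.org", version):
        return "pypi"
    raise SystemExit(f"Version {version} not found in either TestPyPI or PyPI")
```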
244  .github/workflows/publish-to-test-pypi.yml (vendored, deleted)

@@ -1,244 +0,0 @@
|||
name: Publish Python 🐍 distribution 📦 to TestPyPI
|
||||
|
||||
on:
|
||||
workflow_dispatch: # Keep manual trigger
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version number (e.g. 0.0.63.dev20250111)'
|
||||
required: true
|
||||
type: string
|
||||
schedule:
|
||||
- cron: "0 0 * * *" # Run every day at midnight
|
||||
|
||||
jobs:
|
||||
trigger-client-and-models-build:
|
||||
name: Trigger llama-stack-client and llama-models build
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.version.outputs.version }}
|
||||
client_run_id: ${{ steps.trigger-client.outputs.workflow_id }}
|
||||
model_run_id: ${{ steps.trigger-models.outputs.workflow_id }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Get date
|
||||
id: date
|
||||
run: echo "date=$(date +'%Y%m%d')" >> $GITHUB_OUTPUT
|
||||
- name: Compute version based on dispatch event
|
||||
id: version
|
||||
run: |
|
||||
# Read base version from pyproject.toml
|
||||
version=$(sed -n 's/.*version="\([^"]*\)".*/\1/p' setup.py)
|
||||
if [ "${{ github.event_name }}" = "schedule" ]; then
|
||||
echo "version=${version}.dev${{ steps.date.outputs.date }}" >> $GITHUB_OUTPUT
|
||||
elif [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "version=${{ inputs.version }}" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "version=${version}.dev$(shuf -i 10000000-99999999 -n 1)" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
- name: Trigger llama-stack-client workflow
|
||||
id: trigger-client
|
||||
run: |
|
||||
response=$(curl -X POST https://api.github.com/repos/meta-llama/llama-stack-client-python/dispatches \
|
||||
-H 'Accept: application/vnd.github.everest-preview+json' \
|
||||
-H "authorization: Bearer ${{ secrets.PAT_TOKEN }}" \
|
||||
--data "{\"event_type\": \"build-client-package\", \"client_payload\": {\"source\": \"llama-stack-nightly\", \"version\": \"${{ steps.version.outputs.version }}\"}}" \
|
||||
-w "\n%{http_code}")
|
||||
|
||||
http_code=$(echo "$response" | tail -n1)
|
||||
if [ "$http_code" != "204" ]; then
|
||||
echo "Failed to trigger client workflow"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Get the run ID of the triggered workflow
|
||||
sleep 5 # Wait for workflow to be created
|
||||
run_id=$(curl -s -H "authorization: Bearer ${{ secrets.PAT_TOKEN }}" \
|
||||
"https://api.github.com/repos/meta-llama/llama-stack-client-python/actions/runs?event=repository_dispatch" \
|
||||
| jq '.workflow_runs[0].id')
|
||||
echo "workflow_id=$run_id" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Trigger llama-models workflow
|
||||
id: trigger-models
|
||||
run: |
|
||||
response=$(curl -X POST https://api.github.com/repos/meta-llama/llama-models/dispatches \
|
||||
-H 'Accept: application/vnd.github.everest-preview+json' \
|
||||
-H "authorization: Bearer ${{ secrets.PAT_TOKEN }}" \
|
||||
--data "{\"event_type\": \"build-models-package\", \"client_payload\": {\"source\": \"llama-stack-nightly\", \"version\": \"${{ steps.version.outputs.version }}\"}}" \
|
||||
-w "\n%{http_code}")
|
||||
|
||||
http_code=$(echo "$response" | tail -n1)
|
||||
if [ "$http_code" != "204" ]; then
|
||||
echo "Failed to trigger models workflow"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Get the run ID of the triggered workflow
|
||||
sleep 5 # Wait for workflow to be created
|
||||
run_id=$(curl -s -H "authorization: Bearer ${{ secrets.PAT_TOKEN }}" \
|
||||
"https://api.github.com/repos/meta-llama/llama-models/actions/runs?event=repository_dispatch" \
|
||||
| jq '.workflow_runs[0].id')
|
||||
echo "workflow_id=$run_id" >> $GITHUB_OUTPUT
|
||||
|
||||
wait-for-workflows:
|
||||
name: Wait for triggered workflows
|
||||
needs: trigger-client-and-models-build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Wait for client workflow
|
||||
run: |
|
||||
while true; do
|
||||
status=$(curl -s -H "authorization: Bearer ${{ secrets.PAT_TOKEN }}" \
|
||||
"https://api.github.com/repos/meta-llama/llama-stack-client-python/actions/runs/${{ needs.trigger-client-and-models-build.outputs.client_run_id }}" \
|
||||
| jq -r '.status')
|
||||
conclusion=$(curl -s -H "authorization: Bearer ${{ secrets.PAT_TOKEN }}" \
|
||||
"https://api.github.com/repos/meta-llama/llama-stack-client-python/actions/runs/${{ needs.trigger-client-and-models-build.outputs.client_run_id }}" \
|
||||
| jq -r '.conclusion')
|
||||
|
||||
echo "llama-stack-client-python workflow status: $status, conclusion: $conclusion"
|
||||
|
||||
if [ "$status" = "completed" ]; then
|
||||
if [ "$conclusion" != "success" ]; then
|
||||
echo "llama-stack-client-python workflow failed"
|
||||
exit 1
|
||||
fi
|
||||
break
|
||||
fi
|
||||
|
||||
sleep 10
|
||||
done
|
||||
|
||||
- name: Wait for models workflow
|
||||
run: |
|
||||
while true; do
|
||||
status=$(curl -s -H "authorization: Bearer ${{ secrets.PAT_TOKEN }}" \
|
||||
"https://api.github.com/repos/meta-llama/llama-models/actions/runs/${{ needs.trigger-client-and-models-build.outputs.model_run_id }}" \
|
||||
| jq -r '.status')
|
||||
conclusion=$(curl -s -H "authorization: Bearer ${{ secrets.PAT_TOKEN }}" \
|
||||
"https://api.github.com/repos/meta-llama/llama-models/actions/runs/${{ needs.trigger-client-and-models-build.outputs.model_run_id }}" \
|
||||
| jq -r '.conclusion')
|
||||
|
||||
echo "llama-models workflow status: $status, conclusion: $conclusion"
|
||||
|
||||
if [ "$status" = "completed" ]; then
|
||||
if [ "$conclusion" != "success" ]; then
|
||||
echo "llama-models workflow failed"
|
||||
exit 1
|
||||
fi
|
||||
break
|
||||
fi
|
||||
|
||||
sleep 10
|
||||
done
|
||||
|
||||
build:
|
||||
name: Build distribution 📦
|
||||
needs:
|
||||
- wait-for-workflows
|
||||
- trigger-client-and-models-build
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Get date
|
||||
id: date
|
||||
run: echo "date=$(date +'%Y%m%d')" >> $GITHUB_OUTPUT
|
||||
- name: Update version for nightly
|
||||
run: |
|
||||
sed -i 's/version="\([^"]*\)"/version="${{ needs.trigger-client-and-models-build.outputs.version }}"/' setup.py
|
||||
sed -i 's/llama-stack-client>=\([^"]*\)/llama-stack-client==${{ needs.trigger-client-and-models-build.outputs.version }}/' requirements.txt
|
||||
sed -i 's/llama-models>=\([^"]*\)/llama-models==${{ needs.trigger-client-and-models-build.outputs.version }}/' requirements.txt
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: Install pypa/build
|
||||
run: >-
|
||||
python3 -m
|
||||
pip install
|
||||
build
|
||||
--user
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python3 -m build
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
publish-to-testpypi:
|
||||
name: Publish Python 🐍 distribution 📦 to TestPyPI
|
||||
needs:
|
||||
- build
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
environment:
|
||||
name: testrelease
|
||||
url: https://test.pypi.org/p/llama-stack
|
||||
|
||||
permissions:
|
||||
id-token: write # IMPORTANT: mandatory for trusted publishing
|
||||
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
- name: Publish distribution 📦 to TestPyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
repository-url: https://test.pypi.org/legacy/
|
||||
|
||||
test-published-package:
|
||||
name: Test published package
|
||||
needs:
|
||||
- publish-to-testpypi
|
||||
- trigger-client-and-models-build
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
||||
TAVILY_SEARCH_API_KEY: ${{ secrets.TAVILY_SEARCH_API_KEY }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Install the package
|
||||
run: |
|
||||
max_attempts=6
|
||||
attempt=1
|
||||
while [ $attempt -le $max_attempts ]; do
|
||||
echo "Attempt $attempt of $max_attempts to install package..."
|
||||
if pip install --no-cache --index-url https://pypi.org/simple/ --extra-index-url https://test.pypi.org/simple/ llama-stack==${{ needs.trigger-client-and-models-build.outputs.version }}; then
|
||||
echo "Package installed successfully"
|
||||
break
|
||||
fi
|
||||
if [ $attempt -ge $max_attempts ]; then
|
||||
echo "Failed to install package after $max_attempts attempts"
|
||||
exit 1
|
||||
fi
|
||||
attempt=$((attempt + 1))
|
||||
sleep 10
|
||||
done
|
||||
- name: Test the package versions
|
||||
run: |
|
||||
pip list | grep llama_
|
||||
- name: Test CLI commands
|
||||
run: |
|
||||
llama model list
|
||||
llama stack build --list-templates
|
||||
llama model prompt-format -m Llama3.2-11B-Vision-Instruct
|
||||
llama stack list-apis
|
||||
llama stack list-providers inference
|
||||
llama stack list-providers telemetry
|
||||
- name: Test Notebook
|
||||
run: |
|
||||
pip install pytest nbval
|
||||
llama stack build --template together --image-type venv
|
||||
pytest -v -s --nbval-lax ./docs/getting_started.ipynb
|
||||
pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
|
||||
|
||||
# TODO: add trigger for integration test workflow & docker builds
|
21  .github/workflows/semantic-pr.yml (vendored, new file)

@@ -0,0 +1,21 @@
name: Check semantic PR titles

on:
  pull_request_target:
    types:
      - opened
      - edited
      - reopened
      - synchronize

permissions:
  contents: read

jobs:
  title-check:
    runs-on: ubuntu-latest
    steps:
      - name: Check PR Title's semantic conformance
        uses: amannn/action-semantic-pull-request@v5
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
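The workflow delegates the actual validation to amannn/action-semantic-pull-request. As a rough, hedged illustration of the conventional-commit style it enforces (and that CONTRIBUTING.md now asks for), a simplified checker in Python could look like the sketch below; the accepted type list and rules are assumptions for illustration, not what the action implements internally:

```python
import re

# Simplified conventional-commit pattern: type(optional scope)!: description.
# The accepted types below are an assumption for illustration only.
CONVENTIONAL_TITLE = re.compile(
    r"^(build|chore|ci|docs|feat|fix|perf|refactor|revert|style|test)"
    r"(\([\w\-./]+\))?!?: \S.*$"
)


def is_semantic(title: str) -> bool:
    return CONVENTIONAL_TITLE.match(title) is not None


assert is_semantic("fix: handle empty run.yaml")
assert is_semantic("feat(agents)!: change tool-call schema")
assert not is_semantic("Fixed some stuff")
```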
69  .github/workflows/tests.yml (vendored, new file)

@@ -0,0 +1,69 @@
|||
name: auto-tests
|
||||
|
||||
on:
|
||||
# pull_request:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
commit_sha:
|
||||
description: 'Specific Commit SHA to trigger on'
|
||||
required: false
|
||||
default: $GITHUB_SHA # default to the last commit of $GITHUB_REF branch
|
||||
|
||||
jobs:
|
||||
test-llama-stack-as-library:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
||||
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
|
||||
TAVILY_SEARCH_API_KEY: ${{ secrets.TAVILY_SEARCH_API_KEY }}
|
||||
strategy:
|
||||
matrix:
|
||||
provider: [fireworks, together]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.inputs.commit_sha }}
|
||||
|
||||
- name: Echo commit SHA
|
||||
run: |
|
||||
echo "Triggered on commit SHA: ${{ github.event.inputs.commit_sha }}"
|
||||
git rev-parse HEAD
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt pytest
|
||||
pip install -e .
|
||||
|
||||
- name: Build providers
|
||||
run: |
|
||||
llama stack build --template ${{ matrix.provider }} --image-type venv
|
||||
|
||||
- name: Install the latest llama-stack-client & llama-models packages
|
||||
run: |
|
||||
pip install -e git+https://github.com/meta-llama/llama-stack-client-python.git#egg=llama-stack-client
|
||||
pip install -e git+https://github.com/meta-llama/llama-models.git#egg=llama-models
|
||||
|
||||
- name: Run client-sdk test
|
||||
working-directory: "${{ github.workspace }}"
|
||||
env:
|
||||
REPORT_OUTPUT: md_report.md
|
||||
shell: bash
|
||||
run: |
|
||||
pip install --upgrade pytest-md-report
|
||||
echo "REPORT_FILE=${REPORT_OUTPUT}" >> "$GITHUB_ENV"
|
||||
|
||||
export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
|
||||
LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/ --md-report-output "$REPORT_OUTPUT"
|
||||
|
||||
- name: Output reports to the job summary
|
||||
if: always()
|
||||
shell: bash
|
||||
run: |
|
||||
if [ -f "$REPORT_FILE" ]; then
|
||||
echo "<details><summary> Test Report for ${{ matrix.provider }} </summary>" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
cat "$REPORT_FILE" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "</details>" >> $GITHUB_STEP_SUMMARY
|
||||
fi
|
40  .github/workflows/update-readthedocs.yml (vendored, new file)

@@ -0,0 +1,40 @@
|||
name: Update ReadTheDocs
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
branch:
|
||||
description: 'RTD version to update'
|
||||
required: false
|
||||
default: 'latest'
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- 'docs/source/**'
|
||||
- 'docs/resources/**'
|
||||
- '.github/workflows/update-readthedocs.yml'
|
||||
|
||||
jobs:
|
||||
update-readthedocs:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
TOKEN: ${{ secrets.READTHEDOCS_TOKEN }}
|
||||
steps:
|
||||
- name: Trigger ReadTheDocs build
|
||||
run: |
|
||||
if [ -z "$TOKEN" ]; then
|
||||
echo "READTHEDOCS_TOKEN is not set"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
response=$(curl -X POST \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"token\": \"$TOKEN\"}" \
|
||||
https://readthedocs.org/api/v2/webhook/llama-stack/289768/)
|
||||
|
||||
echo "Response: $response"
|
||||
if [ $(echo $response | jq -r '.build_triggered') != 'true' ]; then
|
||||
echo "Failed to trigger ReadTheDocs build"
|
||||
exit 1
|
||||
fi
|
1  .gitignore (vendored)

@@ -19,3 +19,4 @@ Package.resolved
_build
docs/src
pyrightconfig.json
venv/
@@ -5,10 +5,8 @@ default_language_version:
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: 6306a48f7dae5861702d573c9c247e4e9498e867
  rev: v5.0.0 # Latest stable version
  hooks:
  - id: trailing-whitespace
  - id: check-ast
  - id: check-merge-conflict
  - id: check-added-large-files
    args: ['--maxkb=1000']
@@ -28,23 +26,41 @@ repos:
  - --license-filepath
  - docs/license_header.txt

- repo: https://github.com/pycqa/flake8
  rev: 34cbf8ef3950f43d09b85e2e45c15ae5717dc37b
- repo: https://github.com/astral-sh/ruff-pre-commit
  rev: v0.9.4
  hooks:
  - id: flake8
    additional_dependencies:
    - flake8-bugbear == 22.4.25
    - pep8-naming == 0.12.1
    - torchfix
    args: ['--config=.flake8']
  # Run the linter with import sorting.
  - id: ruff
    args: [
      --fix,
      --exit-non-zero-on-fix,
      --select, I,
    ]
  - id: ruff-format

- repo: https://github.com/omnilib/ufmt
  rev: v2.7.0
- repo: https://github.com/adamchainz/blacken-docs
  rev: 1.19.0
  hooks:
  - id: ufmt
  - id: blacken-docs
    additional_dependencies:
    - black == 24.4.2
    - usort == 1.0.8
    - black==24.3.0

- repo: https://github.com/astral-sh/uv-pre-commit
  rev: 0.5.26
  hooks:
  - id: uv-export
    args: ["--frozen", "--no-hashes", "--no-emit-project"]
  - id: uv-sync

# - repo: https://github.com/pre-commit/mirrors-mypy
#   rev: v1.14.0
#   hooks:
#   - id: mypy
#     additional_dependencies:
#     - types-requests
#     - types-setuptools
#     - pydantic
#     args: [--ignore-missing-imports]

# - repo: https://github.com/jsh9/pydoclint
#   rev: d88180a8632bb1602a4d81344085cf320f288c5a
@@ -71,3 +87,7 @@ repos:
#     require_serial: true
#     files: ^llama_stack/templates/.*$
#     stages: [manual]

ci:
    autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
    autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
@@ -1,7 +1,8 @@
[flake8]
# Suggested config from pytorch that we can adapt
select = B,C,E,F,N,P,T4,W,B9,TOR0,TOR1,TOR2
max-line-length = 120
lint.select = ["B", "C", "E" , "F" , "N", "W", "B9"]

line-length = 120

# C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead
# N812 ignored because import torch.nn.functional as F is PyTorch convention
@@ -9,23 +10,28 @@ max-line-length = 120
# E731 allow usage of assigning lambda expressions
# E701 let black auto-format statements on one line
# E704 let black auto-format statements on one line
ignore =
    E203,E305,E402,E501,E721,E741,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,N812,N817,E731,E701,E704
lint.ignore = [
    "E203", "E305", "E402", "E501", "E721", "E741", "F405", "F821", "F841",
    "C408", "E302", "W291", "E303", "N812", "N817", "E731", "E701",
    # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
    "C901", "C405", "C414", "N803", "N999", "C403", "C416", "B028", "C419", "C401", "B023",
    # shebang has extra meaning in fbcode lints, so I think it's not worth trying
    # to line this up with executable bit
    EXE001,
    "EXE001",
    # random naming hints don't need
    N802,
    "N802",
    # these ignores are from flake8-bugbear; please fix!
    B007,B008,B950
optional-ascii-coding = True
exclude =
    ./.git,
    ./docs/*,
    ./build,
    ./scripts,
    ./venv,
    *.pyi,
    .pre-commit-config.yaml,
    *.md,
    .flake8
    "B007", "B008"
]

exclude = [
    "./.git",
    "./docs/*",
    "./build",
    "./scripts",
    "./venv",
    "*.pyi",
    ".pre-commit-config.yaml",
    "*.md",
    ".flake8"
]
35  CHANGELOG.md (deleted)

@@ -1,35 +0,0 @@
# Changelog

## 0.0.53

### Added
- Resource-oriented design for models, shields, memory banks, datasets and eval tasks
- Persistence for registered objects with distribution
- Ability to persist memory banks created for FAISS
- PostgreSQL KVStore implementation
- Environment variable placeholder support in run.yaml files
- Comprehensive Zero-to-Hero notebooks and quickstart guides
- Support for quantized models in Ollama
- Vision models support for Together, Fireworks, Meta-Reference, and Ollama, and vLLM
- Bedrock distribution with safety shields support
- Evals API with task registration and scoring functions
- MMLU and SimpleQA benchmark scoring functions
- Huggingface dataset provider integration for benchmarks
- Support for custom dataset registration from local paths
- Benchmark evaluation CLI tools with visualization tables
- RAG evaluation scoring functions and metrics
- Local persistence for datasets and eval tasks

### Changed
- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner)
- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`)
- Updated API signatures for dataset and eval task registration
- Restructured folder organization for providers
- Enhanced Docker build configuration
- Added version prefixing for REST API routes
- Enhanced evaluation task registration workflow
- Improved benchmark evaluation output formatting
- Restructured evals folder organization for better modularity

### Removed
- `llama stack configure` command
@@ -40,6 +40,7 @@ If you need help or guidance, comment on the issue. Issues that are extra friend
3. Ensure the test suite passes.
4. Make sure your code lints using `pre-commit`.
5. If you haven't already, complete the Contributor License Agreement ("CLA").
6. Ensure your pull request follows the [conventional commits format](https://www.conventionalcommits.org/en/v1.0.0/).

## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
@@ -56,22 +57,50 @@ disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.


## Set up your development environment

We use [uv](https://github.com/astral-sh/uv) to manage python dependencies and virtual environments.
You can install `uv` by following this [guide](https://docs.astral.sh/uv/getting-started/installation/).
You can install the dependencies by running:

```bash
$ cd llama-stack
$ uv sync --extra dev
$ uv pip install -e .
$ source .venv/bin/activate
```

## Pre-commit Hooks

We use [pre-commit](https://pre-commit.com/) to run linting and formatting checks on your code. You can install the pre-commit hooks by running:

```bash
$ cd llama-stack
$ conda activate <your-environment>
$ pip install pre-commit
$ pre-commit install
$ uv run pre-commit install
```

After that, pre-commit hooks will run automatically before each commit.

Alternatively, if you don't want to install the pre-commit hooks, you can run the checks manually by running:

```bash
$ uv run pre-commit run --all-files
```

> [!CAUTION]
> Before pushing your changes, make sure that the pre-commit hooks have passed successfully.

## Adding a new dependency to the project

To add a new dependency to the project, you can use the `uv` command. For example, to add `foo` to the project, you can run:

```bash
$ uv add foo
$ uv sync
```

## Coding Style
* 2 spaces for indentation rather than tabs

* 4 spaces for indentation rather than tabs
* 80 character line length
* ...
@@ -102,13 +131,12 @@ If you have made changes to a provider's configuration in any form (introducing
If you are making changes to the documentation at [https://llama-stack.readthedocs.io/en/latest/](https://llama-stack.readthedocs.io/en/latest/), you can use the following command to build the documentation and preview your changes. You will need [Sphinx](https://www.sphinx-doc.org/en/master/) and the readthedocs theme.

```bash
cd llama-stack/docs
pip install -r requirements.txt
pip install sphinx-autobuild
$ cd llama-stack/docs
$ uv sync --extra docs

# This will start a local server (usually at http://127.0.0.1:8000) that automatically rebuilds and refreshes when you make changes to the documentation.
make html
sphinx-autobuild source build/html
$ make html
$ uv run sphinx-autobuild source build/html
```

@@ -1,4 +1,4 @@
include requirements.txt
include pyproject.toml
include distributions/dependencies.json
include llama_stack/distribution/*.sh
include llama_stack/cli/scripts/*.sh

68  README.md

@@ -2,17 +2,18 @@
[](https://pypi.org/project/llama_stack/)
[](https://pypi.org/project/llama-stack/)
[](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
[](https://discord.gg/llama-stack)

[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb)

Llama Stack defines and standardizes the core building blocks that simplify AI application development. It codified best practices across the Llama ecosystem. More specifically, it provides
Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides

- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
- **Plugin architecture** to support the rich ecosystem of implementations of the different APIs in different environments like local development, on-premises, cloud, and mobile.
- **Prepackaged verified distributions** which offer a one-stop solution for developers to get started quickly and reliably in any environment
- **Multiple developer interfaces** like CLI and SDKs for Python, Node, iOS, and Android
- **Standalone applications** as examples for how to build production-grade AI applications with Llama Stack
- **Plugin architecture** to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
- **Prepackaged verified distributions** which offer a one-stop solution for developers to get started quickly and reliably in any environment.
- **Multiple developer interfaces** like CLI and SDKs for Python, Typescript, iOS, and Android.
- **Standalone applications** as examples for how to build production-grade AI applications with Llama Stack.

<div style="text-align: center;">
<img
@@ -24,31 +25,31 @@ Llama Stack defines and standardizes the core building blocks that simplify AI a
</div>

### Llama Stack Benefits
- **Flexible Options**: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choice.
- **Consistent Experience**: With its unified APIs Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
- **Flexible Options**: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
- **Consistent Experience**: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
- **Robust Ecosystem**: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.

By reducing friction and complexity, Llama Stack empowers developers to focus on what they do best: building transformative generative AI applications.

### API Providers
Here is a list of the various API providers and available distributions to developers started easily,
Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.

| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| Meta Reference | Single Node | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| SambaNova | Hosted | | :heavy_check_mark: | | | |
| Cerebras | Hosted | | :heavy_check_mark: | | | |
| Fireworks | Hosted | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | |
| AWS Bedrock | Hosted | | :heavy_check_mark: | | :heavy_check_mark: | |
| Together | Hosted | :heavy_check_mark: | :heavy_check_mark: | | :heavy_check_mark: | |
| Groq | Hosted | | :heavy_check_mark: | | | |
| Ollama | Single Node | | :heavy_check_mark: | | | |
| TGI | Hosted and Single Node | | :heavy_check_mark: | | | |
| NVIDIA NIM | Hosted and Single Node | | :heavy_check_mark: | | | |
| Chroma | Single Node | | | :heavy_check_mark: | | |
| PG Vector | Single Node | | | :heavy_check_mark: | | |
| PyTorch ExecuTorch | On-device iOS | :heavy_check_mark: | :heavy_check_mark: | | | |
| vLLM | Hosted and Single Node | | :heavy_check_mark: | | | |
| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | | |
| Cerebras | Hosted | | ✅ | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | |
| Together | Hosted | ✅ | ✅ | | ✅ | |
| Groq | Hosted | | ✅ | | | |
| Ollama | Single Node | | ✅ | | | |
| TGI | Hosted and Single Node | | ✅ | | | |
| NVIDIA NIM | Hosted and Single Node | | ✅ | | | |
| Chroma | Single Node | | | ✅ | | |
| PG Vector | Single Node | | | ✅ | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | |
| vLLM | Hosted and Single Node | | ✅ | | | |

### Distributions

@@ -70,15 +71,15 @@ A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider

You have two ways to install this repository:

1. **Install as a package**:
* **Install as a package**:
   You can install the repository directly from [PyPI](https://pypi.org/project/llama-stack/) by running the following command:
   ```bash
   pip install llama-stack
   ```

2. **Install from source**:
* **Install from source**:
   If you prefer to install from the source code, make sure you have [conda installed](https://docs.conda.io/projects/conda/en/stable).
   Then, follow these steps:
   Then, run the following commands:
   ```bash
   mkdir -p ~/local
   cd ~/local
@@ -95,10 +96,11 @@ You have two ways to install this repository:

Please checkout our [Documentation](https://llama-stack.readthedocs.io/en/latest/index.html) page for more details.

* [CLI reference](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/index.html)
    * Guide using `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution.
* [Getting Started](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html)
    * Quick guide to start a Llama Stack server.
* CLI references
    * [llama (server-side) CLI Reference](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/index.html): Guide for using the `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution.
    * [llama (client-side) CLI Reference](https://llama-stack.readthedocs.io/en/latest/references/llama_stack_client_cli_reference.html): Guide for using the `llama-stack-client` CLI, which allows you to query information about the distribution.
* Getting Started
    * [Quick guide to start a Llama Stack server](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).
    * [Jupyter notebook](./docs/getting_started.ipynb) to walk-through how to use simple text and vision inference llama_stack_client APIs
    * The complete Llama Stack lesson [Colab notebook](https://colab.research.google.com/drive/1dtVmxotBsI4cGZQNsJRYPrLiDeT0Wnwt) of the new [Llama 3.2 course on Deeplearning.ai](https://learn.deeplearning.ai/courses/introducing-multimodal-llama-3-2/lesson/8/llama-stack).
    * A [Zero-to-Hero Guide](https://github.com/meta-llama/llama-stack/tree/main/docs/zero_to_hero_guide) that guide you through all the key components of llama stack with code samples.
@@ -111,9 +113,9 @@ Please checkout our [Documentation](https://llama-stack.readthedocs.io/en/latest
| :----: | :----: | :----: |
| Python | [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) | [](https://pypi.org/project/llama_stack_client/)
| Swift | [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift) | [](https://swiftpackageindex.com/meta-llama/llama-stack-client-swift)
| Node | [llama-stack-client-node](https://github.com/meta-llama/llama-stack-client-node) | [](https://npmjs.org/package/llama-stack-client)
| Typescript | [llama-stack-client-typescript](https://github.com/meta-llama/llama-stack-client-typescript) | [](https://npmjs.org/package/llama-stack-client)
| Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) | [](https://central.sonatype.com/artifact/com.llama.llamastack/llama-stack-client-kotlin)

Check out our client SDKs for connecting to Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [node](https://github.com/meta-llama/llama-stack-client-node), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications.
Check out our client SDKs for connecting to a Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [typescript](https://github.com/meta-llama/llama-stack-client-typescript), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications.

You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.
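The README points at the language SDKs without showing what a call looks like. As a minimal sketch, assuming `llama-stack-client` is installed, a distribution is already serving locally on port 8321 (the port the workflows in this commit probe), and `meta-llama/Llama-3.1-8B-Instruct` is registered, the Python client could be used like this:

```python
from llama_stack_client import LlamaStackClient

# Assumes `llama stack run <template>` is already serving on localhost:8321.
client = LlamaStackClient(base_url="http://localhost:8321")

# Roughly the Python equivalent of the `curl http://localhost:8321/v1/models`
# smoke test used in the CI workflows above.
for model in client.models.list():
    print(model.identifier)

# A single round trip through the unified Inference API.
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.completion_message.content)
```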
|
|
@ -1,9 +1,46 @@
|
|||
{
|
||||
"sambanova": [
|
||||
"bedrock": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"boto3",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"cerebras": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"cerebras_cloud_sdk",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
|
@ -27,7 +64,110 @@
|
|||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"dell": [
|
||||
"aiohttp",
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"huggingface_hub",
|
||||
"matplotlib",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"fireworks": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"fireworks-ai",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"hf-endpoint": [
|
||||
"aiohttp",
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"huggingface_hub",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"hf-serverless": [
|
||||
"aiohttp",
|
||||
|
@ -62,211 +202,7 @@
|
|||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"together": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"together",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"vllm-gpu": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"vllm",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"remote-vllm": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"fireworks": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"fireworks-ai",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"tgi": [
|
||||
"aiohttp",
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"huggingface_hub",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"bedrock": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"boto3",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"meta-reference-gpu": [
|
||||
"accelerate",
|
||||
|
@ -306,39 +242,7 @@
|
|||
"uvicorn",
|
||||
"zmq",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"nvidia": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"meta-reference-quantized-gpu": [
|
||||
"accelerate",
|
||||
|
@ -380,21 +284,20 @@
|
|||
"uvicorn",
|
||||
"zmq",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"cerebras": [
|
||||
"nvidia": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"cerebras_cloud_sdk",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
|
@ -413,7 +316,7 @@
|
|||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"ollama": [
|
||||
"aiohttp",
|
||||
|
@ -447,9 +350,72 @@
|
|||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"hf-endpoint": [
|
||||
"remote-vllm": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"sambanova": [
|
||||
"aiosqlite",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"tgi": [
|
||||
"aiohttp",
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
|
@ -482,6 +448,74 @@
|
|||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"together": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"together",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
],
|
||||
"vllm-gpu": [
|
||||
"aiosqlite",
|
||||
"autoevals",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
"numpy",
|
||||
"openai",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
"opentelemetry-sdk",
|
||||
"pandas",
|
||||
"pillow",
|
||||
"psycopg2-binary",
|
||||
"pypdf",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
"scipy",
|
||||
"sentencepiece",
|
||||
"tqdm",
|
||||
"transformers",
|
||||
"uvicorn",
|
||||
"vllm",
|
||||
"sentence-transformers --no-deps",
|
||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
|
||||
]
|
||||
}
|
||||
|
|
|
@ -1,65 +0,0 @@
|
|||
# Together Distribution
|
||||
|
||||
### Connect to a Llama Stack Together Endpoint
|
||||
- You may connect to the hosted endpoint `https://llama-stack.together.ai`, which serves a Llama Stack distribution
|
||||
|
||||
The `llamastack/distribution-together` distribution consists of the following provider configurations.
|
||||
|
||||
|
||||
| **API** | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** |
|
||||
|----------------- |--------------- |---------------- |-------------------------------------------------- |---------------- |---------------- |
|
||||
| **Provider(s)** | remote::together | meta-reference | meta-reference, remote::weaviate | meta-reference | meta-reference |
|
||||
|
||||
|
||||
### Docker: Start the Distribution (Single Node CPU)
|
||||
|
||||
> [!NOTE]
|
||||
> This assumes you have a hosted endpoint at Together with an API key.
|
||||
|
||||
```
|
||||
$ cd distributions/together
|
||||
$ ls
|
||||
compose.yaml run.yaml
|
||||
$ docker compose up
|
||||
```
|
||||
|
||||
Make sure that in your `run.yaml` file, your inference provider points to the correct Together server endpoint, e.g.
|
||||
```
|
||||
inference:
|
||||
- provider_id: together
|
||||
provider_type: remote::together
|
||||
config:
|
||||
url: https://api.together.xyz/v1
|
||||
api_key: <optional api key>
|
||||
```
|
||||
|
||||
### Conda llama stack run (Single Node CPU)
|
||||
|
||||
```bash
|
||||
llama stack build --template together --image-type conda
|
||||
# -- modify run.yaml to a valid Together server endpoint
|
||||
llama stack run ./run.yaml
|
||||
```
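
Once `llama stack run` is serving the distribution, you can sanity-check it from Python. The sketch below is illustrative and not part of the original guide: it assumes the server listens on `http://localhost:5000` (use whatever port your `run.yaml` exposes), that `llama-stack-client` is installed, and that the model identifier matches one returned by `llama-stack-client models list`.

```python
# Minimal sketch: send one chat completion to the running Together-backed stack.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")  # adjust the port to your run.yaml

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # assumption: pick an identifier from `models list`
    messages=[{"role": "user", "content": "Write a two-sentence poem about a llama."}],
)
print(response.completion_message.content)
```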
|
||||
|
||||
### (Optional) Update Model Serving Configuration
|
||||
|
||||
Use `llama-stack-client models list` to check the available models served by Together.
|
||||
|
||||
```
|
||||
$ llama-stack-client models list
|
||||
+------------------------------+------------------------------+---------------+------------+
|
||||
| identifier | llama_model | provider_id | metadata |
|
||||
+==============================+==============================+===============+============+
|
||||
| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | together0 | {} |
|
||||
+------------------------------+------------------------------+---------------+------------+
|
||||
| Llama3.1-70B-Instruct | Llama3.1-70B-Instruct | together0 | {} |
|
||||
+------------------------------+------------------------------+---------------+------------+
|
||||
| Llama3.1-405B-Instruct | Llama3.1-405B-Instruct | together0 | {} |
|
||||
+------------------------------+------------------------------+---------------+------------+
|
||||
| Llama3.2-3B-Instruct | Llama3.2-3B-Instruct | together0 | {} |
|
||||
+------------------------------+------------------------------+---------------+------------+
|
||||
| Llama3.2-11B-Vision-Instruct | Llama3.2-11B-Vision-Instruct | together0 | {} |
|
||||
+------------------------------+------------------------------+---------------+------------+
|
||||
| Llama3.2-90B-Vision-Instruct | Llama3.2-90B-Vision-Instruct | together0 | {} |
|
||||
+------------------------------+------------------------------+---------------+------------+
|
||||
```
|
4
docs/_static/css/my_theme.css
vendored
|
@ -12,3 +12,7 @@
|
|||
.wy-side-nav-search {
|
||||
background-color: transparent !important;
|
||||
}
|
||||
|
||||
.hide-title h1 {
|
||||
display: none;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
9
docs/conftest.py
Normal file
|
@ -0,0 +1,9 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
def pytest_collection_modifyitems(items):
|
||||
for item in items:
|
||||
item.name = item.name.replace(' ', '_')
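
The new `docs/conftest.py` registers a pytest collection hook that rewrites each collected item's name, replacing spaces with underscores. The snippet below only illustrates the transformation; the "Cell 0" name is an assumed nbval-style item name, not something taken from the commit.

```python
# Illustration: nbval names notebook cells like "Cell 0"; after the hook the
# collected test name has no spaces, which keeps test IDs easier to select and report.
name = "Cell 0"
print(name.replace(" ", "_"))  # -> Cell_0
```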
|
|
@ -7,7 +7,7 @@
|
|||
"id": "c1e7571c"
|
||||
},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/drive/1F2ksmkoGQPa4pzRjMOE6BXWeOxWFIW6n?usp=sharing)\n",
|
||||
"[](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n",
|
||||
"\n",
|
||||
"# Llama Stack - Building AI Applications\n",
|
||||
"\n",
|
||||
|
@ -15,7 +15,7 @@
|
|||
"\n",
|
||||
"[Llama Stack](https://github.com/meta-llama/llama-stack) defines and standardizes the set of core building blocks needed to bring generative AI applications to market. These building blocks are presented in the form of interoperable APIs with a broad set of Service Providers providing their implementations.\n",
|
||||
"\n",
|
||||
"Read more about the project: https://llama-stack.readthedocs.io/en/latest/index.html\n",
|
||||
"Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
|
||||
"\n",
|
||||
"In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n"
|
||||
]
|
||||
|
@ -71,7 +71,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "J2kGed0R5PSf",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
|
@ -81,119 +81,15 @@
|
|||
"id": "J2kGed0R5PSf",
|
||||
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Reading package lists... Done\n",
|
||||
"Building dependency tree... Done\n",
|
||||
"Reading state information... Done\n",
|
||||
"The following NEW packages will be installed:\n",
|
||||
" bubblewrap\n",
|
||||
"0 upgraded, 1 newly installed, 0 to remove and 49 not upgraded.\n",
|
||||
"Need to get 46.3 kB of archives.\n",
|
||||
"After this operation, 132 kB of additional disk space will be used.\n",
|
||||
"Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 bubblewrap amd64 0.6.1-1ubuntu0.1 [46.3 kB]\n",
|
||||
"Fetched 46.3 kB in 0s (122 kB/s)\n",
|
||||
"Selecting previously unselected package bubblewrap.\n",
|
||||
"(Reading database ... 124561 files and directories currently installed.)\n",
|
||||
"Preparing to unpack .../bubblewrap_0.6.1-1ubuntu0.1_amd64.deb ...\n",
|
||||
"Unpacking bubblewrap (0.6.1-1ubuntu0.1) ...\n",
|
||||
"Setting up bubblewrap (0.6.1-1ubuntu0.1) ...\n",
|
||||
"Processing triggers for man-db (2.10.2-1) ...\n",
|
||||
"Looking in indexes: https://test.pypi.org/simple/, https://pypi.python.org/simple\n",
|
||||
"Collecting llama-stack==0.1.0rc10\n",
|
||||
" Downloading https://test-files.pythonhosted.org/packages/68/22/4a170fbe01095df81e76c7bf8f35c716c1a0a5ec4503da6e78695fab351c/llama_stack-0.1.0rc10-py3-none-any.whl.metadata (15 kB)\n",
|
||||
"Collecting blobfile (from llama-stack==0.1.0rc10)\n",
|
||||
" Downloading blobfile-3.0.0-py3-none-any.whl.metadata (15 kB)\n",
|
||||
"Collecting fire (from llama-stack==0.1.0rc10)\n",
|
||||
" Downloading fire-0.7.0.tar.gz (87 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
||||
"Requirement already satisfied: httpx in /usr/local/lib/python3.11/dist-packages (from llama-stack==0.1.0rc10) (0.28.1)\n",
|
||||
"Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from llama-stack==0.1.0rc10) (0.27.1)\n",
|
||||
"Collecting llama-models==0.1.0rc10 (from llama-stack==0.1.0rc10)\n",
|
||||
" Downloading https://test-files.pythonhosted.org/packages/45/2b/6a6947d5915054b9980f82606942f1b79960a27168299254ca12e5b5795b/llama_models-0.1.0rc10-py3-none-any.whl.metadata (8.5 kB)\n",
|
||||
"Collecting llama-stack-client==0.1.0rc10 (from llama-stack==0.1.0rc10)\n",
|
||||
" Downloading https://test-files.pythonhosted.org/packages/d6/85/a4fd621c4ae4db7339ab098b37bf4b4ad3cc12440e75ef10ec524e28ef7d/llama_stack_client-0.1.0rc10-py3-none-any.whl.metadata (15 kB)\n",
|
||||
"Requirement already satisfied: prompt-toolkit in /usr/local/lib/python3.11/dist-packages (from llama-stack==0.1.0rc10) (3.0.48)\n",
|
||||
"Collecting python-dotenv (from llama-stack==0.1.0rc10)\n",
|
||||
" Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n",
|
||||
"Requirement already satisfied: pydantic>=2 in /usr/local/lib/python3.11/dist-packages (from llama-stack==0.1.0rc10) (2.10.5)\n",
|
||||
"Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from llama-stack==0.1.0rc10) (2.32.3)\n",
|
||||
"Requirement already satisfied: rich in /usr/local/lib/python3.11/dist-packages (from llama-stack==0.1.0rc10) (13.9.4)\n",
|
||||
"Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from llama-stack==0.1.0rc10) (75.1.0)\n",
|
||||
"Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from llama-stack==0.1.0rc10) (2.5.0)\n",
|
||||
"Requirement already satisfied: PyYAML in /usr/local/lib/python3.11/dist-packages (from llama-models==0.1.0rc10->llama-stack==0.1.0rc10) (6.0.2)\n",
|
||||
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from llama-models==0.1.0rc10->llama-stack==0.1.0rc10) (3.1.5)\n",
|
||||
"Collecting tiktoken (from llama-models==0.1.0rc10->llama-stack==0.1.0rc10)\n",
|
||||
" Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
|
||||
"Requirement already satisfied: Pillow in /usr/local/lib/python3.11/dist-packages (from llama-models==0.1.0rc10->llama-stack==0.1.0rc10) (11.1.0)\n",
|
||||
"Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (3.7.1)\n",
|
||||
"Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (8.1.8)\n",
|
||||
"Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (1.9.0)\n",
|
||||
"Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (2.2.2)\n",
|
||||
"Collecting pyaml (from llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10)\n",
|
||||
" Downloading pyaml-25.1.0-py3-none-any.whl.metadata (12 kB)\n",
|
||||
"Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (1.3.1)\n",
|
||||
"Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (4.67.1)\n",
|
||||
"Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (4.12.2)\n",
|
||||
"Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx->llama-stack==0.1.0rc10) (2024.12.14)\n",
|
||||
"Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx->llama-stack==0.1.0rc10) (1.0.7)\n",
|
||||
"Requirement already satisfied: idna in /usr/local/lib/python3.11/dist-packages (from httpx->llama-stack==0.1.0rc10) (3.10)\n",
|
||||
"Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx->llama-stack==0.1.0rc10) (0.14.0)\n",
|
||||
"Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2->llama-stack==0.1.0rc10) (0.7.0)\n",
|
||||
"Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2->llama-stack==0.1.0rc10) (2.27.2)\n",
|
||||
"Collecting pycryptodomex>=3.8 (from blobfile->llama-stack==0.1.0rc10)\n",
|
||||
" Downloading pycryptodomex-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.25.3 in /usr/local/lib/python3.11/dist-packages (from blobfile->llama-stack==0.1.0rc10) (2.3.0)\n",
|
||||
"Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.11/dist-packages (from blobfile->llama-stack==0.1.0rc10) (5.3.0)\n",
|
||||
"Requirement already satisfied: filelock>=3.0 in /usr/local/lib/python3.11/dist-packages (from blobfile->llama-stack==0.1.0rc10) (3.16.1)\n",
|
||||
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->llama-stack==0.1.0rc10) (2024.10.0)\n",
|
||||
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->llama-stack==0.1.0rc10) (24.2)\n",
|
||||
"Requirement already satisfied: wcwidth in /usr/local/lib/python3.11/dist-packages (from prompt-toolkit->llama-stack==0.1.0rc10) (0.2.13)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->llama-stack==0.1.0rc10) (3.4.1)\n",
|
||||
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich->llama-stack==0.1.0rc10) (3.0.0)\n",
|
||||
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich->llama-stack==0.1.0rc10) (2.18.0)\n",
|
||||
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich->llama-stack==0.1.0rc10) (0.1.2)\n",
|
||||
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->llama-models==0.1.0rc10->llama-stack==0.1.0rc10) (3.0.2)\n",
|
||||
"Requirement already satisfied: numpy>=1.23.2 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (1.26.4)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (2.8.2)\n",
|
||||
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (2024.2)\n",
|
||||
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (2024.2)\n",
|
||||
"Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.11/dist-packages (from tiktoken->llama-models==0.1.0rc10->llama-stack==0.1.0rc10) (2024.11.6)\n",
|
||||
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client==0.1.0rc10->llama-stack==0.1.0rc10) (1.17.0)\n",
|
||||
"Downloading https://test-files.pythonhosted.org/packages/68/22/4a170fbe01095df81e76c7bf8f35c716c1a0a5ec4503da6e78695fab351c/llama_stack-0.1.0rc10-py3-none-any.whl (532 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m532.7/532.7 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading https://test-files.pythonhosted.org/packages/45/2b/6a6947d5915054b9980f82606942f1b79960a27168299254ca12e5b5795b/llama_models-0.1.0rc10-py3-none-any.whl (1.6 MB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m20.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading https://test-files.pythonhosted.org/packages/d6/85/a4fd621c4ae4db7339ab098b37bf4b4ad3cc12440e75ef10ec524e28ef7d/llama_stack_client-0.1.0rc10-py3-none-any.whl (328 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m328.5/328.5 kB\u001b[0m \u001b[31m29.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading blobfile-3.0.0-py3-none-any.whl (75 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n",
|
||||
"Downloading pycryptodomex-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m57.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading pyaml-25.1.0-py3-none-any.whl (26 kB)\n",
|
||||
"Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hBuilding wheels for collected packages: fire\n",
|
||||
" Building wheel for fire (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
|
||||
" Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=3a37285ecae37a5fb69bbad717aabdb8c13f0da7906668b7c123475eefa41c3b\n",
|
||||
" Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n",
|
||||
"Successfully built fire\n",
|
||||
"Installing collected packages: python-dotenv, pycryptodomex, pyaml, fire, tiktoken, blobfile, llama-stack-client, llama-models, llama-stack\n",
|
||||
"Successfully installed blobfile-3.0.0 fire-0.7.0 llama-models-0.1.0rc10 llama-stack-0.1.0rc10 llama-stack-client-0.1.0rc10 pyaml-25.1.0 pycryptodomex-3.21.0 python-dotenv-1.0.1 tiktoken-0.8.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# NBVAL_SKIP\n",
|
||||
"\n",
|
||||
"!apt-get install -y bubblewrap\n",
|
||||
"# install a branch of llama stack\n",
|
||||
"!pip install llama-stack"
|
||||
"import os\n",
|
||||
"os.environ[\"UV_SYSTEM_PYTHON\"] = \"1\"\n",
|
||||
"!pip install uv\n",
|
||||
"!uv pip install llama-stack"
|
||||
]
|
||||
},
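
The install cell now bootstraps `uv` and installs through it. Setting `UV_SYSTEM_PYTHON=1` tells `uv pip` to target the already-active interpreter (as on Colab) instead of requiring a virtual environment. A rough equivalent outside a notebook, assuming network access and that `uv` ends up on PATH, is sketched below.

```python
# Sketch of the same bootstrap as plain Python (assumptions noted above).
import os
import subprocess
import sys

os.environ["UV_SYSTEM_PYTHON"] = "1"  # same effect as passing --system to `uv pip`
subprocess.run([sys.executable, "-m", "pip", "install", "uv"], check=True)   # bootstrap uv itself
subprocess.run(["uv", "pip", "install", "llama-stack"], check=True)          # install llama-stack via uv
```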
|
||||
{
|
||||
|
@ -218,7 +114,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"id": "HaepEZXCDgif",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
|
@ -228,331 +124,9 @@
|
|||
"id": "HaepEZXCDgif",
|
||||
"outputId": "9314f698-593d-4c1a-ea15-15c735dc1023"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: llama-stack in /usr/local/lib/python3.11/dist-packages (0.1.0rc10)\r\n",
|
||||
"Requirement already satisfied: blobfile in /usr/local/lib/python3.11/dist-packages (from llama-stack) (3.0.0)\r\n",
|
||||
"Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (from llama-stack) (0.7.0)\r\n",
|
||||
"Requirement already satisfied: httpx in /usr/local/lib/python3.11/dist-packages (from llama-stack) (0.28.1)\r\n",
|
||||
"Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.11/dist-packages (from llama-stack) (0.27.1)\r\n",
|
||||
"Requirement already satisfied: llama-models==0.1.0rc10 in /usr/local/lib/python3.11/dist-packages (from llama-stack) (0.1.0rc10)\r\n",
|
||||
"Requirement already satisfied: llama-stack-client==0.1.0rc10 in /usr/local/lib/python3.11/dist-packages (from llama-stack) (0.1.0rc10)\r\n",
|
||||
"Requirement already satisfied: prompt-toolkit in /usr/local/lib/python3.11/dist-packages (from llama-stack) (3.0.48)\r\n",
|
||||
"Requirement already satisfied: python-dotenv in /usr/local/lib/python3.11/dist-packages (from llama-stack) (1.0.1)\r\n",
|
||||
"Requirement already satisfied: pydantic>=2 in /usr/local/lib/python3.11/dist-packages (from llama-stack) (2.10.5)\r\n",
|
||||
"Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from llama-stack) (2.32.3)\r\n",
|
||||
"Requirement already satisfied: rich in /usr/local/lib/python3.11/dist-packages (from llama-stack) (13.9.4)\r\n",
|
||||
"Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from llama-stack) (75.1.0)\r\n",
|
||||
"Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from llama-stack) (2.5.0)\r\n",
|
||||
"Requirement already satisfied: PyYAML in /usr/local/lib/python3.11/dist-packages (from llama-models==0.1.0rc10->llama-stack) (6.0.2)\r\n",
|
||||
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from llama-models==0.1.0rc10->llama-stack) (3.1.5)\r\n",
|
||||
"Requirement already satisfied: tiktoken in /usr/local/lib/python3.11/dist-packages (from llama-models==0.1.0rc10->llama-stack) (0.8.0)\r\n",
|
||||
"Requirement already satisfied: Pillow in /usr/local/lib/python3.11/dist-packages (from llama-models==0.1.0rc10->llama-stack) (11.1.0)\r\n",
|
||||
"Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack) (3.7.1)\r\n",
|
||||
"Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack) (8.1.8)\r\n",
|
||||
"Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack) (1.9.0)\r\n",
|
||||
"Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack) (2.2.2)\r\n",
|
||||
"Requirement already satisfied: pyaml in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack) (25.1.0)\r\n",
|
||||
"Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack) (1.3.1)\r\n",
|
||||
"Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack) (4.67.1)\r\n",
|
||||
"Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.11/dist-packages (from llama-stack-client==0.1.0rc10->llama-stack) (4.12.2)\r\n",
|
||||
"Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx->llama-stack) (2024.12.14)\r\n",
|
||||
"Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx->llama-stack) (1.0.7)\r\n",
|
||||
"Requirement already satisfied: idna in /usr/local/lib/python3.11/dist-packages (from httpx->llama-stack) (3.10)\r\n",
|
||||
"Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\r\n",
|
||||
"Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2->llama-stack) (0.7.0)\r\n",
|
||||
"Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2->llama-stack) (2.27.2)\r\n",
|
||||
"Requirement already satisfied: pycryptodomex>=3.8 in /usr/local/lib/python3.11/dist-packages (from blobfile->llama-stack) (3.21.0)\r\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.25.3 in /usr/local/lib/python3.11/dist-packages (from blobfile->llama-stack) (2.3.0)\r\n",
|
||||
"Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.11/dist-packages (from blobfile->llama-stack) (5.3.0)\r\n",
|
||||
"Requirement already satisfied: filelock>=3.0 in /usr/local/lib/python3.11/dist-packages (from blobfile->llama-stack) (3.16.1)\r\n",
|
||||
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->llama-stack) (2024.10.0)\r\n",
|
||||
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub->llama-stack) (24.2)\r\n",
|
||||
"Requirement already satisfied: wcwidth in /usr/local/lib/python3.11/dist-packages (from prompt-toolkit->llama-stack) (0.2.13)\r\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->llama-stack) (3.4.1)\r\n",
|
||||
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich->llama-stack) (3.0.0)\r\n",
|
||||
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich->llama-stack) (2.18.0)\n",
|
||||
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n",
|
||||
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->llama-models==0.1.0rc10->llama-stack) (3.0.2)\n",
|
||||
"Requirement already satisfied: numpy>=1.23.2 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-stack-client==0.1.0rc10->llama-stack) (1.26.4)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-stack-client==0.1.0rc10->llama-stack) (2.8.2)\n",
|
||||
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-stack-client==0.1.0rc10->llama-stack) (2024.2)\n",
|
||||
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->llama-stack-client==0.1.0rc10->llama-stack) (2024.2)\n",
|
||||
"Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.11/dist-packages (from tiktoken->llama-models==0.1.0rc10->llama-stack) (2024.11.6)\n",
|
||||
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client==0.1.0rc10->llama-stack) (1.17.0)\n",
|
||||
"Installing pip dependencies\n",
|
||||
"Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.2)\n",
|
||||
"Collecting together\n",
|
||||
" Downloading together-1.3.11-py3-none-any.whl.metadata (11 kB)\n",
|
||||
"Collecting datasets\n",
|
||||
" Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n",
|
||||
"Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.47.1)\n",
|
||||
"Requirement already satisfied: blobfile in /usr/local/lib/python3.11/dist-packages (3.0.0)\n",
|
||||
"Requirement already satisfied: opentelemetry-sdk in /usr/local/lib/python3.11/dist-packages (1.29.0)\n",
|
||||
"Collecting redis\n",
|
||||
" Downloading redis-5.2.1-py3-none-any.whl.metadata (9.1 kB)\n",
|
||||
"Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (3.10.0)\n",
|
||||
"Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (2.32.3)\n",
|
||||
"Requirement already satisfied: chardet in /usr/local/lib/python3.11/dist-packages (5.2.0)\n",
|
||||
"Collecting chromadb-client\n",
|
||||
" Downloading chromadb_client-0.6.3-py3-none-any.whl.metadata (2.4 kB)\n",
|
||||
"Collecting psycopg2-binary\n",
|
||||
" Downloading psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n",
|
||||
"Collecting mcp\n",
|
||||
" Downloading mcp-1.2.0-py3-none-any.whl.metadata (15 kB)\n",
|
||||
"Requirement already satisfied: pillow in /usr/local/lib/python3.11/dist-packages (11.1.0)\n",
|
||||
"Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (1.13.1)\n",
|
||||
"Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (4.67.1)\n",
|
||||
"Requirement already satisfied: nltk in /usr/local/lib/python3.11/dist-packages (3.9.1)\n",
|
||||
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.11/dist-packages (0.2.0)\n",
|
||||
"Collecting faiss-cpu\n",
|
||||
" Downloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)\n",
|
||||
"Collecting opentelemetry-exporter-otlp-proto-http\n",
|
||||
" Downloading opentelemetry_exporter_otlp_proto_http-1.29.0-py3-none-any.whl.metadata (2.2 kB)\n",
|
||||
"Collecting autoevals\n",
|
||||
" Downloading autoevals-0.0.117-py3-none-any.whl.metadata (12 kB)\n",
|
||||
"Collecting pypdf\n",
|
||||
" Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)\n",
|
||||
"Collecting aiosqlite\n",
|
||||
" Downloading aiosqlite-0.20.0-py3-none-any.whl.metadata (4.3 kB)\n",
|
||||
"Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (1.26.4)\n",
|
||||
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (1.6.0)\n",
|
||||
"Requirement already satisfied: openai in /usr/local/lib/python3.11/dist-packages (1.59.6)\n",
|
||||
"Collecting fastapi\n",
|
||||
" Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)\n",
|
||||
"Requirement already satisfied: fire in /usr/local/lib/python3.11/dist-packages (0.7.0)\n",
|
||||
"Requirement already satisfied: httpx in /usr/local/lib/python3.11/dist-packages (0.28.1)\n",
|
||||
"Collecting uvicorn\n",
|
||||
" Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.8.2)\n",
|
||||
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2024.2)\n",
|
||||
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas) (2024.2)\n",
|
||||
"Requirement already satisfied: aiohttp<4.0.0,>=3.9.3 in /usr/local/lib/python3.11/dist-packages (from together) (3.11.11)\n",
|
||||
"Requirement already satisfied: click<9.0.0,>=8.1.7 in /usr/local/lib/python3.11/dist-packages (from together) (8.1.8)\n",
|
||||
"Requirement already satisfied: eval-type-backport<0.3.0,>=0.1.3 in /usr/local/lib/python3.11/dist-packages (from together) (0.2.2)\n",
|
||||
"Requirement already satisfied: filelock<4.0.0,>=3.13.1 in /usr/local/lib/python3.11/dist-packages (from together) (3.16.1)\n",
|
||||
"Collecting pillow\n",
|
||||
" Downloading pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.2 kB)\n",
|
||||
"Requirement already satisfied: pyarrow>=10.0.1 in /usr/local/lib/python3.11/dist-packages (from together) (17.0.0)\n",
|
||||
"Requirement already satisfied: pydantic<3.0.0,>=2.6.3 in /usr/local/lib/python3.11/dist-packages (from together) (2.10.5)\n",
|
||||
"Requirement already satisfied: rich<14.0.0,>=13.8.1 in /usr/local/lib/python3.11/dist-packages (from together) (13.9.4)\n",
|
||||
"Requirement already satisfied: tabulate<0.10.0,>=0.9.0 in /usr/local/lib/python3.11/dist-packages (from together) (0.9.0)\n",
|
||||
"Requirement already satisfied: typer<0.16,>=0.9 in /usr/local/lib/python3.11/dist-packages (from together) (0.15.1)\n",
|
||||
"Collecting dill<0.3.9,>=0.3.0 (from datasets)\n",
|
||||
" Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
|
||||
"Collecting xxhash (from datasets)\n",
|
||||
" Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
|
||||
"Collecting multiprocess<0.70.17 (from datasets)\n",
|
||||
" Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n",
|
||||
"Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)\n",
|
||||
" Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n",
|
||||
"Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.27.1)\n",
|
||||
"Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets) (24.2)\n",
|
||||
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets) (6.0.2)\n",
|
||||
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n",
|
||||
"Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.0)\n",
|
||||
"Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.2)\n",
|
||||
"Requirement already satisfied: pycryptodomex>=3.8 in /usr/local/lib/python3.11/dist-packages (from blobfile) (3.21.0)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.25.3 in /usr/local/lib/python3.11/dist-packages (from blobfile) (2.3.0)\n",
|
||||
"Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.11/dist-packages (from blobfile) (5.3.0)\n",
|
||||
"Requirement already satisfied: opentelemetry-api==1.29.0 in /usr/local/lib/python3.11/dist-packages (from opentelemetry-sdk) (1.29.0)\n",
|
||||
"Requirement already satisfied: opentelemetry-semantic-conventions==0.50b0 in /usr/local/lib/python3.11/dist-packages (from opentelemetry-sdk) (0.50b0)\n",
|
||||
"Requirement already satisfied: typing-extensions>=3.7.4 in /usr/local/lib/python3.11/dist-packages (from opentelemetry-sdk) (4.12.2)\n",
|
||||
"Requirement already satisfied: deprecated>=1.2.6 in /usr/local/lib/python3.11/dist-packages (from opentelemetry-api==1.29.0->opentelemetry-sdk) (1.2.15)\n",
|
||||
"Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /usr/local/lib/python3.11/dist-packages (from opentelemetry-api==1.29.0->opentelemetry-sdk) (8.5.0)\n",
|
||||
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.3.1)\n",
|
||||
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (0.12.1)\n",
|
||||
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (4.55.3)\n",
|
||||
"Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.4.8)\n",
|
||||
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (3.2.1)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests) (3.4.1)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests) (3.10)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests) (2024.12.14)\n",
|
||||
"Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb-client)\n",
|
||||
" Downloading opentelemetry_exporter_otlp_proto_grpc-1.29.0-py3-none-any.whl.metadata (2.2 kB)\n",
|
||||
"Collecting overrides>=7.3.1 (from chromadb-client)\n",
|
||||
" Downloading overrides-7.7.0-py3-none-any.whl.metadata (5.8 kB)\n",
|
||||
"Collecting posthog>=2.4.0 (from chromadb-client)\n",
|
||||
" Downloading posthog-3.8.4-py2.py3-none-any.whl.metadata (2.8 kB)\n",
|
||||
"Requirement already satisfied: tenacity>=8.2.3 in /usr/local/lib/python3.11/dist-packages (from chromadb-client) (9.0.0)\n",
|
||||
"Requirement already satisfied: orjson>=3.9.12 in /usr/local/lib/python3.11/dist-packages (from chromadb-client) (3.10.14)\n",
|
||||
"Collecting anyio>=4.5 (from mcp)\n",
|
||||
" Downloading anyio-4.8.0-py3-none-any.whl.metadata (4.6 kB)\n",
|
||||
"Collecting httpx-sse>=0.4 (from mcp)\n",
|
||||
" Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)\n",
|
||||
"Collecting pydantic-settings>=2.6.1 (from mcp)\n",
|
||||
" Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)\n",
|
||||
"Collecting sse-starlette>=1.6.1 (from mcp)\n",
|
||||
" Downloading sse_starlette-2.2.1-py3-none-any.whl.metadata (7.8 kB)\n",
|
||||
"Collecting starlette>=0.27 (from mcp)\n",
|
||||
" Downloading starlette-0.45.2-py3-none-any.whl.metadata (6.3 kB)\n",
|
||||
"Requirement already satisfied: joblib in /usr/local/lib/python3.11/dist-packages (from nltk) (1.4.2)\n",
|
||||
"Requirement already satisfied: googleapis-common-protos~=1.52 in /usr/local/lib/python3.11/dist-packages (from opentelemetry-exporter-otlp-proto-http) (1.66.0)\n",
|
||||
"Collecting opentelemetry-exporter-otlp-proto-common==1.29.0 (from opentelemetry-exporter-otlp-proto-http)\n",
|
||||
" Downloading opentelemetry_exporter_otlp_proto_common-1.29.0-py3-none-any.whl.metadata (1.8 kB)\n",
|
||||
"Collecting opentelemetry-proto==1.29.0 (from opentelemetry-exporter-otlp-proto-http)\n",
|
||||
" Downloading opentelemetry_proto-1.29.0-py3-none-any.whl.metadata (2.3 kB)\n",
|
||||
"Collecting protobuf<6.0,>=5.0 (from opentelemetry-proto==1.29.0->opentelemetry-exporter-otlp-proto-http)\n",
|
||||
" Downloading protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl.metadata (592 bytes)\n",
|
||||
"Collecting chevron (from autoevals)\n",
|
||||
" Downloading chevron-0.14.0-py3-none-any.whl.metadata (4.9 kB)\n",
|
||||
"Collecting levenshtein (from autoevals)\n",
|
||||
" Downloading levenshtein-0.26.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.2 kB)\n",
|
||||
"Collecting braintrust_core==0.0.58 (from autoevals)\n",
|
||||
" Downloading braintrust_core-0.0.58-py3-none-any.whl.metadata (669 bytes)\n",
|
||||
"Requirement already satisfied: jsonschema in /usr/local/lib/python3.11/dist-packages (from autoevals) (4.23.0)\n",
|
||||
"Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (3.5.0)\n",
|
||||
"Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai) (1.9.0)\n",
|
||||
"Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai) (0.8.2)\n",
|
||||
"Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai) (1.3.1)\n",
|
||||
"Collecting starlette>=0.27 (from mcp)\n",
|
||||
" Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)\n",
|
||||
"Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire) (2.5.0)\n",
|
||||
"Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx) (1.0.7)\n",
|
||||
"Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx) (0.14.0)\n",
|
||||
"Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.9.3->together) (2.4.4)\n",
|
||||
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.9.3->together) (1.3.2)\n",
|
||||
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.9.3->together) (24.3.0)\n",
|
||||
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.9.3->together) (1.5.0)\n",
|
||||
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.9.3->together) (6.1.0)\n",
|
||||
"Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.9.3->together) (0.2.1)\n",
|
||||
"Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.9.3->together) (1.18.3)\n",
|
||||
"Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.11/dist-packages (from deprecated>=1.2.6->opentelemetry-api==1.29.0->opentelemetry-sdk) (1.17.0)\n",
|
||||
"Requirement already satisfied: grpcio<2.0.0,>=1.63.2 in /usr/local/lib/python3.11/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb-client) (1.69.0)\n",
|
||||
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from posthog>=2.4.0->chromadb-client) (1.17.0)\n",
|
||||
"Collecting monotonic>=1.5 (from posthog>=2.4.0->chromadb-client)\n",
|
||||
" Downloading monotonic-1.6-py2.py3-none-any.whl.metadata (1.5 kB)\n",
|
||||
"Collecting backoff>=1.10.0 (from posthog>=2.4.0->chromadb-client)\n",
|
||||
" Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)\n",
|
||||
"Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.6.3->together) (0.7.0)\n",
|
||||
"Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.6.3->together) (2.27.2)\n",
|
||||
"Requirement already satisfied: python-dotenv>=0.21.0 in /usr/local/lib/python3.11/dist-packages (from pydantic-settings>=2.6.1->mcp) (1.0.1)\n",
|
||||
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich<14.0.0,>=13.8.1->together) (3.0.0)\n",
|
||||
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich<14.0.0,>=13.8.1->together) (2.18.0)\n",
|
||||
"Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<0.16,>=0.9->together) (1.5.4)\n",
|
||||
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema->autoevals) (2024.10.1)\n",
|
||||
"Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema->autoevals) (0.35.1)\n",
|
||||
"Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema->autoevals) (0.22.3)\n",
|
||||
"Collecting rapidfuzz<4.0.0,>=3.9.0 (from levenshtein->autoevals)\n",
|
||||
" Downloading rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n",
|
||||
"Requirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.11/dist-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.29.0->opentelemetry-sdk) (3.21.0)\n",
|
||||
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich<14.0.0,>=13.8.1->together) (0.1.2)\n",
|
||||
"Downloading together-1.3.11-py3-none-any.whl (70 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m70.6/70.6 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading datasets-3.2.0-py3-none-any.whl (480 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m20.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading redis-5.2.1-py3-none-any.whl (261 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m261.5/261.5 kB\u001b[0m \u001b[31m25.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading chromadb_client-0.6.3-py3-none-any.whl (609 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m609.2/609.2 kB\u001b[0m \u001b[31m38.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m100.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading mcp-1.2.0-py3-none-any.whl (66 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.5/66.5 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl (4.5 MB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m106.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading faiss_cpu-1.9.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m27.5/27.5 MB\u001b[0m \u001b[31m78.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading opentelemetry_exporter_otlp_proto_http-1.29.0-py3-none-any.whl (17 kB)\n",
|
||||
"Downloading opentelemetry_exporter_otlp_proto_common-1.29.0-py3-none-any.whl (18 kB)\n",
|
||||
"Downloading opentelemetry_proto-1.29.0-py3-none-any.whl (55 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.8/55.8 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading autoevals-0.0.117-py3-none-any.whl (41 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.4/41.4 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading braintrust_core-0.0.58-py3-none-any.whl (4.4 kB)\n",
|
||||
"Downloading pypdf-5.1.0-py3-none-any.whl (297 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m298.0/298.0 kB\u001b[0m \u001b[31m24.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading aiosqlite-0.20.0-py3-none-any.whl (15 kB)\n",
|
||||
"Downloading fastapi-0.115.6-py3-none-any.whl (94 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.8/94.8 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading uvicorn-0.34.0-py3-none-any.whl (62 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.3/62.3 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading anyio-4.8.0-py3-none-any.whl (96 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m96.0/96.0 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m17.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading httpx_sse-0.4.0-py3-none-any.whl (7.8 kB)\n",
|
||||
"Downloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m14.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading opentelemetry_exporter_otlp_proto_grpc-1.29.0-py3-none-any.whl (18 kB)\n",
|
||||
"Downloading overrides-7.7.0-py3-none-any.whl (17 kB)\n",
|
||||
"Downloading posthog-3.8.4-py2.py3-none-any.whl (69 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.8/69.8 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading pydantic_settings-2.7.1-py3-none-any.whl (29 kB)\n",
|
||||
"Downloading sse_starlette-2.2.1-py3-none-any.whl (10 kB)\n",
|
||||
"Downloading starlette-0.41.3-py3-none-any.whl (73 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.2/73.2 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading chevron-0.14.0-py3-none-any.whl (11 kB)\n",
|
||||
"Downloading levenshtein-0.26.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (162 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.7/162.7 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m21.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading backoff-2.2.1-py3-none-any.whl (15 kB)\n",
|
||||
"Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n",
|
||||
"Downloading protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl (319 kB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m319.7/319.7 kB\u001b[0m \u001b[31m28.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hDownloading rapidfuzz-3.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n",
|
||||
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m84.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
||||
"\u001b[?25hInstalling collected packages: monotonic, chevron, xxhash, uvicorn, redis, rapidfuzz, pypdf, psycopg2-binary, protobuf, pillow, overrides, httpx-sse, fsspec, faiss-cpu, dill, braintrust_core, backoff, anyio, aiosqlite, starlette, posthog, opentelemetry-proto, multiprocess, levenshtein, sse-starlette, pydantic-settings, opentelemetry-exporter-otlp-proto-common, fastapi, together, mcp, datasets, autoevals, opentelemetry-exporter-otlp-proto-http, opentelemetry-exporter-otlp-proto-grpc, chromadb-client\n",
|
||||
" Attempting uninstall: protobuf\n",
|
||||
" Found existing installation: protobuf 4.25.5\n",
|
||||
" Uninstalling protobuf-4.25.5:\n",
|
||||
" Successfully uninstalled protobuf-4.25.5\n",
|
||||
" Attempting uninstall: pillow\n",
|
||||
" Found existing installation: pillow 11.1.0\n",
|
||||
" Uninstalling pillow-11.1.0:\n",
|
||||
" Successfully uninstalled pillow-11.1.0\n",
|
||||
" Attempting uninstall: fsspec\n",
|
||||
" Found existing installation: fsspec 2024.10.0\n",
|
||||
" Uninstalling fsspec-2024.10.0:\n",
|
||||
" Successfully uninstalled fsspec-2024.10.0\n",
|
||||
" Attempting uninstall: anyio\n",
|
||||
" Found existing installation: anyio 3.7.1\n",
|
||||
" Uninstalling anyio-3.7.1:\n",
|
||||
" Successfully uninstalled anyio-3.7.1\n",
|
||||
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
||||
"jupyter-server 1.24.0 requires anyio<4,>=3.1.0, but you have anyio 4.8.0 which is incompatible.\n",
|
||||
"gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\n",
|
||||
"tensorflow 2.17.1 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.29.3 which is incompatible.\u001b[0m\u001b[31m\n",
|
||||
"\u001b[0mSuccessfully installed aiosqlite-0.20.0 anyio-4.8.0 autoevals-0.0.117 backoff-2.2.1 braintrust_core-0.0.58 chevron-0.14.0 chromadb-client-0.6.3 datasets-3.2.0 dill-0.3.8 faiss-cpu-1.9.0.post1 fastapi-0.115.6 fsspec-2024.9.0 httpx-sse-0.4.0 levenshtein-0.26.1 mcp-1.2.0 monotonic-1.6 multiprocess-0.70.16 opentelemetry-exporter-otlp-proto-common-1.29.0 opentelemetry-exporter-otlp-proto-grpc-1.29.0 opentelemetry-exporter-otlp-proto-http-1.29.0 opentelemetry-proto-1.29.0 overrides-7.7.0 pillow-10.4.0 posthog-3.8.4 protobuf-5.29.3 psycopg2-binary-2.9.10 pydantic-settings-2.7.1 pypdf-5.1.0 rapidfuzz-3.11.0 redis-5.2.1 sse-starlette-2.2.1 starlette-0.41.3 together-1.3.11 uvicorn-0.34.0 xxhash-3.5.0\n",
|
||||
"torch --index-url https://download.pytorch.org/whl/cpu\n",
|
||||
"Looking in indexes: https://download.pytorch.org/whl/cpu\n",
|
||||
"Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.5.1+cu121)\n",
|
||||
"Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch) (3.16.1)\n",
|
||||
"Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.11/dist-packages (from torch) (4.12.2)\n",
|
||||
"Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n",
|
||||
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.5)\n",
|
||||
"Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch) (2024.9.0)\n",
|
||||
"Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n",
|
||||
"Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n",
|
||||
"Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n",
|
||||
"Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch) (9.1.0.70)\n",
|
||||
"Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.3.1)\n",
|
||||
"Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch) (11.0.2.54)\n",
|
||||
"Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch) (10.3.2.106)\n",
|
||||
"Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch) (11.4.5.107)\n",
|
||||
"Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.0.106)\n",
|
||||
"Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n",
|
||||
"Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch) (12.1.105)\n",
|
||||
"Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.0)\n",
|
||||
"Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n",
|
||||
"Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.6.85)\n",
|
||||
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n",
|
||||
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n",
|
||||
"sentence-transformers --no-deps\n",
|
||||
"Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.11/dist-packages (3.3.1)\n",
|
||||
"\u001b[32mBuild Successful!\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# NBVAL_SKIP\n",
|
||||
"\n",
|
||||
"# This will build all the dependencies you will need\n",
|
||||
"!llama stack build --template together --image-type venv"
|
||||
]
|
||||
|
@ -571,7 +145,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 4,
|
||||
"id": "E1UFuJC570Tk",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
|
@ -1125,11 +699,8 @@
|
|||
" if not api_key:\n",
|
||||
" raise ValueError(f\"{key} environment variable is empty\")\n",
|
||||
" except KeyError:\n",
|
||||
" raise KeyError(\n",
|
||||
" f\"{key} environment variable is not set. \"\n",
|
||||
" \"Please set your API key using in userdata (if using google colab notebook)\"\n",
|
||||
" f\"or using `export {key}='your-api-key-here'`\"\n",
|
||||
" ) from None\n",
|
||||
" api_key = input(f\"{key} environment variable is not set. Please enter your API key: \")\n",
|
||||
" os.environ[key] = api_key\n",
|
||||
"\n",
|
||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
||||
"client = LlamaStackAsLibraryClient(\"together\", provider_data = {\"tavily_search_api_key\": os.environ['TAVILY_SEARCH_API_KEY']})\n",
|
||||
|
@ -1150,7 +721,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"id": "ruO9jQna_t_S",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
|
@ -1211,7 +782,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"id": "LINBvv8lwTJh",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
|
@ -1228,7 +799,7 @@
|
|||
"'meta-llama/Llama-3.1-70B-Instruct'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -1253,7 +824,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"id": "77c29dba",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
|
@ -1267,7 +838,7 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Here's a two-sentence poem about a llama:\n",
|
||||
"Here is a two-sentence poem about a llama:\n",
|
||||
"\n",
|
||||
"With gentle eyes and a soft, fuzzy face,\n",
|
||||
"The llama roams, a peaceful, gentle pace.\n"
|
||||
|
@ -2084,13 +1655,14 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"from llama_stack_client.lib.agents.agent import Agent\n",
|
||||
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
|
||||
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
|
||||
"from termcolor import cprint\n",
|
||||
"from llama_stack_client.types import Document\n",
|
||||
"\n",
|
||||
"urls = [\"chat.rst\", \"llama3.rst\", \"datasets.rst\", \"lora_finetune.rst\"]\n",
|
||||
"urls = [\"chat.rst\", \"llama3.rst\", \"memory_optimizations.rst\", \"lora_finetune.rst\"]\n",
|
||||
"documents = [\n",
|
||||
" Document(\n",
|
||||
" document_id=f\"num-{i}\",\n",
|
||||
|
@ -2101,7 +1673,7 @@
|
|||
" for i, url in enumerate(urls)\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"vector_db_id = \"test-vector-db\"\n",
|
||||
"vector_db_id = f\"test-vector-db-{uuid.uuid4().hex}\"\n",
|
||||
"client.vector_dbs.register(\n",
|
||||
" vector_db_id=vector_db_id,\n",
|
||||
" embedding_model=\"all-MiniLM-L6-v2\",\n",
|
||||
|
@ -2398,6 +1970,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"# NBVAL_SKIP\n",
|
||||
"!pip install colab-xterm #https://pypi.org/project/colab-xterm/\n",
|
||||
"%load_ext colabxterm"
|
||||
]
|
||||
|
@ -2774,7 +2347,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"# NBVAL_SKIP\n",
|
||||
"%xterm\n",
|
||||
"# touch /content/foo\n",
|
||||
"# touch /content/bar\n",
|
||||
|
@ -2800,6 +2373,7 @@
|
|||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# NBVAL_SKIP\n",
|
||||
"from llama_stack_client.types.shared_params.url import URL\n",
|
||||
"client.toolgroups.register(\n",
|
||||
" toolgroup_id=\"mcp::filesystem\",\n",
|
||||
|
@ -3170,6 +2744,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"# NBVAL_SKIP\n",
|
||||
"from llama_stack_client.lib.agents.agent import Agent\n",
|
||||
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
|
||||
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
|
||||
|
@ -3523,7 +3098,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"# NBVAL_SKIP \n",
|
||||
"# NBVAL_SKIP\n",
|
||||
"print(f\"Getting traces for session_id={session_id}\")\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
|
@ -3821,6 +3396,231 @@
|
|||
"response = client.scoring.score(input_rows=rows, scoring_functions=scoring_params)\n",
|
||||
"pprint(response)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ad077440",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Image Understanding with Llama 3.2\n",
|
||||
"\n",
|
||||
"Below is a complete example of using Together's Llama Stack 0.1 server at https://llama-stack.together.ai to ask Llama 3.2 questions about an image."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "82e381ec",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 4.1 Setup and helpers\n",
|
||||
"\n",
|
||||
"Below we install the Llama Stack client 0.1, download the example image, define two image helpers, and set Llama Stack Together server URL and Llama 3.2 model name.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "865fc5a8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install llama-stack-client==0.1.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "44e05e16",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!wget https://raw.githubusercontent.com/meta-llama/llama-models/refs/heads/main/Llama_Repo.jpeg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "469750f7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from PIL import Image\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"def display_image(path):\n",
|
||||
" img = Image.open(path)\n",
|
||||
" plt.imshow(img)\n",
|
||||
" plt.axis('off')\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
"display_image(\"Llama_Repo.jpeg\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a2c1e1c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import base64\n",
|
||||
"\n",
|
||||
"def encode_image(image_path):\n",
|
||||
" with open(image_path, \"rb\") as image_file:\n",
|
||||
" base64_string = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
|
||||
" base64_url = f\"data:image/png;base64,{base64_string}\"\n",
|
||||
" return base64_url"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c565f99e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_stack_client import LlamaStackClient\n",
|
||||
"\n",
|
||||
"LLAMA_STACK_API_TOGETHER_URL=\"https://llama-stack.together.ai\"\n",
|
||||
"LLAMA32_11B_INSTRUCT = \"meta-llama/Llama-3.2-11B-Vision-Instruct\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7737cd41",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 4.2 Using Llama Stack Chat API\n",
|
||||
"\n",
|
||||
"The code below uses the Llama Stack 0.1's chat API to interact with Llama 3.2:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d7914894",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_stack_client.lib.inference.event_logger import EventLogger\n",
|
||||
"\n",
|
||||
"async def run_main(image_path: str, prompt):\n",
|
||||
" client = LlamaStackClient(\n",
|
||||
" base_url=LLAMA_STACK_API_TOGETHER_URL,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" message = {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": [\n",
|
||||
" {\n",
|
||||
" \"type\": \"image\",\n",
|
||||
" \"image\": {\n",
|
||||
" \"url\": {\n",
|
||||
" \"uri\": encode_image(image_path)\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"type\": \"text\",\n",
|
||||
" \"text\": prompt,\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" response = client.inference.chat_completion(\n",
|
||||
" messages=[message],\n",
|
||||
" model_id=LLAMA32_11B_INSTRUCT,\n",
|
||||
" stream=False,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" print(response.completion_message.content.lower().strip())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ee09b97",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"await run_main(\"Llama_Repo.jpeg\",\n",
|
||||
" \"How many different colors are those llamas?\\\n",
|
||||
" What are those colors?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e741d7b9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 4.3 Using Llama Stack Agent API\n",
|
||||
"\n",
|
||||
"The code below uses the Llama Stack 0.1's Agent API to interact with Llama 3.2:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f9a83275",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_stack_client.lib.agents.agent import Agent\n",
|
||||
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
|
||||
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
|
||||
"\n",
|
||||
"async def run_main(image_path, prompt):\n",
|
||||
" base64_image = encode_image(image_path)\n",
|
||||
"\n",
|
||||
" client = LlamaStackClient(\n",
|
||||
" base_url=LLAMA_STACK_API_TOGETHER_URL,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" agent_config = AgentConfig(\n",
|
||||
" model=LLAMA32_11B_INSTRUCT,\n",
|
||||
" instructions=\"You are a helpful assistant\",\n",
|
||||
" enable_session_persistence=False,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" agent = Agent(client, agent_config)\n",
|
||||
" session_id = agent.create_session(\"test-session\")\n",
|
||||
"\n",
|
||||
" response = agent.create_turn(\n",
|
||||
" messages=[{\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": [\n",
|
||||
" {\n",
|
||||
" \"type\": \"image\",\n",
|
||||
" \"image\": {\n",
|
||||
" \"url\": {\n",
|
||||
" \"uri\": encode_image(image_path)\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"type\": \"text\",\n",
|
||||
" \"text\": prompt,\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" }],\n",
|
||||
" session_id=session_id,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" for log in EventLogger().log(response):\n",
|
||||
" log.print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "15d0098b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"await run_main(\"Llama_Repo.jpeg\",\n",
|
||||
" \"How many different colors are those llamas?\\\n",
|
||||
" What are those colors?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
@ -3830,7 +3630,8 @@
|
|||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "toolchain",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
"id": "hTIfyoGtjoWD"
|
||||
},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/drive/1UvR9m2KTinvlDXeOWfS2HBU4X72LAjTz?usp=sharing)\n",
|
||||
"[](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb)\n",
|
||||
"\n",
|
||||
"# Llama Stack Benchmark Evals\n",
|
||||
"\n",
|
||||
|
@ -1383,7 +1383,8 @@
|
|||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "master",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
|
|
|
@ -36,6 +36,16 @@ from .pyopenapi.specification import Info, Server # noqa: E402
|
|||
from .pyopenapi.utility import Specification # noqa: E402
|
||||
|
||||
|
||||
def str_presenter(dumper, data):
|
||||
if data.startswith(f"/{LLAMA_STACK_API_VERSION}") or data.startswith(
|
||||
"#/components/schemas/"
|
||||
):
|
||||
style = None
|
||||
else:
|
||||
style = ">" if "\n" in data or len(data) > 40 else None
|
||||
return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)
|
||||
|
||||
|
||||
def main(output_dir: str):
|
||||
output_dir = Path(output_dir)
|
||||
if not output_dir.exists():
|
||||
|
@ -69,7 +79,8 @@ def main(output_dir: str):
|
|||
y.sequence_dash_offset = 2
|
||||
y.width = 80
|
||||
y.allow_unicode = True
|
||||
y.explicit_start = True
|
||||
y.representer.add_representer(str, str_presenter)
|
||||
|
||||
y.dump(
|
||||
spec.get_json(),
|
||||
fp,
|
||||
|
|
|
@ -4,10 +4,10 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import collections
|
||||
import hashlib
|
||||
import ipaddress
|
||||
import typing
|
||||
from dataclasses import make_dataclass
|
||||
from typing import Any, Dict, Set, Union
|
||||
|
||||
from ..strong_typing.core import JsonType
|
||||
|
@ -177,20 +177,37 @@ class ContentBuilder:
|
|||
) -> Dict[str, MediaType]:
|
||||
"Creates the content subtree for a request or response."
|
||||
|
||||
def has_iterator_type(t):
|
||||
if typing.get_origin(t) is typing.Union:
|
||||
return any(has_iterator_type(a) for a in typing.get_args(t))
|
||||
def is_iterator_type(t):
|
||||
return "StreamChunk" in str(t)
|
||||
|
||||
def get_media_type(t):
|
||||
if is_generic_list(t):
|
||||
return "application/jsonl"
|
||||
elif is_iterator_type(t):
|
||||
return "text/event-stream"
|
||||
else:
|
||||
# TODO: needs a proper fix where we let all types correctly flow upwards
|
||||
# and then test against AsyncIterator
|
||||
return "StreamChunk" in str(t)
|
||||
return "application/json"
|
||||
|
||||
if typing.get_origin(payload_type) is typing.Union:
|
||||
media_types = []
|
||||
item_types = []
|
||||
for x in typing.get_args(payload_type):
|
||||
media_types.append(get_media_type(x))
|
||||
item_types.append(x)
|
||||
|
||||
if len(set(media_types)) == 1:
|
||||
# all types have the same media type
|
||||
return {media_types[0]: self.build_media_type(payload_type, examples)}
|
||||
else:
|
||||
# different types have different media types
|
||||
return {
|
||||
media_type: self.build_media_type(item_type, examples)
|
||||
for media_type, item_type in zip(media_types, item_types)
|
||||
}
|
||||
|
||||
if is_generic_list(payload_type):
|
||||
media_type = "application/jsonl"
|
||||
item_type = unwrap_generic_list(payload_type)
|
||||
elif has_iterator_type(payload_type):
|
||||
item_type = payload_type
|
||||
media_type = "text/event-stream"
|
||||
else:
|
||||
media_type = "application/json"
|
||||
item_type = payload_type
|
||||
|
@ -233,7 +250,9 @@ class ContentBuilder:
|
|||
value = sample_transformer(object_to_json(example))
|
||||
|
||||
hash_string = (
|
||||
hashlib.md5(json_dump_string(value).encode("utf-8")).digest().hex()
|
||||
hashlib.sha256(json_dump_string(value).encode("utf-8"))
|
||||
.digest()
|
||||
.hex()[:16]
|
||||
)
|
||||
name = f"ex-{hash_string}"
|
||||
|
||||
|
@ -276,6 +295,20 @@ class StatusResponse:
|
|||
examples: List[Any] = dataclasses.field(default_factory=list)
|
||||
|
||||
|
||||
def create_docstring_for_request(
|
||||
request_name: str, fields: List[Tuple[str, type, Any]], doc_params: Dict[str, str]
|
||||
) -> str:
|
||||
"""Creates a ReST-style docstring for a dynamically generated request dataclass."""
|
||||
lines = ["\n"] # Short description
|
||||
|
||||
# Add parameter documentation in ReST format
|
||||
for name, type_ in fields:
|
||||
desc = doc_params.get(name, "")
|
||||
lines.append(f":param {name}: {desc}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
class ResponseBuilder:
|
||||
content_builder: ContentBuilder
|
||||
|
||||
|
@ -493,11 +526,24 @@ class Generator:
|
|||
first = next(iter(op.request_params))
|
||||
request_name, request_type = first
|
||||
|
||||
from dataclasses import make_dataclass
|
||||
|
||||
op_name = "".join(word.capitalize() for word in op.name.split("_"))
|
||||
request_name = f"{op_name}Request"
|
||||
request_type = make_dataclass(request_name, op.request_params)
|
||||
fields = [
|
||||
(
|
||||
name,
|
||||
type_,
|
||||
)
|
||||
for name, type_ in op.request_params
|
||||
]
|
||||
request_type = make_dataclass(
|
||||
request_name,
|
||||
fields,
|
||||
namespace={
|
||||
"__doc__": create_docstring_for_request(
|
||||
request_name, fields, doc_params
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
requestBody = RequestBody(
|
||||
content={
|
||||
|
@ -598,10 +644,14 @@ class Generator:
|
|||
else:
|
||||
callbacks = None
|
||||
|
||||
description = "\n".join(
|
||||
filter(None, [doc_string.short_description, doc_string.long_description])
|
||||
)
|
||||
return Operation(
|
||||
tags=[op.defining_class.__name__],
|
||||
summary=doc_string.short_description,
|
||||
description=doc_string.long_description,
|
||||
summary=None,
|
||||
# summary=doc_string.short_description,
|
||||
description=description,
|
||||
parameters=parameters,
|
||||
requestBody=requestBody,
|
||||
responses=responses,
|
||||
|
@ -633,6 +683,7 @@ class Generator:
|
|||
raise NotImplementedError(f"unknown HTTP method: {op.http_method}")
|
||||
|
||||
route = op.get_route()
|
||||
route = route.replace(":path", "")
|
||||
print(f"route: {route}")
|
||||
if route in paths:
|
||||
paths[route].update(pathItem)
|
||||
|
@ -650,12 +701,6 @@ class Generator:
|
|||
)
|
||||
)
|
||||
|
||||
# types that are produced/consumed by operations
|
||||
type_tags = [
|
||||
self._build_type_tag(ref, schema)
|
||||
for ref, schema in self.schema_builder.schemas.items()
|
||||
]
|
||||
|
||||
# types that are emitted by events
|
||||
event_tags: List[Tag] = []
|
||||
events = get_endpoint_events(self.endpoint)
|
||||
|
@ -682,7 +727,6 @@ class Generator:
|
|||
# list all operations and types
|
||||
tags: List[Tag] = []
|
||||
tags.extend(operation_tags)
|
||||
tags.extend(type_tags)
|
||||
tags.extend(event_tags)
|
||||
for extra_tag_group in extra_tag_groups.values():
|
||||
tags.extend(extra_tag_group)
|
||||
|
@ -697,13 +741,6 @@ class Generator:
|
|||
tags=sorted(tag.name for tag in operation_tags),
|
||||
)
|
||||
)
|
||||
if type_tags:
|
||||
tag_groups.append(
|
||||
TagGroup(
|
||||
name=self.options.map("Types"),
|
||||
tags=sorted(tag.name for tag in type_tags),
|
||||
)
|
||||
)
|
||||
if event_tags:
|
||||
tag_groups.append(
|
||||
TagGroup(
|
||||
|
|
|
@ -130,6 +130,8 @@ class _FormatParameterExtractor:
|
|||
|
||||
def _get_route_parameters(route: str) -> List[str]:
|
||||
extractor = _FormatParameterExtractor()
|
||||
# Replace all occurrences of ":path" with empty string
|
||||
route = route.replace(":path", "")
|
||||
route.format_map(extractor)
|
||||
return extractor.keys
|
||||
|
||||
|
|
|
@ -6,36 +6,36 @@
|
|||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<title>OpenAPI specification</title>
|
||||
<link href="https://fonts.googleapis.com/css?family=Montserrat:300,400,700|Roboto:300,400,700" rel="stylesheet">
|
||||
<script type="module" src="https://unpkg.com/@stoplight/elements/web-components.min.js"></script>
|
||||
<link rel="stylesheet" href="https://unpkg.com/@stoplight/elements/styles.min.css">
|
||||
<style>
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
height: 100vh;
|
||||
}
|
||||
|
||||
elements-api {
|
||||
height: 100%;
|
||||
}
|
||||
</style>
|
||||
<script defer="defer" src="https://cdn.redoc.ly/redoc/latest/bundles/redoc.standalone.js"></script>
|
||||
<script defer="defer">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<elements-api id="openapi-container" router="hash" layout="sidebar" hideExport="true"
|
||||
hideInternal="true"></elements-api>
|
||||
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", function () {
|
||||
spec = { /* OPENAPI_SPECIFICATION */ };
|
||||
options = {
|
||||
downloadFileName: "openapi.json",
|
||||
expandResponses: "200",
|
||||
expandSingleSchemaField: true,
|
||||
jsonSampleExpandLevel: "all",
|
||||
schemaExpansionLevel: "all",
|
||||
};
|
||||
element = document.getElementById("openapi-container");
|
||||
Redoc.init(spec, options, element);
|
||||
const spec = { /* OPENAPI_SPECIFICATION */ };
|
||||
const element = document.getElementById("openapi-container");
|
||||
element.apiDescriptionDocument = spec;
|
||||
|
||||
if (spec.info && spec.info.title) {
|
||||
document.title = spec.info.title;
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div id="openapi-container"></div>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
|
|
|
@ -29,4 +29,5 @@ fi
|
|||
|
||||
stack_dir=$(dirname $(dirname $THIS_DIR))
|
||||
models_dir=$(dirname $stack_dir)/llama-models
|
||||
PYTHONPATH=$PYTHONPATH:$stack_dir:$models_dir python -m docs.openapi_generator.generate $(dirname $THIS_DIR)/resources
|
||||
PYTHONPATH=$PYTHONPATH:$stack_dir:$models_dir \
|
||||
python -m docs.openapi_generator.generate $(dirname $THIS_DIR)/_static
|
||||
|
|
|
@ -109,10 +109,10 @@ def get_class_property_docstrings(
|
|||
def docstring_to_schema(data_type: type) -> Schema:
|
||||
short_description, long_description = get_class_docstrings(data_type)
|
||||
schema: Schema = {}
|
||||
if short_description:
|
||||
schema["title"] = short_description
|
||||
if long_description:
|
||||
schema["description"] = long_description
|
||||
|
||||
description = "\n".join(filter(None, [short_description, long_description]))
|
||||
if description:
|
||||
schema["description"] = description
|
||||
return schema
|
||||
|
||||
|
||||
|
@ -248,7 +248,9 @@ class JsonSchemaGenerator:
|
|||
type_schema.update(self._metadata_to_schema(m))
|
||||
return type_schema
|
||||
|
||||
def _simple_type_to_schema(self, typ: TypeLike) -> Optional[Schema]:
|
||||
def _simple_type_to_schema(
|
||||
self, typ: TypeLike, json_schema_extra: Optional[dict] = None
|
||||
) -> Optional[Schema]:
|
||||
"""
|
||||
Returns the JSON schema associated with a simple, unrestricted type.
|
||||
|
||||
|
@ -264,6 +266,11 @@ class JsonSchemaGenerator:
|
|||
elif typ is float:
|
||||
return {"type": "number"}
|
||||
elif typ is str:
|
||||
if json_schema_extra and "contentEncoding" in json_schema_extra:
|
||||
return {
|
||||
"type": "string",
|
||||
"contentEncoding": json_schema_extra["contentEncoding"],
|
||||
}
|
||||
return {"type": "string"}
|
||||
elif typ is bytes:
|
||||
return {"type": "string", "contentEncoding": "base64"}
|
||||
|
@ -303,7 +310,12 @@ class JsonSchemaGenerator:
|
|||
# not a simple type
|
||||
return None
|
||||
|
||||
def type_to_schema(self, data_type: TypeLike, force_expand: bool = False) -> Schema:
|
||||
def type_to_schema(
|
||||
self,
|
||||
data_type: TypeLike,
|
||||
force_expand: bool = False,
|
||||
json_schema_extra: Optional[dict] = None,
|
||||
) -> Schema:
|
||||
"""
|
||||
Returns the JSON schema associated with a type.
|
||||
|
||||
|
@ -313,7 +325,7 @@ class JsonSchemaGenerator:
|
|||
"""
|
||||
|
||||
# short-circuit for common simple types
|
||||
schema = self._simple_type_to_schema(data_type)
|
||||
schema = self._simple_type_to_schema(data_type, json_schema_extra)
|
||||
if schema is not None:
|
||||
return schema
|
||||
|
||||
|
@ -486,15 +498,9 @@ class JsonSchemaGenerator:
|
|||
property_docstrings = get_class_property_docstrings(
|
||||
typ, self.options.property_description_fun
|
||||
)
|
||||
|
||||
properties: Dict[str, Schema] = {}
|
||||
required: List[str] = []
|
||||
for property_name, property_type in get_class_properties(typ):
|
||||
defaults = {}
|
||||
if "model_fields" in members:
|
||||
f = members["model_fields"]
|
||||
defaults = {k: finfo.default for k, finfo in f.items()}
|
||||
|
||||
# rename property if an alias name is specified
|
||||
alias = get_annotation(property_type, Alias)
|
||||
if alias:
|
||||
|
@ -502,11 +508,22 @@ class JsonSchemaGenerator:
|
|||
else:
|
||||
output_name = property_name
|
||||
|
||||
defaults = {}
|
||||
json_schema_extra = None
|
||||
if "model_fields" in members:
|
||||
f = members["model_fields"]
|
||||
defaults = {k: finfo.default for k, finfo in f.items()}
|
||||
json_schema_extra = f.get(output_name, None).json_schema_extra
|
||||
|
||||
if is_type_optional(property_type):
|
||||
optional_type: type = unwrap_optional_type(property_type)
|
||||
property_def = self.type_to_schema(optional_type)
|
||||
property_def = self.type_to_schema(
|
||||
optional_type, json_schema_extra=json_schema_extra
|
||||
)
|
||||
else:
|
||||
property_def = self.type_to_schema(property_type)
|
||||
property_def = self.type_to_schema(
|
||||
property_type, json_schema_extra=json_schema_extra
|
||||
)
|
||||
required.append(output_name)
|
||||
|
||||
# check if attribute has a default value initializer
|
||||
|
@ -531,6 +548,7 @@ class JsonSchemaGenerator:
|
|||
# add property docstring if available
|
||||
property_doc = property_docstrings.get(property_name)
|
||||
if property_doc:
|
||||
# print(output_name, property_doc)
|
||||
property_def.pop("title", None)
|
||||
property_def["description"] = property_doc
|
||||
|
||||
|
|
|
@ -6,6 +6,6 @@ Here's a collection of comprehensive guides, examples, and resources for buildin
|
|||
|
||||
Try out Llama Stack's capabilities through our detailed Jupyter notebooks:
|
||||
|
||||
* [Building AI Applications Notebook](./notebooks/Llama_Stack_Building_AI_Applications.ipynb) - A comprehensive guide to building production-ready AI applications using Llama Stack
|
||||
* [Building AI Applications Notebook](./getting_started.ipynb) - A comprehensive guide to building production-ready AI applications using Llama Stack
|
||||
* [Benchmark Evaluations Notebook](./notebooks/Llama_Stack_Benchmark_Evals.ipynb) - Detailed performance evaluations and benchmarking results
|
||||
* [Zero-to-Hero Guide](./notebooks/Llama_Stack_Zero_to_Hero_Guide.ipynb) - Step-by-step guide for getting started with Llama Stack
|
||||
* [Zero-to-Hero Guide](./zero_to_hero_guide) - Step-by-step guide for getting started with Llama Stack
|
||||
|
|
|
@ -77,7 +77,7 @@ agent_config = AgentConfig(
|
|||
instructions="You are a helpful assistant",
|
||||
# Enable both RAG and tool usage
|
||||
toolgroups=[
|
||||
{"name": "builtin::rag", "args": {"vector_db_ids": ["my_docs"]}}.
|
||||
{"name": "builtin::rag", "args": {"vector_db_ids": ["my_docs"]}},
|
||||
"builtin::code_interpreter",
|
||||
],
|
||||
# Configure safety
|
||||
|
@ -86,13 +86,9 @@ agent_config = AgentConfig(
|
|||
# Control the inference loop
|
||||
max_infer_iters=5,
|
||||
sampling_params={
|
||||
"strategy": {
|
||||
"type": "top_p",
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95
|
||||
},
|
||||
"max_tokens": 2048
|
||||
}
|
||||
"strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.95},
|
||||
"max_tokens": 2048,
|
||||
},
|
||||
)
|
||||
|
||||
agent = Agent(client, agent_config)
|
||||
|
@ -101,11 +97,13 @@ session_id = agent.create_session("monitored_session")
|
|||
# Stream the agent's execution steps
|
||||
response = agent.create_turn(
|
||||
messages=[{"role": "user", "content": "Analyze this code and run it"}],
|
||||
attachments=[{
|
||||
"content": "https://raw.githubusercontent.com/example/code.py",
|
||||
"mime_type": "text/plain"
|
||||
}],
|
||||
session_id=session_id
|
||||
attachments=[
|
||||
{
|
||||
"content": "https://raw.githubusercontent.com/example/code.py",
|
||||
"mime_type": "text/plain",
|
||||
}
|
||||
],
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Monitor each step of execution
|
||||
|
|
|
@ -15,6 +15,7 @@ This first example walks you through how to evaluate a model candidate served by
|
|||
|
||||
```python
|
||||
import datasets
|
||||
|
||||
ds = datasets.load_dataset(path="llamastack/mmmu", name="Agriculture", split="dev")
|
||||
ds = ds.select_columns(["chat_completion_input", "input_query", "expected_answer"])
|
||||
eval_rows = ds.to_pandas().to_dict(orient="records")
|
||||
|
@ -43,7 +44,7 @@ system_message = {
|
|||
client.eval_tasks.register(
|
||||
eval_task_id="meta-reference::mmmu",
|
||||
dataset_id=f"mmmu-{subset}-{split}",
|
||||
scoring_functions=["basic::regex_parser_multiple_choice_answer"]
|
||||
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||
)
|
||||
|
||||
response = client.eval.evaluate_rows(
|
||||
|
@ -62,9 +63,9 @@ response = client.eval.evaluate_rows(
|
|||
"max_tokens": 4096,
|
||||
"repeat_penalty": 1.0,
|
||||
},
|
||||
"system_message": system_message
|
||||
}
|
||||
}
|
||||
"system_message": system_message,
|
||||
},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -88,7 +89,7 @@ _ = client.datasets.register(
|
|||
"input_query": {"type": "string"},
|
||||
"expected_answer": {"type": "string"},
|
||||
"chat_completion_input": {"type": "chat_completion_input"},
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
eval_rows = client.datasetio.get_rows_paginated(
|
||||
|
@ -101,7 +102,7 @@ eval_rows = client.datasetio.get_rows_paginated(
|
|||
client.eval_tasks.register(
|
||||
eval_task_id="meta-reference::simpleqa",
|
||||
dataset_id=simpleqa_dataset_id,
|
||||
scoring_functions=["llm-as-judge::405b-simpleqa"]
|
||||
scoring_functions=["llm-as-judge::405b-simpleqa"],
|
||||
)
|
||||
|
||||
response = client.eval.evaluate_rows(
|
||||
|
@ -120,8 +121,8 @@ response = client.eval.evaluate_rows(
|
|||
"max_tokens": 4096,
|
||||
"repeat_penalty": 1.0,
|
||||
},
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -144,14 +145,14 @@ agent_config = {
|
|||
{
|
||||
"type": "brave_search",
|
||||
"engine": "tavily",
|
||||
"api_key": userdata.get("TAVILY_SEARCH_API_KEY")
|
||||
"api_key": userdata.get("TAVILY_SEARCH_API_KEY"),
|
||||
}
|
||||
],
|
||||
"tool_choice": "auto",
|
||||
"tool_prompt_format": "json",
|
||||
"input_shields": [],
|
||||
"output_shields": [],
|
||||
"enable_session_persistence": False
|
||||
"enable_session_persistence": False,
|
||||
}
|
||||
|
||||
response = client.eval.evaluate_rows(
|
||||
|
@ -163,7 +164,7 @@ response = client.eval.evaluate_rows(
|
|||
"eval_candidate": {
|
||||
"type": "agent",
|
||||
"config": agent_config,
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
|
|
@ -13,7 +13,7 @@ Here's how to set up basic evaluation:
|
|||
response = client.eval_tasks.register(
|
||||
eval_task_id="my_eval",
|
||||
dataset_id="my_dataset",
|
||||
scoring_functions=["accuracy", "relevance"]
|
||||
scoring_functions=["accuracy", "relevance"],
|
||||
)
|
||||
|
||||
# Run evaluation
|
||||
|
@ -21,16 +21,10 @@ job = client.eval.run_eval(
|
|||
task_id="my_eval",
|
||||
task_config={
|
||||
"type": "app",
|
||||
"eval_candidate": {
|
||||
"type": "agent",
|
||||
"config": agent_config
|
||||
}
|
||||
}
|
||||
"eval_candidate": {"type": "agent", "config": agent_config},
|
||||
},
|
||||
)
|
||||
|
||||
# Get results
|
||||
result = client.eval.job_result(
|
||||
task_id="my_eval",
|
||||
job_id=job.job_id
|
||||
)
|
||||
result = client.eval.job_result(task_id="my_eval", job_id=job.job_id)
|
||||
```
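
For long-running evaluations the job may not be finished by the time `job_result` is called. The sketch below polls before fetching results; the `job_status` method name and the status values it returns are assumptions and should be checked against your client version.

```python
import time

# Poll until the evaluation job is no longer running (method and status values assumed)
while True:
    status = client.eval.job_status(task_id="my_eval", job_id=job.job_id)
    if status not in ("scheduled", "in_progress"):
        break
    time.sleep(5)

result = client.eval.job_result(task_id="my_eval", job_id=job.job_id)
```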
|
||||
|
|
|
@ -4,7 +4,7 @@ Llama Stack provides all the building blocks needed to create sophisticated AI a
|
|||
|
||||
The best way to get started is to look at this notebook, which walks through the various APIs (from basic inference to RAG agents) and how to use them.
|
||||
|
||||
**Notebook**: [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb)
|
||||
**Notebook**: [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)
|
||||
|
||||
Here are some key topics that will help you build effective agents:
|
||||
|
||||
|
|
|
@ -34,15 +34,15 @@ chunks = [
|
|||
{
|
||||
"document_id": "doc1",
|
||||
"content": "Your document text here",
|
||||
"mime_type": "text/plain"
|
||||
"mime_type": "text/plain",
|
||||
},
|
||||
...
|
||||
]
|
||||
client.vector_io.insert(vector_db_id, chunks)
|
||||
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
|
||||
|
||||
# You can then query for these chunks
|
||||
chunks_response = client.vector_io.query(vector_db_id, query="What do you know about...")
|
||||
|
||||
chunks_response = client.vector_io.query(
|
||||
vector_db_id=vector_db_id, query="What do you know about..."
|
||||
)
|
||||
```
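
The query call returns the matching chunks together with similarity scores. Below is a minimal sketch of inspecting that response; the `chunks` and `scores` attribute names are assumptions and may differ across client versions.

```python
# Inspect the retrieved chunks (attribute names assumed)
for chunk, score in zip(chunks_response.chunks, chunks_response.scores):
    print(f"score={score:.3f}", chunk.content)
```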
|
||||
|
||||
### Using the RAG Tool
|
||||
|
@ -71,8 +71,8 @@ client.tool_runtime.rag_tool.insert(
|
|||
|
||||
# Query documents
|
||||
results = client.tool_runtime.rag_tool.query(
|
||||
vector_db_id=vector_db_id,
|
||||
query="What do you know about...",
|
||||
vector_db_ids=[vector_db_id],
|
||||
content="What do you know about...",
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -81,19 +81,22 @@ results = client.tool_runtime.rag_tool.query(
|
|||
One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:
|
||||
|
||||
```python
|
||||
from llama_stack_client.types.agent_create_params import AgentConfig
|
||||
from llama_stack_client.lib.agents.agent import Agent
|
||||
|
||||
# Configure agent with memory
|
||||
agent_config = AgentConfig(
|
||||
model="Llama3.2-3B-Instruct",
|
||||
model="meta-llama/Llama-3.2-3B-Instruct",
|
||||
instructions="You are a helpful assistant",
|
||||
enable_session_persistence=False,
|
||||
toolgroups=[
|
||||
{
|
||||
"name": "builtin::rag",
|
||||
"args": {
|
||||
"vector_db_ids": [vector_db_id],
|
||||
}
|
||||
},
|
||||
}
|
||||
]
|
||||
],
|
||||
)
|
||||
|
||||
agent = Agent(client, agent_config)
|
||||
|
@ -101,25 +104,21 @@ session_id = agent.create_session("rag_session")
|
|||
|
||||
# Initial document ingestion
|
||||
response = agent.create_turn(
|
||||
messages=[{
|
||||
"role": "user",
|
||||
"content": "I am providing some documents for reference."
|
||||
}],
|
||||
documents=[
|
||||
dict(
|
||||
content="https://raw.githubusercontent.com/example/doc.rst",
|
||||
mime_type="text/plain"
|
||||
)
|
||||
messages=[
|
||||
{"role": "user", "content": "I am providing some documents for reference."}
|
||||
],
|
||||
session_id=session_id
|
||||
documents=[
|
||||
{
|
||||
"content": "https://raw.githubusercontent.com/example/doc.rst",
|
||||
"mime_type": "text/plain",
|
||||
}
|
||||
],
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# Query with RAG
|
||||
response = agent.create_turn(
|
||||
messages=[{
|
||||
"role": "user",
|
||||
"content": "What are the key topics in the documents?"
|
||||
}],
|
||||
session_id=session_id
|
||||
messages=[{"role": "user", "content": "What are the key topics in the documents?"}],
|
||||
session_id=session_id,
|
||||
)
|
||||
```
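
`create_turn` returns a stream of events. A minimal way to print them is with the `EventLogger` helper used elsewhere in these docs:

```python
from llama_stack_client.lib.agents.event_logger import EventLogger

# Print the streamed turn events as they arrive
for log in EventLogger().log(response):
    log.print()
```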
|
||||
|
|
|
@ -5,15 +5,11 @@ Safety is a critical component of any AI application. Llama Stack provides a Shi
|
|||
```python
|
||||
# Register a safety shield
|
||||
shield_id = "content_safety"
|
||||
client.shields.register(
|
||||
shield_id=shield_id,
|
||||
provider_shield_id="llama-guard-basic"
|
||||
)
|
||||
client.shields.register(shield_id=shield_id, provider_shield_id="llama-guard-basic")
|
||||
|
||||
# Run content through shield
|
||||
response = client.safety.run_shield(
|
||||
shield_id=shield_id,
|
||||
messages=[{"role": "user", "content": "User message here"}]
|
||||
shield_id=shield_id, messages=[{"role": "user", "content": "User message here"}]
|
||||
)
|
||||
|
||||
if response.violation:
|
||||
|
|
|
@ -8,24 +8,16 @@ The telemetry system supports three main types of events:
|
|||
- **Unstructured Log Events**: Free-form log messages with severity levels
|
||||
```python
|
||||
unstructured_log_event = UnstructuredLogEvent(
|
||||
message="This is a log message",
|
||||
severity=LogSeverity.INFO
|
||||
message="This is a log message", severity=LogSeverity.INFO
|
||||
)
|
||||
```
|
||||
- **Metric Events**: Numerical measurements with units
|
||||
```python
|
||||
metric_event = MetricEvent(
|
||||
metric="my_metric",
|
||||
value=10,
|
||||
unit="count"
|
||||
)
|
||||
metric_event = MetricEvent(metric="my_metric", value=10, unit="count")
|
||||
```
|
||||
- **Structured Log Events**: System events like span start/end. Extensible to add more structured log types.
|
||||
```python
|
||||
structured_log_event = SpanStartPayload(
|
||||
name="my_span",
|
||||
parent_span_id="parent_span_id"
|
||||
)
|
||||
structured_log_event = SpanStartPayload(name="my_span", parent_span_id="parent_span_id")
|
||||
```
|
||||
|
||||
### Spans and Traces
|
||||
|
|
|
@ -35,7 +35,7 @@ Example client SDK call to register a "websearch" toolgroup that is provided by
|
|||
client.toolgroups.register(
|
||||
toolgroup_id="builtin::websearch",
|
||||
provider_id="brave-search",
|
||||
args={"max_results": 5}
|
||||
args={"max_results": 5},
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -50,8 +50,7 @@ The Code Interpreter allows execution of Python code within a controlled environ
|
|||
```python
|
||||
# Register Code Interpreter tool group
|
||||
client.toolgroups.register(
|
||||
toolgroup_id="builtin::code_interpreter",
|
||||
provider_id="code_interpreter"
|
||||
toolgroup_id="builtin::code_interpreter", provider_id="code_interpreter"
|
||||
)
|
||||
```
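
Once registered, the tool can be invoked through the tool runtime like the other built-in tools. The snippet below is a sketch; the exact tool name and the keyword used for the code payload (here `"code"`) are assumptions and may differ in your version.

```python
# Run a small Python snippet through the code interpreter (argument names assumed)
result = client.tool_runtime.invoke_tool(
    tool_name="code_interpreter",
    kwargs={"code": "print(2 + 2)"},
)
print(result)
```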
|
||||
|
||||
|
@ -68,16 +67,14 @@ The WolframAlpha tool provides access to computational knowledge through the Wol
|
|||
```python
|
||||
# Register WolframAlpha tool group
|
||||
client.toolgroups.register(
|
||||
toolgroup_id="builtin::wolfram_alpha",
|
||||
provider_id="wolfram-alpha"
|
||||
toolgroup_id="builtin::wolfram_alpha", provider_id="wolfram-alpha"
|
||||
)
|
||||
```
|
||||
|
||||
Example usage:
|
||||
```python
|
||||
result = client.tool_runtime.invoke_tool(
|
||||
tool_name="wolfram_alpha",
|
||||
args={"query": "solve x^2 + 2x + 1 = 0"}
|
||||
tool_name="wolfram_alpha", args={"query": "solve x^2 + 2x + 1 = 0"}
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -90,10 +87,7 @@ The Memory tool enables retrieval of context from various types of memory banks
|
|||
client.toolgroups.register(
|
||||
toolgroup_id="builtin::memory",
|
||||
provider_id="memory",
|
||||
args={
|
||||
"max_chunks": 5,
|
||||
"max_tokens_in_context": 4096
|
||||
}
|
||||
args={"max_chunks": 5, "max_tokens_in_context": 4096},
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -136,9 +130,7 @@ config = AgentConfig(
|
|||
toolgroups=[
|
||||
"builtin::websearch",
|
||||
],
|
||||
client_tools=[
|
||||
ToolDef(name="client_tool", description="Client provided tool")
|
||||
]
|
||||
client_tools=[ToolDef(name="client_tool", description="Client provided tool")],
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -167,9 +159,9 @@ Example tool definition:
|
|||
"name": "query",
|
||||
"parameter_type": "string",
|
||||
"description": "The query to search for",
|
||||
"required": True
|
||||
"required": True,
|
||||
}
|
||||
]
|
||||
],
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -179,8 +171,7 @@ Tools can be invoked using the `invoke_tool` method:
|
|||
|
||||
```python
|
||||
result = client.tool_runtime.invoke_tool(
|
||||
tool_name="web_search",
|
||||
kwargs={"query": "What is the capital of France?"}
|
||||
tool_name="web_search", kwargs={"query": "What is the capital of France?"}
|
||||
)
|
||||
```
|
||||
|
||||
|
|
|
@ -62,10 +62,3 @@ While there is a lot of flexibility to mix-and-match providers, often users will
|
|||
|
||||
|
||||
**On-device Distro**: Finally, you may want to run Llama Stack directly on an edge device (mobile phone or tablet). We provide Distros for iOS and Android (coming soon).
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
|
||||
distributions/index
|
||||
```
|
||||
|
|
|
@ -68,6 +68,7 @@ myst_substitutions = {
|
|||
"docker_hub": "https://hub.docker.com/repository/docker/llamastack",
|
||||
}
|
||||
|
||||
suppress_warnings = ['myst.header']
|
||||
|
||||
# Copy button settings
|
||||
copybutton_prompt_text = "$ " # for bash prompts
|
||||
|
@ -94,22 +95,6 @@ html_static_path = ["../_static"]
|
|||
# html_logo = "../_static/llama-stack-logo.png"
|
||||
html_style = "../_static/css/my_theme.css"
|
||||
|
||||
redoc = [
|
||||
{
|
||||
"name": "Llama Stack API",
|
||||
"page": "references/api_reference/index",
|
||||
"spec": "../resources/llama-stack-spec.yaml",
|
||||
"opts": {
|
||||
"suppress-warnings": True,
|
||||
# "expand-responses": ["200", "201"],
|
||||
},
|
||||
"embed": True,
|
||||
},
|
||||
]
|
||||
|
||||
redoc_uri = "https://cdn.redoc.ly/redoc/latest/bundles/redoc.standalone.js"
|
||||
|
||||
|
||||
def setup(app):
|
||||
def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
|
||||
url = f"https://hub.docker.com/r/llamastack/{text}"
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
This guide will walk you through the process of adding a new API provider to Llama Stack.
|
||||
|
||||
|
||||
- Begin by reviewing the [core concepts](../concepts/) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.)
|
||||
- Begin by reviewing the [core concepts](../concepts/index.md) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.)
|
||||
- Determine the provider type ({repopath}`Remote::llama_stack/providers/remote` or {repopath}`Inline::llama_stack/providers/inline`). Remote providers make requests to external services, while inline providers execute implementation locally.
|
||||
- Add your provider to the appropriate {repopath}`Registry::llama_stack/providers/registry/`. Specify pip dependencies necessary.
|
||||
- Update any distribution {repopath}`Templates::llama_stack/templates/` build.yaml and run.yaml files if they should include your provider by default. Run {repopath}`llama_stack/scripts/distro_codegen.py` if necessary.
|
||||
|
|
|
@ -180,12 +180,45 @@ After this step is successful, you should be able to find the built container im
|
|||
### Running your Stack server
|
||||
Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file that was written out at the end of the `llama stack build` step.
|
||||
|
||||
```
|
||||
llama stack run -h
|
||||
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--disable-ipv6] [--env KEY=VALUE] [--tls-keyfile TLS_KEYFILE]
|
||||
[--tls-certfile TLS_CERTFILE] [--image-type {conda,container,venv}]
|
||||
config
|
||||
|
||||
start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
|
||||
|
||||
positional arguments:
|
||||
config Path to config file to use for the run
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
--port PORT Port to run the server on. Defaults to 8321
|
||||
--image-name IMAGE_NAME
|
||||
Name of the image to run. Defaults to the current conda environment
|
||||
--disable-ipv6 Disable IPv6 support
|
||||
--env KEY=VALUE Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.
|
||||
--tls-keyfile TLS_KEYFILE
|
||||
Path to TLS key file for HTTPS
|
||||
--tls-certfile TLS_CERTFILE
|
||||
Path to TLS certificate file for HTTPS
|
||||
--image-type {conda,container,venv}
|
||||
Image Type used during the build. This can be either conda or container or venv.
|
||||
|
||||
```
|
||||
|
||||
```
|
||||
# Start using template name
|
||||
llama stack run tgi
|
||||
|
||||
# Start using config file
|
||||
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
||||
|
||||
# Start using a venv
|
||||
llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
||||
|
||||
# Start using a conda environment
|
||||
llama stack run --image-type conda ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
||||
```
|
||||
|
||||
```
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
# Using Llama Stack as a Library
|
||||
|
||||
If you are planning to use an external service for Inference (even Ollama or TGI counts as external), it is often easier to use Llama Stack as a library. This avoids the overhead of setting up a server.
|
||||
```python
|
||||
```bash
|
||||
# setup
|
||||
pip install llama-stack
|
||||
uv pip install llama-stack
|
||||
llama stack build --template together --image-type venv
|
||||
```
|
||||
|
||||
|
@ -13,7 +13,7 @@ from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
|
|||
client = LlamaStackAsLibraryClient(
|
||||
"ollama",
|
||||
# provider_data is optional, but if you need to pass in any provider specific data, you can do so here.
|
||||
provider_data = {"tavily_search_api_key": os.environ['TAVILY_SEARCH_API_KEY']}
|
||||
provider_data={"tavily_search_api_key": os.environ["TAVILY_SEARCH_API_KEY"]},
|
||||
)
|
||||
await client.initialize()
|
||||
```
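
After `initialize()`, the client exposes the same APIs as a remote server. For example, a simple inference call might look like the following sketch; the model identifier is illustrative and must match a model served by your provider.

```python
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.2-3B-Instruct",  # illustrative; use a model your provider serves
    messages=[{"role": "user", "content": "Write a haiku about llamas."}],
    stream=False,
)
print(response.completion_message.content)
```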
|
||||
|
|
|
@ -7,14 +7,19 @@ You can run a Llama Stack server in one of the following ways:
|
|||
This is the simplest way to get started. Using Llama Stack as a library means you do not need to start a server. This is especially useful when you are not running inference locally and instead rely on an external inference service (e.g. Fireworks, Together, Groq, etc.). See [Using Llama Stack as a Library](importing_as_library).
|
||||
|
||||
|
||||
**Docker**:
|
||||
**Container**:
|
||||
|
||||
Another simple way to start interacting with Llama Stack is to just spin up docker which is pre-built with all the providers you need. We provide a number of pre-built Docker containers so you can start a Llama Stack server instantly. You can also build your own custom Docker container. Which distribution to choose depends on the hardware you have. See [Selection of a Distribution](distributions/selection) for more details.
|
||||
Another simple way to start interacting with Llama Stack is to just spin up a container (via Docker or Podman) which is pre-built with all the providers you need. We provide a number of pre-built images so you can start a Llama Stack server instantly. You can also build your own custom container. Which distribution to choose depends on the hardware you have. See [Selection of a Distribution](selection) for more details.
|
||||
|
||||
|
||||
**Conda**:
|
||||
|
||||
Lastly, if you have a custom or an advanced setup or you are developing on Llama Stack you can also build a custom Llama Stack server. Using `llama stack build` and `llama stack run` you can build/run a custom Llama Stack server containing the exact combination of providers you wish. We have also provided various templates to make getting started easier. See [Building a Custom Distribution](building_distro) for more details.
|
||||
If you have a custom or an advanced setup or you are developing on Llama Stack you can also build a custom Llama Stack server. Using `llama stack build` and `llama stack run` you can build/run a custom Llama Stack server containing the exact combination of providers you wish. We have also provided various templates to make getting started easier. See [Building a Custom Distribution](building_distro) for more details.
|
||||
|
||||
|
||||
**Kubernetes**:
|
||||
|
||||
If you have built a container image and want to deploy it in a Kubernetes cluster instead of starting the Llama Stack server locally. See [Kubernetes Deployment Guide](kubernetes_deployment) for more details.
|
||||
|
||||
|
||||
```{toctree}
|
||||
|
@ -24,4 +29,6 @@ Lastly, if you have a custom or an advanced setup or you are developing on Llama
|
|||
importing_as_library
|
||||
building_distro
|
||||
configuration
|
||||
selection
|
||||
kubernetes_deployment
|
||||
```
|
||||
|
|
207
docs/source/distributions/kubernetes_deployment.md
Normal file
207
docs/source/distributions/kubernetes_deployment.md
Normal file
|
@ -0,0 +1,207 @@
|
|||
# Kubernetes Deployment Guide
|
||||
|
||||
Instead of starting the Llama Stack and vLLM servers locally, we can deploy them in a Kubernetes cluster. In this guide, we'll use a local [Kind](https://kind.sigs.k8s.io/) cluster and a vLLM inference service in the same cluster for demonstration purposes.
|
||||
|
||||
First, create a local Kubernetes cluster via Kind:
|
||||
|
||||
```bash
|
||||
kind create cluster --image kindest/node:v1.32.0 --name llama-stack-test
|
||||
```
|
||||
|
||||
Start vLLM server as a Kubernetes Pod and Service:
|
||||
|
||||
```bash
|
||||
cat <<EOF |kubectl apply -f -
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: vllm-models
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
volumeMode: Filesystem
|
||||
resources:
|
||||
requests:
|
||||
storage: 50Gi
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: hf-token-secret
|
||||
type: Opaque
|
||||
data:
|
||||
token: $(HF_TOKEN)
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: vllm-server
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: vllm
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: vllm
|
||||
spec:
|
||||
containers:
|
||||
- name: llama-stack
|
||||
image: $(VLLM_IMAGE)
|
||||
command:
|
||||
- bash
|
||||
- -c
|
||||
- |
|
||||
MODEL="meta-llama/Llama-3.2-1B-Instruct"
|
||||
MODEL_PATH=/app/model/$(basename $MODEL)
|
||||
huggingface-cli login --token $HUGGING_FACE_HUB_TOKEN
|
||||
huggingface-cli download $MODEL --local-dir $MODEL_PATH --cache-dir $MODEL_PATH
|
||||
python3 -m vllm.entrypoints.openai.api_server --model $MODEL_PATH --served-model-name $MODEL --port 8000
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
volumeMounts:
|
||||
- name: llama-storage
|
||||
mountPath: /app/model
|
||||
env:
|
||||
- name: HUGGING_FACE_HUB_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: hf-token-secret
|
||||
key: token
|
||||
volumes:
|
||||
- name: llama-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: vllm-models
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: vllm-server
|
||||
spec:
|
||||
selector:
|
||||
app.kubernetes.io/name: vllm
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
type: ClusterIP
|
||||
EOF
|
||||
```
|
||||
|
||||
We can verify that the vLLM server has started successfully via the logs (this might take a couple of minutes to download the model):
|
||||
|
||||
```bash
|
||||
$ kubectl logs -l app.kubernetes.io/name=vllm
|
||||
...
|
||||
INFO: Started server process [1]
|
||||
INFO: Waiting for application startup.
|
||||
INFO: Application startup complete.
|
||||
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
|
||||
```
|
||||
|
||||
Then we can modify the Llama Stack run configuration YAML with the following inference provider:
|
||||
|
||||
```yaml
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: vllm
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: http://vllm-server.default.svc.cluster.local:8000/v1
|
||||
max_tokens: 4096
|
||||
api_token: fake
|
||||
```
|
||||
|
||||
Once we have defined the run configuration for Llama Stack, we can build an image with that configuration and the server source code:
|
||||
|
||||
```bash
|
||||
cat >/tmp/test-vllm-llama-stack/Containerfile.llama-stack-run-k8s <<EOF
|
||||
FROM distribution-myenv:dev
|
||||
|
||||
RUN apt-get update && apt-get install -y git
|
||||
RUN git clone https://github.com/meta-llama/llama-stack.git /app/llama-stack-source
|
||||
|
||||
ADD ./vllm-llama-stack-run-k8s.yaml /app/config.yaml
|
||||
EOF
|
||||
podman build -f /tmp/test-vllm-llama-stack/Containerfile.llama-stack-run-k8s -t llama-stack-run-k8s /tmp/test-vllm-llama-stack
|
||||
```
|
||||
|
||||
|
||||
We can then start the Llama Stack server by deploying a Kubernetes Pod and Service:
|
||||
|
||||
```bash
|
||||
cat <<EOF |kubectl apply -f -
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: llama-pvc
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llama-stack-server
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: llama-stack
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: llama-stack
|
||||
spec:
|
||||
containers:
|
||||
- name: llama-stack
|
||||
image: localhost/llama-stack-run-k8s:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
command: ["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]
|
||||
ports:
|
||||
- containerPort: 5000
|
||||
volumeMounts:
|
||||
- name: llama-storage
|
||||
mountPath: /root/.llama
|
||||
volumes:
|
||||
- name: llama-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: llama-pvc
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llama-stack-service
|
||||
spec:
|
||||
selector:
|
||||
app.kubernetes.io/name: llama-stack
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 5000
|
||||
targetPort: 5000
|
||||
type: ClusterIP
|
||||
EOF
|
||||
```
|
||||
|
||||
We can check that the Llama Stack server has started:
|
||||
|
||||
```bash
|
||||
$ kubectl logs -l app.kubernetes.io/name=llama-stack
|
||||
...
|
||||
INFO: Started server process [1]
|
||||
INFO: Waiting for application startup.
|
||||
INFO: ASGI 'lifespan' protocol appears unsupported.
|
||||
INFO: Application startup complete.
|
||||
INFO: Uvicorn running on http://['::', '0.0.0.0']:5000 (Press CTRL+C to quit)
|
||||
```
|
||||
|
||||
Finally, we forward the Kubernetes service to a local port and test some inference requests against it via the Llama Stack Client:
|
||||
|
||||
```bash
|
||||
kubectl port-forward service/llama-stack-service 5000:5000
|
||||
llama-stack-client --endpoint http://localhost:5000 inference chat-completion --message "hello, what model are you?"
|
||||
```
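
The same check can be done from Python with the client SDK once the port-forward above is running. This is a minimal sketch; the model identifier matches the vLLM deployment above, and the exact response fields may vary by client version.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# List the models served by the deployment, then ask one of them a question
print(client.models.list())

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.2-1B-Instruct",  # the model deployed with vLLM above
    messages=[{"role": "user", "content": "hello, what model are you?"}],
    stream=False,
)
print(response.completion_message.content)
```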
|
|
@ -1,3 +1,4 @@
|
|||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
# NVIDIA Distribution
|
||||
|
||||
The `llamastack/distribution-nvidia` distribution consists of the following provider configurations.
|
||||
|
|
|
@ -23,7 +23,7 @@ Which templates / distributions to choose depends on the hardware you have for r
|
|||
- {dockerhub}`distribution-together` ([Guide](self_hosted_distro/together))
|
||||
- {dockerhub}`distribution-fireworks` ([Guide](self_hosted_distro/fireworks))
|
||||
|
||||
- **Do you want to run Llama Stack inference on your iOS / Android device** Lastly, we also provide templates for running Llama Stack inference on your iOS / Android device:
|
||||
- **Do you want to run Llama Stack inference on your iOS / Android device?** Lastly, we also provide templates for running Llama Stack inference on your iOS / Android device:
|
||||
- [iOS SDK](ondevice_distro/ios_sdk)
|
||||
- [Android](ondevice_distro/android_sdk)
|
||||
|
||||
|
@ -43,7 +43,6 @@ self_hosted_distro/nvidia
|
|||
self_hosted_distro/ollama
|
||||
self_hosted_distro/together
|
||||
self_hosted_distro/fireworks
|
||||
ondevice_distro/index
|
||||
```
|
||||
|
||||
### On-Device Distributions
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
# Bedrock Distribution
|
||||
|
||||
```{toctree}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
# Cerebras Distribution
|
||||
|
||||
The `llamastack/distribution-cerebras` distribution consists of the following provider configurations.
|
||||
|
|
186
docs/source/distributions/self_hosted_distro/dell.md
Normal file
|
@ -0,0 +1,186 @@
|
|||
---
|
||||
orphan: true
|
||||
---
|
||||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
|
||||
# Dell Distribution of Llama Stack
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 2
|
||||
:hidden:
|
||||
|
||||
self
|
||||
```
|
||||
|
||||
The `llamastack/distribution-dell` distribution consists of the following provider configurations.
|
||||
|
||||
| API | Provider(s) |
|
||||
|-----|-------------|
|
||||
| agents | `inline::meta-reference` |
|
||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `remote::tgi` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
You can use this distribution if you have GPUs and want to run an independent TGI or Dell Enterprise Hub container for running inference.

### Environment Variables

The following environment variables can be configured:

- `DEH_URL`: URL for the Dell inference server (default: `http://0.0.0.0:8181`)
- `DEH_SAFETY_URL`: URL for the Dell safety inference server (default: `http://0.0.0.0:8282`)
- `CHROMA_URL`: URL for the Chroma server (default: `http://localhost:6601`)
- `INFERENCE_MODEL`: Inference model loaded into the TGI server (default: `meta-llama/Llama-3.2-3B-Instruct`)
- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`)

## Setting up the inference server using Dell Enterprise Hub's custom TGI container

NOTE: This is a placeholder to run inference with TGI. This will be updated to use [Dell Enterprise Hub's containers](https://dell.huggingface.co/authenticated/models) once verified.
|
||||
|
||||
```bash
|
||||
export INFERENCE_PORT=8181
|
||||
export DEH_URL=http://0.0.0.0:$INFERENCE_PORT
|
||||
export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
|
||||
export CHROMADB_HOST=localhost
|
||||
export CHROMADB_PORT=6601
|
||||
export CHROMA_URL=http://$CHROMADB_HOST:$CHROMADB_PORT
|
||||
export CUDA_VISIBLE_DEVICES=0
|
||||
export LLAMA_STACK_PORT=8321
|
||||
|
||||
docker run --rm -it \
|
||||
--network host \
|
||||
-v $HOME/.cache/huggingface:/data \
|
||||
-e HF_TOKEN=$HF_TOKEN \
|
||||
-p $INFERENCE_PORT:$INFERENCE_PORT \
|
||||
--gpus $CUDA_VISIBLE_DEVICES \
|
||||
ghcr.io/huggingface/text-generation-inference \
|
||||
--dtype bfloat16 \
|
||||
--usage-stats off \
|
||||
--sharded false \
|
||||
--cuda-memory-fraction 0.7 \
|
||||
--model-id $INFERENCE_MODEL \
|
||||
--port $INFERENCE_PORT --hostname 0.0.0.0
|
||||
```
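Before pointing Llama Stack at the TGI container, it can be useful to confirm that TGI itself is serving. Here is a minimal sketch using only the Python standard library; it assumes TGI's standard `/generate` route and the `DEH_URL` exported above:

```python
import json
import os
import urllib.request

# DEH_URL was exported above; fall back to the default used in this guide
base_url = os.environ.get("DEH_URL", "http://0.0.0.0:8181")

# Ask TGI for a short completion to confirm the model is loaded
payload = {"inputs": "Hello, my name is", "parameters": {"max_new_tokens": 16}}
req = urllib.request.Request(
    f"{base_url}/generate",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["generated_text"])
```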
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a TGI with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like:
|
||||
|
||||
```bash
|
||||
export SAFETY_INFERENCE_PORT=8282
|
||||
export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT
|
||||
export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
export CUDA_VISIBLE_DEVICES=1
|
||||
|
||||
docker run --rm -it \
|
||||
--network host \
|
||||
-v $HOME/.cache/huggingface:/data \
|
||||
-e HF_TOKEN=$HF_TOKEN \
|
||||
-p $SAFETY_INFERENCE_PORT:$SAFETY_INFERENCE_PORT \
|
||||
--gpus $CUDA_VISIBLE_DEVICES \
|
||||
ghcr.io/huggingface/text-generation-inference \
|
||||
--dtype bfloat16 \
|
||||
--usage-stats off \
|
||||
--sharded false \
|
||||
--cuda-memory-fraction 0.7 \
|
||||
--model-id $SAFETY_MODEL \
|
||||
--hostname 0.0.0.0 \
|
||||
--port $SAFETY_INFERENCE_PORT
|
||||
```
|
||||
|
||||
## The Dell distribution relies on ChromaDB for vector database usage

You can start a ChromaDB container easily using Podman (or Docker):
```bash
# This is where the indices are persisted
mkdir -p $HOME/chromadb

podman run --rm -it \
  --network host \
  --name chromadb \
  -v $HOME/chromadb:/chroma/chroma \
  -e IS_PERSISTENT=TRUE \
  chromadb/chroma:latest \
  --port $CHROMADB_PORT \
  --host $CHROMADB_HOST
```

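To verify the Chroma server is reachable before starting the stack, you can run a small sketch like the following (this assumes the `chromadb` Python package is installed; host and port come from the variables exported earlier):

```python
import os

import chromadb

# Connect to the Chroma server started above
client = chromadb.HttpClient(
    host=os.environ.get("CHROMADB_HOST", "localhost"),
    port=int(os.environ.get("CHROMADB_PORT", "6601")),
)

# heartbeat() returns a nanosecond timestamp if the server is up
print(client.heartbeat())
```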
## Running Llama Stack

Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via Conda (build the code) or Docker (which has a pre-built image).

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
# NOTE: if you are testing local changes, also mount the llama-stack / llama-models
# source directories, e.g.:
#   -v /home/hjshah/git/llama-stack:/app/llama-stack-source -v /home/hjshah/git/llama-models:/app/llama-models-source
# Use the image localhost/distribution-dell:dev if building / testing locally.
docker run -it \
  --network host \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v $HOME/.llama:/root/.llama \
  llamastack/distribution-dell \
  --port $LLAMA_STACK_PORT \
  --env INFERENCE_MODEL=$INFERENCE_MODEL \
  --env DEH_URL=$DEH_URL \
  --env CHROMA_URL=$CHROMA_URL
```
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
```bash
|
||||
# You need a local checkout of llama-stack to run this, get it using
|
||||
# git clone https://github.com/meta-llama/llama-stack.git
|
||||
cd /path/to/llama-stack
|
||||
|
||||
export SAFETY_INFERENCE_PORT=8282
|
||||
export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT
|
||||
export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
|
||||
docker run \
|
||||
-it \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v $HOME/.llama:/root/.llama \
|
||||
-v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
|
||||
llamastack/distribution-dell \
|
||||
--yaml-config /root/my-run.yaml \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
--env DEH_URL=$DEH_URL \
|
||||
--env SAFETY_MODEL=$SAFETY_MODEL \
|
||||
--env DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
||||
--env CHROMA_URL=$CHROMA_URL
|
||||
```
|
||||
|
||||
### Via Conda
|
||||
|
||||
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
```bash
llama stack build --template dell --image-type conda
llama stack run dell \
  --port $LLAMA_STACK_PORT \
  --env INFERENCE_MODEL=$INFERENCE_MODEL \
  --env DEH_URL=$DEH_URL \
  --env CHROMA_URL=$CHROMA_URL
```
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
```bash
|
||||
llama stack run ./run-with-safety.yaml \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
--env DEH_URL=$DEH_URL \
|
||||
--env SAFETY_MODEL=$SAFETY_MODEL \
|
||||
--env DEH_SAFETY_URL=$DEH_SAFETY_URL \
|
||||
--env CHROMA_URL=$CHROMA_URL
|
||||
```
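Once the stack is up (with or without safety), a quick smoke test from Python confirms that the models were registered and that inference works end to end. This is a sketch only; the commented-out `run_shield` call is illustrative and its exact signature may differ across client versions:

```python
import os

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(
    base_url=f"http://localhost:{os.environ.get('LLAMA_STACK_PORT', '8321')}"
)

# Both $INFERENCE_MODEL and (if configured) $SAFETY_MODEL should show up here
for model in client.models.list():
    print(model.identifier)

response = client.inference.chat_completion(
    model_id=os.environ["INFERENCE_MODEL"],
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.completion_message.content)

# Illustrative safety check (only if you started the safety TGI and registered a shield);
# adjust the shield identifier and arguments to match your deployment and client version.
# result = client.safety.run_shield(
#     shield_id=os.environ["SAFETY_MODEL"],
#     messages=[{"role": "user", "content": "Tell me how to do something unsafe."}],
#     params={},
# )
# print(result.violation)
```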
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
orphan: true
|
||||
---
|
||||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
# Fireworks Distribution
|
||||
|
||||
```{toctree}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
orphan: true
|
||||
---
|
||||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
# Meta Reference Distribution
|
||||
|
||||
```{toctree}
|
||||
|
@ -82,7 +83,7 @@ docker run \
|
|||
|
||||
### Via Conda
|
||||
|
||||
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
|
||||
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
llama stack build --template meta-reference-gpu --image-type conda
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
orphan: true
|
||||
---
|
||||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
# Meta Reference Quantized Distribution
|
||||
|
||||
```{toctree}
|
||||
|
@ -82,7 +83,7 @@ docker run \
|
|||
|
||||
### Via Conda
|
||||
|
||||
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
|
||||
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
llama stack build --template meta-reference-quantized-gpu --image-type conda
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
orphan: true
|
||||
---
|
||||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
# Ollama Distribution
|
||||
|
||||
```{toctree}
|
||||
|
@ -25,7 +26,9 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
|
|||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.### Environment Variables
|
||||
You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
|
||||
|
||||
### Environment Variables
|
||||
|
||||
The following environment variables can be configured:
|
||||
|
||||
|
@ -101,7 +104,7 @@ docker run \
|
|||
|
||||
### Via Conda
|
||||
|
||||
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
|
||||
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
export LLAMA_STACK_PORT=5001
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
orphan: true
|
||||
---
|
||||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
# Remote vLLM Distribution
|
||||
```{toctree}
|
||||
:maxdepth: 2
|
||||
|
@ -131,7 +132,7 @@ docker run \
|
|||
|
||||
### Via Conda
|
||||
|
||||
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
|
||||
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
export INFERENCE_PORT=8000
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
orphan: true
|
||||
---
|
||||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
# SambaNova Distribution
|
||||
|
||||
```{toctree}
|
||||
|
@ -38,13 +39,15 @@ The following models are available by default:
|
|||
- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
|
||||
- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
|
||||
- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
|
||||
- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)`
|
||||
- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
|
||||
- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
|
||||
- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)`
|
||||
|
||||
|
||||
### Prerequisite: API Keys
|
||||
|
||||
Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaBova.ai](https://sambanova.ai/).
|
||||
Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://sambanova.ai/).
|
||||
|
||||
|
||||
## Running Llama Stack with SambaNova
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
orphan: true
|
||||
---
|
||||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
|
||||
# TGI Distribution
|
||||
|
||||
|
@ -122,7 +123,7 @@ docker run \
|
|||
|
||||
### Via Conda
|
||||
|
||||
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
|
||||
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
llama stack build --template tgi --image-type conda
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
---
|
||||
orphan: true
|
||||
---
|
||||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
# Together Distribution
|
||||
|
||||
```{toctree}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Quick Start
|
||||
|
||||
In this guide, we'll walk through how you can use the Llama Stack (server and client SDK ) to test a simple RAG agent.
|
||||
In this guide, we'll walk through how you can use the Llama Stack (server and client SDK) to test a simple RAG agent.
|
||||
|
||||
A Llama Stack agent is a simple integrated system that can perform tasks by combining a Llama model for reasoning with tools (e.g., RAG, web search, code execution, etc.) for taking actions.
|
||||
|
||||
|
@ -15,8 +15,11 @@ ollama run llama3.2:3b-instruct-fp16 --keepalive 60m
|
|||
|
||||
By default, Ollama keeps the model loaded in memory for only 5 minutes, which can be too short. We set the `--keepalive` flag to 60 minutes to ensure the model remains loaded for some time.
|
||||
|
||||
NOTE: If you do not have ollama, you can install it from [here](https://ollama.ai/docs/installation).
|
||||
```{admonition} Note
|
||||
:class: tip
|
||||
|
||||
If you do not have ollama, you can install it from [here](https://ollama.com/download).
|
||||
```
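If you want to double-check that Ollama is serving and that the model is available, you can query its local API. Here is a minimal sketch using only the Python standard library (Ollama listens on port 11434 by default):

```python
import json
import urllib.request

# List the models Ollama currently has available locally
with urllib.request.urlopen("http://localhost:11434/api/tags") as resp:
    models = json.load(resp)["models"]

print([m["name"] for m in models])  # should include llama3.2:3b-instruct-fp16
```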
|
||||
|
||||
|
||||
### 2. Pick a client environment
|
||||
|
@ -35,15 +38,20 @@ The API is **exactly identical** for both clients.
|
|||
:::{dropdown} Starting up the Llama Stack server
|
||||
The Llama Stack server can be configured flexibly so you can mix-and-match various providers for its individual API components -- beyond Inference, these include Vector IO, Agents, Telemetry, Evals, Post Training, etc.
|
||||
|
||||
To get started quickly, we provide various Docker images for the server component that work with different inference providers out of the box. For this guide, we will use `llamastack/distribution-ollama` as the Docker image.
|
||||
To get started quickly, we provide various container images for the server component that work with different inference providers out of the box. For this guide, we will use `llamastack/distribution-ollama` as the container image.
|
||||
|
||||
Let's set up some environment variables that we will use in the rest of the guide.
|
||||
```bash
|
||||
INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct"
|
||||
LLAMA_STACK_PORT=8321
|
||||
export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct"
|
||||
export LLAMA_STACK_PORT=8321
|
||||
```
|
||||
|
||||
You can start the server using the following command:
|
||||
Next you can create a local directory to mount into the container’s file system.
|
||||
```bash
|
||||
mkdir -p ~/.llama
|
||||
```
|
||||
|
||||
Then you can start the server using the container tool of your choice. For example, if you are running Docker you can use the following command:
|
||||
```bash
|
||||
docker run -it \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
|
@ -53,8 +61,28 @@ docker run -it \
|
|||
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
--env OLLAMA_URL=http://host.docker.internal:11434
|
||||
```
|
||||
|
||||
As another example, to start the container with Podman, you can do the same but replace `docker` at the start of the command with `podman`. If you are using `podman` older than `4.7.0`, please also replace `host.docker.internal` in the `OLLAMA_URL` with `host.containers.internal`.
|
||||
|
||||
Configuration for this is available at `distributions/ollama/run.yaml`.
|
||||
|
||||
```{admonition} Note
|
||||
:class: note
|
||||
|
||||
Docker containers run in their own isolated network namespaces on Linux. To allow the container to communicate with services running on the host via `localhost`, you need `--network=host`. This makes the container use the host’s network directly so it can connect to Ollama running on `localhost:11434`.
|
||||
|
||||
Linux users having issues running the above command should instead try the following:
|
||||
```bash
|
||||
docker run -it \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ~/.llama:/root/.llama \
|
||||
--network=host \
|
||||
llamastack/distribution-ollama \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
--env OLLAMA_URL=http://localhost:11434
|
||||
```
|
||||
|
||||
:::
|
||||
|
||||
|
||||
|
@ -71,8 +99,10 @@ pip install llama-stack-client
|
|||
Let's use the `llama-stack-client` CLI to check the connectivity to the server.
|
||||
|
||||
```bash
|
||||
llama-stack-client configure --endpoint http://localhost:$LLAMA_STACK_PORT
|
||||
llama-stack-client models list
|
||||
$ llama-stack-client configure --endpoint http://localhost:$LLAMA_STACK_PORT
|
||||
> Enter the API key (leave empty if no key is needed):
|
||||
Done! You can now use the Llama Stack Client CLI with endpoint http://localhost:8321
|
||||
$ llama-stack-client models list
|
||||
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
|
||||
┃ identifier ┃ provider_id ┃ provider_resource_id ┃ metadata ┃
|
||||
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩
|
||||
|
@ -95,19 +125,30 @@ llama-stack-client \
|
|||
Here is a simple example to perform chat completions using the SDK.
|
||||
```python
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def create_http_client():
|
||||
from llama_stack_client import LlamaStackClient
|
||||
return LlamaStackClient(base_url=f"http://localhost:{os.environ['LLAMA_STACK_PORT']}")
|
||||
|
||||
return LlamaStackClient(
|
||||
base_url=f"http://localhost:{os.environ['LLAMA_STACK_PORT']}"
|
||||
)
|
||||
|
||||
|
||||
def create_library_client(template="ollama"):
|
||||
from llama_stack import LlamaStackAsLibraryClient
|
||||
|
||||
client = LlamaStackAsLibraryClient(template)
|
||||
client.initialize()
|
||||
if not client.initialize():
|
||||
print("llama stack not built properly")
|
||||
sys.exit(1)
|
||||
return client
|
||||
|
||||
|
||||
client = create_library_client() # or create_http_client() depending on the environment you picked
|
||||
client = (
|
||||
create_library_client()
|
||||
) # or create_http_client() depending on the environment you picked
|
||||
|
||||
# List available models
|
||||
models = client.models.list()
|
||||
|
@ -120,8 +161,8 @@ response = client.inference.chat_completion(
|
|||
model_id=os.environ["INFERENCE_MODEL"],
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Write a haiku about coding"}
|
||||
]
|
||||
{"role": "user", "content": "Write a haiku about coding"},
|
||||
],
|
||||
)
|
||||
print(response.completion_message.content)
|
||||
```
|
||||
|
@ -132,6 +173,7 @@ Here is an example of a simple RAG (Retrieval Augmented Generation) chatbot agen
|
|||
|
||||
```python
|
||||
import os
|
||||
import uuid
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack_client.lib.agents.agent import Agent
|
||||
|
@ -139,10 +181,29 @@ from llama_stack_client.lib.agents.event_logger import EventLogger
|
|||
from llama_stack_client.types.agent_create_params import AgentConfig
|
||||
from llama_stack_client.types import Document
|
||||
|
||||
client = create_library_client() # or create_http_client() depending on the environment you picked
|
||||
|
||||
def create_http_client():
|
||||
from llama_stack_client import LlamaStackClient
|
||||
|
||||
return LlamaStackClient(
|
||||
base_url=f"http://localhost:{os.environ['LLAMA_STACK_PORT']}"
|
||||
)
|
||||
|
||||
|
||||
def create_library_client(template="ollama"):
|
||||
from llama_stack import LlamaStackAsLibraryClient
|
||||
|
||||
client = LlamaStackAsLibraryClient(template)
|
||||
client.initialize()
|
||||
return client
|
||||
|
||||
|
||||
client = (
|
||||
create_library_client()
|
||||
) # or create_http_client() depending on the environment you picked
|
||||
|
||||
# Documents to be used for RAG
|
||||
urls = ["chat.rst", "llama3.rst", "datasets.rst", "lora_finetune.rst"]
|
||||
urls = ["chat.rst", "llama3.rst", "memory_optimizations.rst", "lora_finetune.rst"]
|
||||
documents = [
|
||||
Document(
|
||||
document_id=f"num-{i}",
|
||||
|
@ -154,7 +215,7 @@ documents = [
|
|||
]
|
||||
|
||||
# Register a vector database
|
||||
vector_db_id = "test-vector-db"
|
||||
vector_db_id = f"test-vector-db-{uuid.uuid4().hex}"
|
||||
client.vector_dbs.register(
|
||||
vector_db_id=vector_db_id,
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
|
@ -174,12 +235,12 @@ agent_config = AgentConfig(
|
|||
instructions="You are a helpful assistant",
|
||||
enable_session_persistence=False,
|
||||
# Define tools available to the agent
|
||||
toolgroups = [
|
||||
toolgroups=[
|
||||
{
|
||||
"name": "builtin::rag",
|
||||
"args" : {
|
||||
"vector_db_ids": [vector_db_id],
|
||||
}
|
||||
"name": "builtin::rag",
|
||||
"args": {
|
||||
"vector_db_ids": [vector_db_id],
|
||||
},
|
||||
}
|
||||
],
|
||||
)
|
||||
|
@ -193,7 +254,7 @@ user_prompts = [
|
|||
|
||||
# Run the agent loop by calling the `create_turn` method
|
||||
for prompt in user_prompts:
|
||||
cprint(f'User> {prompt}', 'green')
|
||||
cprint(f"User> {prompt}", "green")
|
||||
response = rag_agent.create_turn(
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
session_id=session_id,
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
|
||||
```{admonition} News
|
||||
:class: tip
|
||||
|
||||
Llama Stack 0.1.0 is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases/tag/v0.1.0) for more details.
|
||||
Llama Stack 0.1.2 is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases/tag/v0.1.2) for more details.
|
||||
```
|
||||
|
||||
# Llama Stack
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
{.hide-title}
|
||||
# API Reference
|
||||
|
||||
```{eval-rst}
|
||||
.. sphinxcontrib-redoc:: ../resources/llama-stack-spec.yaml
|
||||
:page-title: API Reference
|
||||
:expand-responses: all
|
||||
```{raw} html
|
||||
:file: ../../../_static/llama-stack-spec.html
|
||||
```
|
||||
|
|
|
@ -12,7 +12,7 @@ This guide goes over the sets of APIs and developer experience flow of using Lla
|
|||
|
||||
## Evaluation Concepts
|
||||
|
||||
The Evaluation APIs are associated with a set of Resources as shown in the following diagram. Please visit the Resources section in our [Core Concepts](../concepts/index.md) guide for better high-level understanding.
|
||||
The Evaluation APIs are associated with a set of Resources as shown in the following diagram. Please visit the Resources section in our [Core Concepts](../../concepts/index.md) guide for better high-level understanding.
|
||||
|
||||

|
||||
|
||||
|
@ -51,6 +51,7 @@ This first example walks you through how to evaluate a model candidate served by
|
|||
|
||||
```python
|
||||
import datasets
|
||||
|
||||
ds = datasets.load_dataset(path="llamastack/mmmu", name="Agriculture", split="dev")
|
||||
ds = ds.select_columns(["chat_completion_input", "input_query", "expected_answer"])
|
||||
eval_rows = ds.to_pandas().to_dict(orient="records")
|
||||
|
@ -79,7 +80,7 @@ system_message = {
|
|||
client.eval_tasks.register(
|
||||
eval_task_id="meta-reference::mmmu",
|
||||
dataset_id=f"mmmu-{subset}-{split}",
|
||||
scoring_functions=["basic::regex_parser_multiple_choice_answer"]
|
||||
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||
)
|
||||
|
||||
response = client.eval.evaluate_rows(
|
||||
|
@ -98,9 +99,9 @@ response = client.eval.evaluate_rows(
|
|||
"max_tokens": 4096,
|
||||
"repeat_penalty": 1.0,
|
||||
},
|
||||
"system_message": system_message
|
||||
}
|
||||
}
|
||||
"system_message": system_message,
|
||||
},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -124,7 +125,7 @@ _ = client.datasets.register(
|
|||
"input_query": {"type": "string"},
|
||||
"expected_answer": {"type": "string"},
|
||||
"chat_completion_input": {"type": "chat_completion_input"},
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
eval_rows = client.datasetio.get_rows_paginated(
|
||||
|
@ -137,7 +138,7 @@ eval_rows = client.datasetio.get_rows_paginated(
|
|||
client.eval_tasks.register(
|
||||
eval_task_id="meta-reference::simpleqa",
|
||||
dataset_id=simpleqa_dataset_id,
|
||||
scoring_functions=["llm-as-judge::405b-simpleqa"]
|
||||
scoring_functions=["llm-as-judge::405b-simpleqa"],
|
||||
)
|
||||
|
||||
response = client.eval.evaluate_rows(
|
||||
|
@ -156,8 +157,8 @@ response = client.eval.evaluate_rows(
|
|||
"max_tokens": 4096,
|
||||
"repeat_penalty": 1.0,
|
||||
},
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -180,14 +181,14 @@ agent_config = {
|
|||
{
|
||||
"type": "brave_search",
|
||||
"engine": "tavily",
|
||||
"api_key": userdata.get("TAVILY_SEARCH_API_KEY")
|
||||
"api_key": userdata.get("TAVILY_SEARCH_API_KEY"),
|
||||
}
|
||||
],
|
||||
"tool_choice": "auto",
|
||||
"tool_prompt_format": "json",
|
||||
"input_shields": [],
|
||||
"output_shields": [],
|
||||
"enable_session_persistence": False
|
||||
"enable_session_persistence": False,
|
||||
}
|
||||
|
||||
response = client.eval.evaluate_rows(
|
||||
|
@ -199,8 +200,8 @@ response = client.eval.evaluate_rows(
|
|||
"eval_candidate": {
|
||||
"type": "agent",
|
||||
"config": agent_config,
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -237,7 +238,9 @@ GENERATED_RESPONSE: {generated_answer}
|
|||
EXPECTED_RESPONSE: {expected_answer}
|
||||
"""
|
||||
|
||||
input_query = "What are the top 5 topics that were explained? Only list succinct bullet points."
|
||||
input_query = (
|
||||
"What are the top 5 topics that were explained? Only list succinct bullet points."
|
||||
)
|
||||
generated_answer = """
|
||||
Here are the top 5 topics that were explained in the documentation for Torchtune:
|
||||
|
||||
|
@ -268,7 +271,9 @@ scoring_params = {
|
|||
"braintrust::factuality": None,
|
||||
}
|
||||
|
||||
response = client.scoring.score(input_rows=dataset_rows, scoring_functions=scoring_params)
|
||||
response = client.scoring.score(
|
||||
input_rows=dataset_rows, scoring_functions=scoring_params
|
||||
)
|
||||
```
|
||||
|
||||
## Running Evaluations via CLI
|
||||
|
|
|
@ -33,7 +33,11 @@ from llama_stack_client.types import (
|
|||
Types:
|
||||
|
||||
```python
|
||||
from llama_stack_client.types import ListToolGroupsResponse, ToolGroup, ToolgroupListResponse
|
||||
from llama_stack_client.types import (
|
||||
ListToolGroupsResponse,
|
||||
ToolGroup,
|
||||
ToolgroupListResponse,
|
||||
)
|
||||
```
|
||||
|
||||
Methods:
|
||||
|
@ -444,7 +448,11 @@ Methods:
|
|||
Types:
|
||||
|
||||
```python
|
||||
from llama_stack_client.types import EvalTask, ListEvalTasksResponse, EvalTaskListResponse
|
||||
from llama_stack_client.types import (
|
||||
EvalTask,
|
||||
ListEvalTasksResponse,
|
||||
EvalTaskListResponse,
|
||||
)
|
||||
```
|
||||
|
||||
Methods:
|
||||
|
|
|
@ -45,7 +45,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
|
|||
|
||||
---
|
||||
|
||||
## Install Dependencies and Set Up Environmen
|
||||
## Install Dependencies and Set Up Environment
|
||||
|
||||
1. **Create a Conda Environment**:
|
||||
Create a new Conda environment with Python 3.10:
|
||||
|
@ -73,7 +73,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
|
|||
Open a new terminal and install `llama-stack`:
|
||||
```bash
|
||||
conda activate ollama
|
||||
pip install llama-stack==0.0.61
|
||||
pip install llama-stack==0.1.0
|
||||
```
|
||||
|
||||
---
|
||||
|
@ -110,7 +110,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
|
|||
--env SAFETY_MODEL=$SAFETY_MODEL
|
||||
--env OLLAMA_URL=$OLLAMA_URL
|
||||
```
|
||||
Note: Everytime you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
|
||||
Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
|
||||
|
||||
The server will start and listen on `http://localhost:5001`.
|
||||
|
||||
|
@ -191,7 +191,7 @@ You can check the available models with the command `llama-stack-client models l
|
|||
|
||||
You can also interact with the Llama Stack server using a simple Python script. Below is an example:
|
||||
|
||||
### 1. Activate Conda Environmen
|
||||
### 1. Activate Conda Environment
|
||||
|
||||
```bash
|
||||
conda activate ollama
|
||||
|
@ -208,7 +208,7 @@ In `test_llama_stack.py`, write the following code:
|
|||
|
||||
```python
|
||||
import os
|
||||
from llama_stack_client import LlamaStackClien
|
||||
from llama_stack_client import LlamaStackClient
|
||||
|
||||
# Get the model ID from the environment variable
|
||||
INFERENCE_MODEL = os.environ.get("INFERENCE_MODEL")
|
||||
|
@ -224,7 +224,7 @@ client = LlamaStackClient(base_url="http://localhost:5001")
|
|||
response = client.inference.chat_completion(
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a friendly assistant."},
|
||||
{"role": "user", "content": "Write a two-sentence poem about llama."}
|
||||
{"role": "user", "content": "Write a two-sentence poem about llama."},
|
||||
],
|
||||
model_id=INFERENCE_MODEL,
|
||||
)
|
||||
|
|
|
@ -15,20 +15,21 @@ from typing import (
|
|||
Literal,
|
||||
Optional,
|
||||
Protocol,
|
||||
runtime_checkable,
|
||||
Union,
|
||||
runtime_checkable,
|
||||
)
|
||||
|
||||
from llama_models.schema_utils import json_schema_type, register_schema, webmethod
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, URL
|
||||
from llama_stack.apis.common.content_types import URL, ContentDelta, InterleavedContent
|
||||
from llama_stack.apis.inference import (
|
||||
CompletionMessage,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
ToolCall,
|
||||
ToolChoice,
|
||||
ToolConfig,
|
||||
ToolPromptFormat,
|
||||
ToolResponse,
|
||||
ToolResponseMessage,
|
||||
|
@ -86,9 +87,7 @@ class ShieldCallStep(StepCommon):
|
|||
|
||||
@json_schema_type
|
||||
class MemoryRetrievalStep(StepCommon):
|
||||
step_type: Literal[StepType.memory_retrieval.value] = (
|
||||
StepType.memory_retrieval.value
|
||||
)
|
||||
step_type: Literal[StepType.memory_retrieval.value] = StepType.memory_retrieval.value
|
||||
vector_db_ids: str
|
||||
inserted_context: InterleavedContent
|
||||
|
||||
|
@ -118,7 +117,7 @@ class Turn(BaseModel):
|
|||
]
|
||||
steps: List[Step]
|
||||
output_message: CompletionMessage
|
||||
output_attachments: List[Attachment] = Field(default_factory=list)
|
||||
output_attachments: Optional[List[Attachment]] = Field(default_factory=list)
|
||||
|
||||
started_at: datetime
|
||||
completed_at: Optional[datetime] = None
|
||||
|
@ -155,10 +154,25 @@ class AgentConfigCommon(BaseModel):
|
|||
output_shields: Optional[List[str]] = Field(default_factory=list)
|
||||
toolgroups: Optional[List[AgentToolGroup]] = Field(default_factory=list)
|
||||
client_tools: Optional[List[ToolDef]] = Field(default_factory=list)
|
||||
tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
|
||||
tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
|
||||
tool_choice: Optional[ToolChoice] = Field(default=None, deprecated="use tool_config instead")
|
||||
tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None, deprecated="use tool_config instead")
|
||||
tool_config: Optional[ToolConfig] = Field(default=None)
|
||||
|
||||
max_infer_iters: int = 10
|
||||
max_infer_iters: Optional[int] = 10
|
||||
|
||||
def model_post_init(self, __context):
|
||||
if self.tool_config:
|
||||
if self.tool_choice and self.tool_config.tool_choice != self.tool_choice:
|
||||
raise ValueError("tool_choice is deprecated. Use tool_choice in tool_config instead.")
|
||||
if self.tool_prompt_format and self.tool_config.tool_prompt_format != self.tool_prompt_format:
|
||||
raise ValueError("tool_prompt_format is deprecated. Use tool_prompt_format in tool_config instead.")
|
||||
else:
|
||||
params = {}
|
||||
if self.tool_choice:
|
||||
params["tool_choice"] = self.tool_choice
|
||||
if self.tool_prompt_format:
|
||||
params["tool_prompt_format"] = self.tool_prompt_format
|
||||
self.tool_config = ToolConfig(**params)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
@ -184,9 +198,7 @@ class AgentTurnResponseEventType(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseStepStartPayload(BaseModel):
|
||||
event_type: Literal[AgentTurnResponseEventType.step_start.value] = (
|
||||
AgentTurnResponseEventType.step_start.value
|
||||
)
|
||||
event_type: Literal[AgentTurnResponseEventType.step_start.value] = AgentTurnResponseEventType.step_start.value
|
||||
step_type: StepType
|
||||
step_id: str
|
||||
metadata: Optional[Dict[str, Any]] = Field(default_factory=dict)
|
||||
|
@ -194,9 +206,7 @@ class AgentTurnResponseStepStartPayload(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseStepCompletePayload(BaseModel):
|
||||
event_type: Literal[AgentTurnResponseEventType.step_complete.value] = (
|
||||
AgentTurnResponseEventType.step_complete.value
|
||||
)
|
||||
event_type: Literal[AgentTurnResponseEventType.step_complete.value] = AgentTurnResponseEventType.step_complete.value
|
||||
step_type: StepType
|
||||
step_id: str
|
||||
step_details: Step
|
||||
|
@ -206,9 +216,7 @@ class AgentTurnResponseStepCompletePayload(BaseModel):
|
|||
class AgentTurnResponseStepProgressPayload(BaseModel):
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
event_type: Literal[AgentTurnResponseEventType.step_progress.value] = (
|
||||
AgentTurnResponseEventType.step_progress.value
|
||||
)
|
||||
event_type: Literal[AgentTurnResponseEventType.step_progress.value] = AgentTurnResponseEventType.step_progress.value
|
||||
step_type: StepType
|
||||
step_id: str
|
||||
|
||||
|
@ -217,17 +225,13 @@ class AgentTurnResponseStepProgressPayload(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseTurnStartPayload(BaseModel):
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_start.value] = (
|
||||
AgentTurnResponseEventType.turn_start.value
|
||||
)
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_start.value] = AgentTurnResponseEventType.turn_start.value
|
||||
turn_id: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseTurnCompletePayload(BaseModel):
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_complete.value] = (
|
||||
AgentTurnResponseEventType.turn_complete.value
|
||||
)
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_complete.value] = AgentTurnResponseEventType.turn_complete.value
|
||||
turn: Turn
|
||||
|
||||
|
||||
|
@ -280,6 +284,7 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
|
|||
toolgroups: Optional[List[AgentToolGroup]] = None
|
||||
|
||||
stream: Optional[bool] = False
|
||||
tool_config: Optional[ToolConfig] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
@ -297,6 +302,16 @@ class AgentStepResponse(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Agents(Protocol):
|
||||
"""Agents API for creating and interacting with agentic systems.
|
||||
|
||||
Main functionalities provided by this API:
|
||||
- Create agents with specific instructions and ability to use tools.
|
||||
- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn".
|
||||
- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details).
|
||||
- Agents can be provided with various shields (see the Safety API for more details).
|
||||
- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
|
||||
"""
|
||||
|
||||
@webmethod(route="/agents", method="POST")
|
||||
async def create_agent(
|
||||
self,
|
||||
|
@ -317,10 +332,12 @@ class Agents(Protocol):
|
|||
stream: Optional[bool] = False,
|
||||
documents: Optional[List[Document]] = None,
|
||||
toolgroups: Optional[List[AgentToolGroup]] = None,
|
||||
tool_config: Optional[ToolConfig] = None,
|
||||
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}", method="GET"
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
|
||||
method="GET",
|
||||
)
|
||||
async def get_agents_turn(
|
||||
self,
|
||||
|
|
|
@ -13,7 +13,6 @@ from termcolor import cprint
|
|||
from llama_stack.apis.agents import AgentTurnResponseEventType, StepType
|
||||
from llama_stack.apis.common.content_types import ToolCallParseStatus
|
||||
from llama_stack.apis.inference import ToolResponseMessage
|
||||
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
interleaved_content_as_str,
|
||||
)
|
||||
|
@ -63,9 +62,7 @@ class EventLogger:
|
|||
if isinstance(chunk, ToolResponseMessage):
|
||||
yield (
|
||||
chunk,
|
||||
LogEvent(
|
||||
role="CustomTool", content=chunk.content, color="grey"
|
||||
),
|
||||
LogEvent(role="CustomTool", content=chunk.content, color="grey"),
|
||||
)
|
||||
continue
|
||||
|
||||
|
@ -81,17 +78,12 @@ class EventLogger:
|
|||
|
||||
step_type = event.payload.step_type
|
||||
# handle safety
|
||||
if (
|
||||
step_type == StepType.shield_call
|
||||
and event_type == EventType.step_complete.value
|
||||
):
|
||||
if step_type == StepType.shield_call and event_type == EventType.step_complete.value:
|
||||
violation = event.payload.step_details.violation
|
||||
if not violation:
|
||||
yield (
|
||||
event,
|
||||
LogEvent(
|
||||
role=step_type, content="No Violation", color="magenta"
|
||||
),
|
||||
LogEvent(role=step_type, content="No Violation", color="magenta"),
|
||||
)
|
||||
else:
|
||||
yield (
|
||||
|
@ -110,9 +102,7 @@ class EventLogger:
|
|||
# TODO: Currently this event is never received
|
||||
yield (
|
||||
event,
|
||||
LogEvent(
|
||||
role=step_type, content="", end="", color="yellow"
|
||||
),
|
||||
LogEvent(role=step_type, content="", end="", color="yellow"),
|
||||
)
|
||||
elif event_type == EventType.step_progress.value:
|
||||
# HACK: if previous was not step/event was not inference's step_progress
|
||||
|
@ -125,9 +115,7 @@ class EventLogger:
|
|||
):
|
||||
yield (
|
||||
event,
|
||||
LogEvent(
|
||||
role=step_type, content="", end="", color="yellow"
|
||||
),
|
||||
LogEvent(role=step_type, content="", end="", color="yellow"),
|
||||
)
|
||||
|
||||
delta = event.payload.delta
|
||||
|
@ -161,9 +149,7 @@ class EventLogger:
|
|||
if event_type == EventType.step_complete.value:
|
||||
response = event.payload.step_details.model_response
|
||||
if response.tool_calls:
|
||||
content = ToolUtils.encode_tool_call(
|
||||
response.tool_calls[0], tool_prompt_format
|
||||
)
|
||||
content = ToolUtils.encode_tool_call(response.tool_calls[0], tool_prompt_format)
|
||||
else:
|
||||
content = response.content
|
||||
yield (
|
||||
|
@ -202,10 +188,7 @@ class EventLogger:
|
|||
),
|
||||
)
|
||||
|
||||
if (
|
||||
step_type == StepType.memory_retrieval
|
||||
and event_type == EventType.step_complete.value
|
||||
):
|
||||
if step_type == StepType.memory_retrieval and event_type == EventType.step_complete.value:
|
||||
details = event.payload.step_details
|
||||
inserted_context = interleaved_content_as_str(details.inserted_context)
|
||||
content = f"fetched {len(inserted_context)} bytes from {details.vector_db_ids}"
|
||||
|
|
|
@ -7,13 +7,15 @@
|
|||
from typing import List, Optional, Protocol, runtime_checkable
|
||||
|
||||
from llama_models.schema_utils import json_schema_type, webmethod
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
CompletionMessage,
|
||||
ChatCompletionResponse,
|
||||
CompletionResponse,
|
||||
InterleavedContent,
|
||||
LogProbConfig,
|
||||
Message,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
ToolChoice,
|
||||
ToolDefinition,
|
||||
|
@ -21,35 +23,14 @@ from llama_stack.apis.inference import (
|
|||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchCompletionRequest(BaseModel):
|
||||
model: str
|
||||
content_batch: List[InterleavedContent]
|
||||
sampling_params: Optional[SamplingParams] = SamplingParams()
|
||||
logprobs: Optional[LogProbConfig] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchCompletionResponse(BaseModel):
|
||||
completion_message_batch: List[CompletionMessage]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchChatCompletionRequest(BaseModel):
|
||||
model: str
|
||||
messages_batch: List[List[Message]]
|
||||
sampling_params: Optional[SamplingParams] = SamplingParams()
|
||||
|
||||
# zero-shot tool definitions as input to the model
|
||||
tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
|
||||
tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
|
||||
tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
|
||||
logprobs: Optional[LogProbConfig] = None
|
||||
batch: List[CompletionResponse]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BatchChatCompletionResponse(BaseModel):
|
||||
completion_message_batch: List[CompletionMessage]
|
||||
batch: List[ChatCompletionResponse]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
|
@ -60,6 +41,7 @@ class BatchInference(Protocol):
|
|||
model: str,
|
||||
content_batch: List[InterleavedContent],
|
||||
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
||||
response_format: Optional[ResponseFormat] = None,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> BatchCompletionResponse: ...
|
||||
|
||||
|
@ -73,5 +55,6 @@ class BatchInference(Protocol):
|
|||
tools: Optional[List[ToolDefinition]] = list,
|
||||
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
|
||||
tool_prompt_format: Optional[ToolPromptFormat] = None,
|
||||
response_format: Optional[ResponseFormat] = None,
|
||||
logprobs: Optional[LogProbConfig] = None,
|
||||
) -> BatchChatCompletionResponse: ...
|
||||
|
|
|
@ -4,14 +4,12 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import base64
|
||||
from enum import Enum
|
||||
from typing import Annotated, List, Literal, Optional, Union
|
||||
|
||||
from llama_models.llama3.api.datatypes import ToolCall
|
||||
|
||||
from llama_models.schema_utils import json_schema_type, register_schema
|
||||
from pydantic import BaseModel, Field, field_serializer, model_validator
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
@ -20,8 +18,16 @@ class URL(BaseModel):
|
|||
|
||||
|
||||
class _URLOrData(BaseModel):
|
||||
"""
|
||||
A URL or a base64 encoded string
|
||||
|
||||
:param url: A URL of the image or data URL in the format of data:image/{type};base64,{data}. Note that URL could have length limits.
|
||||
:param data: base64 encoded image data as string
|
||||
"""
|
||||
|
||||
url: Optional[URL] = None
|
||||
data: Optional[bytes] = None
|
||||
# data is a base64 encoded string, hint with contentEncoding=base64
|
||||
data: Optional[str] = Field(contentEncoding="base64", default=None)
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
|
@ -30,21 +36,27 @@ class _URLOrData(BaseModel):
|
|||
return values
|
||||
return {"url": values}
|
||||
|
||||
@field_serializer("data")
|
||||
def serialize_data(self, data: Optional[bytes], _info):
|
||||
if data is None:
|
||||
return None
|
||||
return base64.b64encode(data).decode("utf-8")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ImageContentItem(BaseModel):
|
||||
"""A image content item
|
||||
|
||||
:param type: Discriminator type of the content item. Always "image"
|
||||
:param image: Image as a base64 encoded string or an URL
|
||||
"""
|
||||
|
||||
type: Literal["image"] = "image"
|
||||
image: _URLOrData
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class TextContentItem(BaseModel):
|
||||
"""A text content item
|
||||
|
||||
:param type: Discriminator type of the content item. Always "text"
|
||||
:param text: Text content
|
||||
"""
|
||||
|
||||
type: Literal["text"] = "text"
|
||||
text: str
|
||||
|
||||
|
@ -77,7 +89,6 @@ class ImageDelta(BaseModel):
|
|||
image: bytes
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolCallParseStatus(Enum):
|
||||
started = "started"
|
||||
in_progress = "in_progress"
|
||||
|
|
|
@ -8,7 +8,6 @@ from enum import Enum
|
|||
from typing import Any, Dict, Optional
|
||||
|
||||
from llama_models.schema_utils import json_schema_type
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
|
|
|
@ -39,6 +39,4 @@ class DatasetIO(Protocol):
|
|||
) -> PaginatedRowsResult: ...
|
||||
|
||||
@webmethod(route="/datasetio/rows", method="POST")
|
||||
async def append_rows(
|
||||
self, dataset_id: str, rows: List[Dict[str, Any]]
|
||||
) -> None: ...
|
||||
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
|
||||
|
|
|
@ -58,7 +58,7 @@ class Datasets(Protocol):
|
|||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None: ...
|
||||
|
||||
@webmethod(route="/datasets/{dataset_id}", method="GET")
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="GET")
|
||||
async def get_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
|
@ -67,7 +67,7 @@ class Datasets(Protocol):
|
|||
@webmethod(route="/datasets", method="GET")
|
||||
async def list_datasets(self) -> ListDatasetsResponse: ...
|
||||
|
||||
@webmethod(route="/datasets/{dataset_id}", method="DELETE")
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE")
|
||||
async def unregister_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
|
|
|
@ -63,9 +63,7 @@ class AppEvalTaskConfig(BaseModel):
|
|||
|
||||
|
||||
EvalTaskConfig = register_schema(
|
||||
Annotated[
|
||||
Union[BenchmarkEvalTaskConfig, AppEvalTaskConfig], Field(discriminator="type")
|
||||
],
|
||||
Annotated[Union[BenchmarkEvalTaskConfig, AppEvalTaskConfig], Field(discriminator="type")],
|
||||
name="EvalTaskConfig",
|
||||
)
|
||||
|
||||
|
|
|
@ -13,8 +13,8 @@ from typing import (
|
|||
Literal,
|
||||
Optional,
|
||||
Protocol,
|
||||
runtime_checkable,
|
||||
Union,
|
||||
runtime_checkable,
|
||||
)
|
||||
|
||||
from llama_models.llama3.api.datatypes import (
|
||||
|
@ -31,15 +31,27 @@ from typing_extensions import Annotated
|
|||
|
||||
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.apis.telemetry.telemetry import MetricResponseMixin
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
|
||||
class LogProbConfig(BaseModel):
|
||||
"""
|
||||
|
||||
:param top_k: How many tokens (for each position) to return log probabilities for.
|
||||
"""
|
||||
|
||||
top_k: Optional[int] = 0
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class QuantizationType(Enum):
|
||||
"""Type of model quantization to run inference with.
|
||||
|
||||
:cvar bf16: BFloat16 typically this means _no_ quantization
|
||||
:cvar fp8: 8-bit floating point quantization
|
||||
:cvar int4: 4-bit integer quantization
|
||||
"""
|
||||
|
||||
bf16 = "bf16"
|
||||
fp8 = "fp8"
|
||||
int4 = "int4"
|
||||
|
@ -57,6 +69,12 @@ class Bf16QuantizationConfig(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class Int4QuantizationConfig(BaseModel):
|
||||
"""Configuration for 4-bit integer quantization.
|
||||
|
||||
:param type: Must be "int4" to identify this quantization type
|
||||
:param scheme: Quantization scheme to use. Defaults to "int4_weight_int8_dynamic_activation"
|
||||
"""
|
||||
|
||||
type: Literal["int4"] = "int4"
|
||||
scheme: Optional[str] = "int4_weight_int8_dynamic_activation"
|
||||
|
||||
|
@ -69,6 +87,13 @@ QuantizationConfig = Annotated[
|
|||
|
||||
@json_schema_type
|
||||
class UserMessage(BaseModel):
|
||||
"""A message from the user in a chat conversation.
|
||||
|
||||
:param role: Must be "user" to identify this as a user message
|
||||
:param content: The content of the message, which can include text and other media
|
||||
:param context: (Optional) This field is used internally by Llama Stack to pass RAG context. This field may be removed in the API in the future.
|
||||
"""
|
||||
|
||||
role: Literal["user"] = "user"
|
||||
content: InterleavedContent
|
||||
context: Optional[InterleavedContent] = None
|
||||
|
@ -76,15 +101,27 @@ class UserMessage(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class SystemMessage(BaseModel):
|
||||
"""A system message providing instructions or context to the model.
|
||||
|
||||
:param role: Must be "system" to identify this as a system message
|
||||
:param content: The content of the "system prompt". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions).
|
||||
"""
|
||||
|
||||
role: Literal["system"] = "system"
|
||||
content: InterleavedContent
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolResponseMessage(BaseModel):
|
||||
"""A message representing the result of a tool invocation.
|
||||
|
||||
:param role: Must be "tool" to identify this as a tool response
|
||||
:param call_id: Unique identifier for the tool call this response is for
|
||||
:param tool_name: Name of the tool that was called
|
||||
:param content: The response content from the tool
|
||||
"""
|
||||
|
||||
role: Literal["tool"] = "tool"
|
||||
# it was nice to re-use the ToolResponse type, but having all messages
|
||||
# have a `content` type makes things nicer too
|
||||
call_id: str
|
||||
tool_name: Union[BuiltinTool, str]
|
||||
content: InterleavedContent
|
||||
|
@ -92,10 +129,21 @@ class ToolResponseMessage(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class CompletionMessage(BaseModel):
|
||||
"""A message containing the model's (assistant) response in a chat conversation.
|
||||
|
||||
:param role: Must be "assistant" to identify this as the model's response
|
||||
:param content: The content of the model's response
|
||||
:param stop_reason: Reason why the model stopped generating. Options are:
|
||||
- `StopReason.end_of_turn`: The model finished generating the entire response.
|
||||
- `StopReason.end_of_message`: The model finished generating but generated a partial response -- usually, a tool call. The user may call the tool and continue the conversation with the tool's response.
|
||||
- `StopReason.out_of_tokens`: The model ran out of token budget.
|
||||
:param tool_calls: List of tool calls. Each tool call is a ToolCall object.
|
||||
"""
|
||||
|
||||
role: Literal["assistant"] = "assistant"
|
||||
content: InterleavedContent
|
||||
stop_reason: StopReason
|
||||
tool_calls: List[ToolCall] = Field(default_factory=list)
|
||||
tool_calls: Optional[List[ToolCall]] = Field(default_factory=list)
|
||||
|
||||
|
||||
Message = register_schema(
|
||||
|
@ -129,19 +177,35 @@ class ToolResponse(BaseModel):
|
|||
return v
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ToolChoice(Enum):
|
||||
"""Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.
|
||||
|
||||
:cvar auto: The model may use tools if it determines that is appropriate.
|
||||
:cvar required: The model must use tools.
|
||||
"""
|
||||
|
||||
auto = "auto"
|
||||
required = "required"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class TokenLogProbs(BaseModel):
|
||||
"""Log probabilities for generated tokens.
|
||||
|
||||
:param logprobs_by_token: Dictionary mapping tokens to their log probabilities
|
||||
"""
|
||||
|
||||
logprobs_by_token: Dict[str, float]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ChatCompletionResponseEventType(Enum):
|
||||
"""Types of events that can occur during chat completion.
|
||||
|
||||
:cvar start: Inference has started
|
||||
:cvar complete: Inference is complete and a full response is available
|
||||
:cvar progress: Inference is in progress and a partial response is available
|
||||
"""
|
||||
|
||||
start = "start"
|
||||
complete = "complete"
|
||||
progress = "progress"
|
||||
|
@ -149,7 +213,13 @@ class ChatCompletionResponseEventType(Enum):
|
|||
|
||||
@json_schema_type
|
||||
class ChatCompletionResponseEvent(BaseModel):
|
||||
"""Chat completion response event."""
|
||||
"""An event during chat completion generation.
|
||||
|
||||
:param event_type: Type of the event
|
||||
:param delta: Content generated since last event. This can be one or more tokens, or a tool call.
|
||||
:param logprobs: Optional log probabilities for generated tokens
|
||||
:param stop_reason: Optional reason why generation stopped, if complete
|
||||
"""
|
||||
|
||||
event_type: ChatCompletionResponseEventType
|
||||
delta: ContentDelta
|
||||
|
@ -157,22 +227,37 @@ class ChatCompletionResponseEvent(BaseModel):
|
|||
stop_reason: Optional[StopReason] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ResponseFormatType(Enum):
|
||||
"""Types of formats for structured (guided) decoding.
|
||||
|
||||
:cvar json_schema: Response should conform to a JSON schema. In a Python SDK, this is often a `pydantic` model.
|
||||
:cvar grammar: Response should conform to a BNF grammar
|
||||
"""
|
||||
|
||||
json_schema = "json_schema"
|
||||
grammar = "grammar"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class JsonSchemaResponseFormat(BaseModel):
|
||||
type: Literal[ResponseFormatType.json_schema.value] = (
|
||||
ResponseFormatType.json_schema.value
|
||||
)
|
||||
"""Configuration for JSON schema-guided response generation.
|
||||
|
||||
:param type: Must be "json_schema" to identify this format type
|
||||
:param json_schema: The JSON schema the response should conform to. In a Python SDK, this is often a `pydantic` model.
|
||||
"""
|
||||
|
||||
type: Literal[ResponseFormatType.json_schema.value] = ResponseFormatType.json_schema.value
|
||||
json_schema: Dict[str, Any]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class GrammarResponseFormat(BaseModel):
|
||||
"""Configuration for grammar-guided response generation.
|
||||
|
||||
:param type: Must be "grammar" to identify this format type
|
||||
:param bnf: The BNF grammar specification the response should conform to
|
||||
"""
|
||||
|
||||
type: Literal[ResponseFormatType.grammar.value] = ResponseFormatType.grammar.value
|
||||
bnf: Dict[str, Any]
|
||||
|
||||
|
@@ -186,20 +271,24 @@ ResponseFormat = register_schema(
)


@json_schema_type
# This is an internally used class
class CompletionRequest(BaseModel):
    model: str
    content: InterleavedContent
    sampling_params: Optional[SamplingParams] = SamplingParams()
    response_format: Optional[ResponseFormat] = None

    stream: Optional[bool] = False
    logprobs: Optional[LogProbConfig] = None


@json_schema_type
class CompletionResponse(BaseModel):
    """Completion response."""
    """Response from a completion request.

    :param content: The generated completion text
    :param stop_reason: Reason why generation stopped
    :param logprobs: Optional log probabilities for generated tokens
    """

    content: str
    stop_reason: StopReason

@@ -208,80 +297,95 @@ class CompletionResponse(BaseModel):

@json_schema_type
class CompletionResponseStreamChunk(BaseModel):
    """streamed completion response."""
    """A chunk of a streamed completion response.

    :param delta: New content generated since last chunk. This can be one or more tokens.
    :param stop_reason: Optional reason why generation stopped, if complete
    :param logprobs: Optional log probabilities for generated tokens
    """

    delta: str
    stop_reason: Optional[StopReason] = None
    logprobs: Optional[List[TokenLogProbs]] = None


@json_schema_type
class BatchCompletionRequest(BaseModel):
    model: str
    content_batch: List[InterleavedContent]
    sampling_params: Optional[SamplingParams] = SamplingParams()
    response_format: Optional[ResponseFormat] = None
    logprobs: Optional[LogProbConfig] = None
class SystemMessageBehavior(Enum):
    """Config for how to override the default system prompt.

    :cvar append: Appends the provided system message to the default system prompt:
        https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_2/#-function-definitions-in-the-system-prompt-
    :cvar replace: Replaces the default system prompt with the provided system message. The system message can include the string
        '{{function_definitions}}' to indicate where the function definitions should be inserted.
    """

    append = "append"
    replace = "replace"


@json_schema_type
class BatchCompletionResponse(BaseModel):
    """Batch completion response."""
class ToolConfig(BaseModel):
    """Configuration for tool use.

    batch: List[CompletionResponse]
    :param tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
    :param tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model.
        - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
        - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag.
        - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls.
    :param system_message_behavior: (Optional) Config for how to override the default system prompt.
        - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt.
        - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string
          '{{function_definitions}}' to indicate where the function definitions should be inserted.
    """

    tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
    tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
    system_message_behavior: SystemMessageBehavior = Field(default=SystemMessageBehavior.append)

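To make the system_message_behavior semantics concrete, here is a small hedged sketch (class and field names come from the definitions above; the system prompt text is an assumption):

```python
# Sketch: assumes ToolChoice, ToolPromptFormat, SystemMessageBehavior and ToolConfig
# are imported from the inference API module shown in this diff.
tool_config = ToolConfig(
    tool_choice=ToolChoice.auto,
    tool_prompt_format=ToolPromptFormat.python_list,
    system_message_behavior=SystemMessageBehavior.replace,
)
# With `replace`, the caller's system message is expected to contain the literal
# '{{function_definitions}}' placeholder so the tool definitions can be spliced in.
system_prompt = "You are a terse assistant.\n{{function_definitions}}"
```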
# This is an internally used class
@json_schema_type
class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[Message]
    sampling_params: Optional[SamplingParams] = SamplingParams()

    # zero-shot tool definitions as input to the model
    tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
    tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
    tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
    response_format: Optional[ResponseFormat] = None
    tool_config: Optional[ToolConfig] = Field(default_factory=ToolConfig)

    response_format: Optional[ResponseFormat] = None
    stream: Optional[bool] = False
    logprobs: Optional[LogProbConfig] = None


@json_schema_type
class ChatCompletionResponseStreamChunk(BaseModel):
    """SSE-stream of these events."""
class ChatCompletionResponseStreamChunk(MetricResponseMixin, BaseModel):
    """A chunk of a streamed chat completion response.

    :param event: The event containing the new content
    """

    event: ChatCompletionResponseEvent


@json_schema_type
class ChatCompletionResponse(BaseModel):
    """Chat completion response."""
class ChatCompletionResponse(MetricResponseMixin, BaseModel):
    """Response from a chat completion request.

    :param completion_message: The complete response message
    :param logprobs: Optional log probabilities for generated tokens
    """

    completion_message: CompletionMessage
    logprobs: Optional[List[TokenLogProbs]] = None


@json_schema_type
class BatchChatCompletionRequest(BaseModel):
    model: str
    messages_batch: List[List[Message]]
    sampling_params: Optional[SamplingParams] = SamplingParams()

    # zero-shot tool definitions as input to the model
    tools: Optional[List[ToolDefinition]] = Field(default_factory=list)
    tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
    tool_prompt_format: Optional[ToolPromptFormat] = Field(default=None)
    logprobs: Optional[LogProbConfig] = None


@json_schema_type
class BatchChatCompletionResponse(BaseModel):
    batch: List[ChatCompletionResponse]


@json_schema_type
class EmbeddingsResponse(BaseModel):
    """Response containing generated embeddings.

    :param embeddings: List of embedding vectors, one per input content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}
    """

    embeddings: List[List[float]]


@@ -292,6 +396,13 @@ class ModelStore(Protocol):
@runtime_checkable
@trace_protocol
class Inference(Protocol):
    """Llama Stack Inference API for generating completions, chat completions, and embeddings.

    This API provides the raw interface to the underlying models. Two kinds of models are supported:
    - LLM models: these models generate "raw" and "chat" (conversational) completions.
    - Embedding models: these models generate embeddings to be used for semantic search.
    """

    model_store: ModelStore

    @webmethod(route="/inference/completion", method="POST")

@@ -303,7 +414,19 @@ class Inference(Protocol):
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]: ...
    ) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]:
        """Generate a completion for the given content using the specified model.

        :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
        :param content: The content to generate a completion for
        :param sampling_params: (Optional) Parameters to control the sampling strategy
        :param response_format: (Optional) Grammar specification for guided (structured) decoding
        :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
        :param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
        :returns: If stream=False, returns a CompletionResponse with the full completion.
            If stream=True, returns an SSE event stream of CompletionResponseStreamChunk
        """
        ...

    @webmethod(route="/inference/chat-completion", method="POST")
    async def chat_completion(

@@ -311,20 +434,50 @@ class Inference(Protocol):
        model_id: str,
        messages: List[Message],
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        # zero-shot tool definitions as input to the model
        tools: Optional[List[ToolDefinition]] = None,
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = None,
        response_format: Optional[ResponseFormat] = None,
        stream: Optional[bool] = False,
        logprobs: Optional[LogProbConfig] = None,
    ) -> Union[
        ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]
    ]: ...
        tool_config: Optional[ToolConfig] = None,
    ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]:
        """Generate a chat completion for the given messages using the specified model.

        :param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
        :param messages: List of messages in the conversation
        :param sampling_params: Parameters to control the sampling strategy
        :param tools: (Optional) List of tool definitions available to the model
        :param tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto.
            .. deprecated::
               Use tool_config instead.
        :param tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model.
            - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
            - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag.
            - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls.
            .. deprecated::
               Use tool_config instead.
        :param response_format: (Optional) Grammar specification for guided (structured) decoding. There are two options:
            - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most providers support this format.
            - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it.
        :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
        :param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
        :param tool_config: (Optional) Configuration for tool use.
        :returns: If stream=False, returns a ChatCompletionResponse with the full completion.
            If stream=True, returns an SSE event stream of ChatCompletionResponseStreamChunk
        """
        ...

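A hedged sketch of calling the protocol method defined above directly; `impl` stands in for whichever Inference implementation the stack resolves (an assumption), the model identifier is illustrative, and UserMessage is assumed to be the usual message type from the same API package:

```python
# Sketch only: non-streaming call using the chat_completion() signature above.
async def ask(impl: Inference) -> str:
    response = await impl.chat_completion(
        model_id="example-llama-model",  # hypothetical registered model id
        messages=[UserMessage(content="What is the capital of France?")],
        tool_config=ToolConfig(tool_choice=ToolChoice.auto),
        stream=False,
    )
    return response.completion_message.content
```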
@webmethod(route="/inference/embeddings", method="POST")
|
||||
async def embeddings(
|
||||
self,
|
||||
model_id: str,
|
||||
contents: List[InterleavedContent],
|
||||
) -> EmbeddingsResponse: ...
|
||||
) -> EmbeddingsResponse:
|
||||
"""Generate embeddings for content pieces using the specified model.
|
||||
|
||||
:param model_id: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
|
||||
:param contents: List of contents to generate embeddings for. Note that content can be multimodal. The behavior depends on the model and provider. Some models may only support text.
|
||||
:returns: An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}
|
||||
"""
|
||||
...
|
||||
|
|
|
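A hedged usage sketch against the embeddings() signature above; the model id and the cosine-similarity helper are illustrative and not part of this diff:

```python
import math


async def similarity(impl: Inference, a: str, b: str) -> float:
    # Two text contents in, two embedding vectors out (per the docstring above).
    resp = await impl.embeddings(model_id="example-embedding-model", contents=[a, b])
    v1, v2 = resp.embeddings
    dot = sum(x * y for x, y in zip(v1, v2))
    norm = math.sqrt(sum(x * x for x in v1)) * math.sqrt(sum(x * x for x in v2))
    return dot / norm
```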
@@ -62,7 +62,7 @@ class Models(Protocol):
    @webmethod(route="/models", method="GET")
    async def list_models(self) -> ListModelsResponse: ...

    @webmethod(route="/models/{model_id}", method="GET")
    @webmethod(route="/models/{model_id:path}", method="GET")
    async def get_model(
        self,
        model_id: str,

@@ -78,7 +78,7 @@ class Models(Protocol):
        model_type: Optional[ModelType] = None,
    ) -> Model: ...

    @webmethod(route="/models/{model_id}", method="DELETE")
    @webmethod(route="/models/{model_id:path}", method="DELETE")
    async def unregister_model(
        self,
        model_id: str,
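The `{model_id:path}` change matters because model identifiers can contain slashes (for example Hugging Face style ids), and a plain path parameter stops at the first `/`. A minimal Starlette-style sketch, independent of this diff and used only to illustrate the converter semantics these routes appear to rely on:

```python
# Illustration only: ":path" keeps slashes in the captured parameter.
from starlette.applications import Starlette
from starlette.responses import JSONResponse
from starlette.routing import Route


async def get_model(request):
    # An id like "meta-llama/Llama-3.1-8B-Instruct" arrives intact instead of
    # being truncated at the first slash.
    return JSONResponse({"model_id": request.path_params["model_id"]})


app = Starlette(routes=[Route("/models/{model_id:path}", get_model)])
```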
@@ -89,9 +89,7 @@ class QATFinetuningConfig(BaseModel):


AlgorithmConfig = register_schema(
    Annotated[
        Union[LoraFinetuningConfig, QATFinetuningConfig], Field(discriminator="type")
    ],
    Annotated[Union[LoraFinetuningConfig, QATFinetuningConfig], Field(discriminator="type")],
    name="AlgorithmConfig",
)

@@ -204,14 +202,10 @@ class PostTraining(Protocol):
    async def get_training_jobs(self) -> ListPostTrainingJobsResponse: ...

    @webmethod(route="/post-training/job/status", method="GET")
    async def get_training_job_status(
        self, job_uuid: str
    ) -> Optional[PostTrainingJobStatusResponse]: ...
    async def get_training_job_status(self, job_uuid: str) -> Optional[PostTrainingJobStatusResponse]: ...

    @webmethod(route="/post-training/job/cancel", method="POST")
    async def cancel_training_job(self, job_uuid: str) -> None: ...

    @webmethod(route="/post-training/job/artifacts", method="GET")
    async def get_training_job_artifacts(
        self, job_uuid: str
    ) -> Optional[PostTrainingJobArtifactsResponse]: ...
    async def get_training_job_artifacts(self, job_uuid: str) -> Optional[PostTrainingJobArtifactsResponse]: ...
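The `Field(discriminator="type")` annotation is what lets a single AlgorithmConfig payload be parsed into the right config class. A hedged, self-contained sketch of the same pydantic pattern; the two toy models below are stand-ins, not the real finetuning configs, whose fields are not shown in this diff:

```python
from typing import Literal, Union

from pydantic import BaseModel, Field, TypeAdapter
from typing_extensions import Annotated


class LoraLike(BaseModel):
    type: Literal["lora"] = "lora"
    rank: int = 8


class QATLike(BaseModel):
    type: Literal["qat"] = "qat"
    quantizer_name: str = "fp8"


AlgoLike = Annotated[Union[LoraLike, QATLike], Field(discriminator="type")]

# The "type" field selects which concrete model validates the payload.
cfg = TypeAdapter(AlgoLike).validate_python({"type": "qat", "quantizer_name": "int4"})
assert isinstance(cfg, QATLike)
```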
@@ -6,11 +6,9 @@

from enum import Enum

from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field


@json_schema_type
class ResourceType(Enum):
    model = "model"
    shield = "shield"

@@ -25,9 +23,7 @@ class ResourceType(Enum):
class Resource(BaseModel):
    """Base class for all Llama Stack resources"""

    identifier: str = Field(
        description="Unique identifier for this resource in llama stack"
    )
    identifier: str = Field(description="Unique identifier for this resource in llama stack")

    provider_resource_id: str = Field(
        description="Unique identifier for this resource in the provider",

@@ -36,6 +32,4 @@ class Resource(BaseModel):

    provider_id: str = Field(description="ID of the provider that owns this resource")

    type: ResourceType = Field(
        description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)"
    )
    type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)")
@@ -12,8 +12,8 @@ from typing import (
    Literal,
    Optional,
    Protocol,
    runtime_checkable,
    Union,
    runtime_checkable,
)

from llama_models.schema_utils import json_schema_type, register_schema, webmethod

@@ -43,9 +43,7 @@ class AggregationFunctionType(Enum):

@json_schema_type
class LLMAsJudgeScoringFnParams(BaseModel):
    type: Literal[ScoringFnParamsType.llm_as_judge.value] = (
        ScoringFnParamsType.llm_as_judge.value
    )
    type: Literal[ScoringFnParamsType.llm_as_judge.value] = ScoringFnParamsType.llm_as_judge.value
    judge_model: str
    prompt_template: Optional[str] = None
    judge_score_regexes: Optional[List[str]] = Field(

@@ -60,9 +58,7 @@ class LLMAsJudgeScoringFnParams(BaseModel):

@json_schema_type
class RegexParserScoringFnParams(BaseModel):
    type: Literal[ScoringFnParamsType.regex_parser.value] = (
        ScoringFnParamsType.regex_parser.value
    )
    type: Literal[ScoringFnParamsType.regex_parser.value] = ScoringFnParamsType.regex_parser.value
    parsing_regexes: Optional[List[str]] = Field(
        description="Regex to extract the answer from generated response",
        default_factory=list,

@@ -112,9 +108,7 @@ class CommonScoringFnFields(BaseModel):

@json_schema_type
class ScoringFn(CommonScoringFnFields, Resource):
    type: Literal[ResourceType.scoring_function.value] = (
        ResourceType.scoring_function.value
    )
    type: Literal[ResourceType.scoring_function.value] = ResourceType.scoring_function.value

    @property
    def scoring_fn_id(self) -> str:

@@ -140,10 +134,8 @@ class ScoringFunctions(Protocol):
    @webmethod(route="/scoring-functions", method="GET")
    async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ...

    @webmethod(route="/scoring-functions/{scoring_fn_id}", method="GET")
    async def get_scoring_function(
        self, scoring_fn_id: str, /
    ) -> Optional[ScoringFn]: ...
    @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET")
    async def get_scoring_function(self, scoring_fn_id: str, /) -> Optional[ScoringFn]: ...

    @webmethod(route="/scoring-functions", method="POST")
    async def register_scoring_function(
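A hedged sketch of constructing the LLM-as-judge params above; the field names come from the class shown, while the judge model id, prompt template, and regex are illustrative values:

```python
# Sketch only: the score regex should capture whatever the prompt template
# asks the judge model to emit.
params = LLMAsJudgeScoringFnParams(
    judge_model="example-judge-model",
    prompt_template="Rate the answer from 0-10.\nQuestion: {input}\nAnswer: {output}\nScore:",
    judge_score_regexes=[r"Score:\s*(\d+)"],
)
```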
@@ -48,7 +48,7 @@ class Shields(Protocol):
    @webmethod(route="/shields", method="GET")
    async def list_shields(self) -> ListShieldsResponse: ...

    @webmethod(route="/shields/{identifier}", method="GET")
    @webmethod(route="/shields/{identifier:path}", method="GET")
    async def get_shield(self, identifier: str) -> Optional[Shield]: ...

    @webmethod(route="/shields", method="POST")

@@ -5,11 +5,9 @@
# the root directory of this source tree.

from enum import Enum

from typing import Any, Dict, List, Optional, Protocol, Union

from llama_models.schema_utils import json_schema_type, webmethod

from pydantic import BaseModel

from llama_stack.apis.inference import Message
@@ -13,10 +13,11 @@ from typing import (
    Literal,
    Optional,
    Protocol,
    runtime_checkable,
    Union,
    runtime_checkable,
)

from llama_models.llama3.api.datatypes import Primitive
from llama_models.schema_utils import json_schema_type, register_schema, webmethod
from pydantic import BaseModel, Field
from typing_extensions import Annotated

@@ -76,7 +77,7 @@ class EventCommon(BaseModel):
    trace_id: str
    span_id: str
    timestamp: datetime
    attributes: Optional[Dict[str, Any]] = Field(default_factory=dict)
    attributes: Optional[Dict[str, Primitive]] = Field(default_factory=dict)


@json_schema_type

@@ -94,6 +95,30 @@ class MetricEvent(EventCommon):
    unit: str


# This is a short term solution to allow the inference API to return metrics.
# The ideal way to do this is to have a way for all response types to include metrics
# and for all metric events logged to the telemetry API to be included with the response.
# To do this, we will need to augment all response types with a metrics field.
# We have hit a blocker from the Stainless SDK that prevents us from doing this.
# The blocker is that if we were to augment the response types that have a data field
# in them like so
# class ListModelsResponse(BaseModel):
#     metrics: Optional[List[MetricEvent]] = None
#     data: List[Models]
#     ...
# the client SDK would need to access the data by using a .data field, which is not
# ergonomic. The Stainless SDK does support unwrapping the response type, but it
# requires that the response type only have a single field.

# We will need a way in the client SDK to signal that the metrics are needed
# and, if they are needed, the client SDK has to return the full response type
# without unwrapping it.


class MetricResponseMixin(BaseModel):
    metrics: Optional[List[MetricEvent]] = None

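A hedged sketch of what the mixin buys the inference responses defined earlier; `unit` and `trace_id` are fields shown in this diff, while the `metric` and `value` field names are assumptions based on typical MetricEvent usage:

```python
# Any response class that inherits MetricResponseMixin (e.g. ChatCompletionResponse
# in this diff) can carry metric events alongside its payload.
def log_token_usage(response: ChatCompletionResponse) -> None:
    for metric in response.metrics or []:
        # MetricEvent extends EventCommon, so trace/span ids are available too.
        print(f"{metric.metric}={metric.value}{metric.unit} (trace={metric.trace_id})")
```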
@json_schema_type
class StructuredLogType(Enum):
    SPAN_START = "span_start"

@@ -102,9 +127,7 @@ class StructuredLogType(Enum):

@json_schema_type
class SpanStartPayload(BaseModel):
    type: Literal[StructuredLogType.SPAN_START.value] = (
        StructuredLogType.SPAN_START.value
    )
    type: Literal[StructuredLogType.SPAN_START.value] = StructuredLogType.SPAN_START.value
    name: str
    parent_span_id: Optional[str] = None

@@ -190,9 +213,7 @@ class QuerySpanTreeResponse(BaseModel):
@runtime_checkable
class Telemetry(Protocol):
    @webmethod(route="/telemetry/events", method="POST")
    async def log_event(
        self, event: Event, ttl_seconds: int = DEFAULT_TTL_DAYS * 86400
    ) -> None: ...
    async def log_event(self, event: Event, ttl_seconds: int = DEFAULT_TTL_DAYS * 86400) -> None: ...

    @webmethod(route="/telemetry/traces", method="GET")
    async def query_traces(

@@ -203,13 +224,13 @@ class Telemetry(Protocol):
        order_by: Optional[List[str]] = None,
    ) -> QueryTracesResponse: ...

    @webmethod(route="/telemetry/traces/{trace_id}", method="GET")
    @webmethod(route="/telemetry/traces/{trace_id:path}", method="GET")
    async def get_trace(self, trace_id: str) -> Trace: ...

    @webmethod(route="/telemetry/traces/{trace_id}/spans/{span_id}", method="GET")
    @webmethod(route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", method="GET")
    async def get_span(self, trace_id: str, span_id: str) -> Span: ...

    @webmethod(route="/telemetry/spans/{span_id}/tree", method="GET")
    @webmethod(route="/telemetry/spans/{span_id:path}/tree", method="GET")
    async def get_span_tree(
        self,
        span_id: str,
@@ -4,5 +4,5 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .tools import *  # noqa: F401 F403
from .rag_tool import *  # noqa: F401 F403
from .tools import *  # noqa: F401 F403

@@ -11,7 +11,7 @@ from llama_models.schema_utils import json_schema_type, register_schema, webmeth
from pydantic import BaseModel, Field
from typing_extensions import Annotated, Protocol, runtime_checkable

from llama_stack.apis.common.content_types import InterleavedContent, URL
from llama_stack.apis.common.content_types import URL, InterleavedContent
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol


@@ -64,9 +64,7 @@ RAGQueryGeneratorConfig = register_schema(
class RAGQueryConfig(BaseModel):
    # This config defines how a query is generated using the messages
    # for memory bank retrieval.
    query_generator_config: RAGQueryGeneratorConfig = Field(
        default=DefaultRAGQueryGeneratorConfig()
    )
    query_generator_config: RAGQueryGeneratorConfig = Field(default=DefaultRAGQueryGeneratorConfig())
    max_tokens_in_context: int = 4096
    max_chunks: int = 5

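A hedged sketch using the fields shown above; the tighter budget values are illustrative, not recommendations from this diff:

```python
# Sketch only: defaults come from RAGQueryConfig; override the context budget
# and chunk count for a smaller retrieval window.
query_config = RAGQueryConfig(
    query_generator_config=DefaultRAGQueryGeneratorConfig(),
    max_tokens_in_context=2048,
    max_chunks=3,
)
```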
@@ -11,7 +11,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel, Field
from typing_extensions import Protocol, runtime_checkable

from llama_stack.apis.common.content_types import InterleavedContent, URL
from llama_stack.apis.common.content_types import URL, InterleavedContent
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol


@@ -101,7 +101,7 @@ class ToolGroups(Protocol):
        """Register a tool group"""
        ...

    @webmethod(route="/toolgroups/{toolgroup_id}", method="GET")
    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET")
    async def get_tool_group(
        self,
        toolgroup_id: str,

@@ -117,13 +117,13 @@ class ToolGroups(Protocol):
        """List tools with optional tool group"""
        ...

    @webmethod(route="/tools/{tool_name}", method="GET")
    @webmethod(route="/tools/{tool_name:path}", method="GET")
    async def get_tool(
        self,
        tool_name: str,
    ) -> Tool: ...

    @webmethod(route="/toolgroups/{toolgroup_id}", method="DELETE")
    @webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE")
    async def unregister_toolgroup(
        self,
        toolgroup_id: str,

@@ -150,8 +150,6 @@ class ToolRuntime(Protocol):
    ) -> List[ToolDef]: ...

    @webmethod(route="/tool-runtime/invoke", method="POST")
    async def invoke_tool(
        self, tool_name: str, kwargs: Dict[str, Any]
    ) -> ToolInvocationResult:
    async def invoke_tool(self, tool_name: str, kwargs: Dict[str, Any]) -> ToolInvocationResult:
        """Run a tool with the given arguments"""
        ...
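A hedged sketch against the invoke_tool() signature above; the tool name and arguments are illustrative and depend on which tool groups are actually registered:

```python
# Sketch only: `runtime` stands in for a resolved ToolRuntime implementation.
async def search(runtime: ToolRuntime) -> ToolInvocationResult:
    return await runtime.invoke_tool(
        tool_name="web_search",  # hypothetical registered tool
        kwargs={"query": "latest Llama Stack release"},
    )
```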
@@ -46,7 +46,7 @@ class VectorDBs(Protocol):
    @webmethod(route="/vector-dbs", method="GET")
    async def list_vector_dbs(self) -> ListVectorDBsResponse: ...

    @webmethod(route="/vector-dbs/{vector_db_id}", method="GET")
    @webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET")
    async def get_vector_db(
        self,
        vector_db_id: str,

@@ -62,5 +62,5 @@ class VectorDBs(Protocol):
        provider_vector_db_id: Optional[str] = None,
    ) -> VectorDB: ...

    @webmethod(route="/vector-dbs/{vector_db_id}", method="DELETE")
    @webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE")
    async def unregister_vector_db(self, vector_db_id: str) -> None: ...
@ -16,11 +16,9 @@ from pathlib import Path
|
|||
from typing import Dict, List, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from llama_models.datatypes import Model
|
||||
from llama_models.sku_list import LlamaDownloadInfo
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from rich.console import Console
|
||||
from rich.progress import (
|
||||
BarColumn,
|
||||
|
@ -147,9 +145,7 @@ class ParallelDownloader:
|
|||
"follow_redirects": True,
|
||||
}
|
||||
|
||||
async def retry_with_exponential_backoff(
|
||||
self, task: DownloadTask, func, *args, **kwargs
|
||||
):
|
||||
async def retry_with_exponential_backoff(self, task: DownloadTask, func, *args, **kwargs):
|
||||
last_exception = None
|
||||
for attempt in range(task.max_retries):
|
||||
try:
|
||||
|
@ -166,13 +162,9 @@ class ParallelDownloader:
|
|||
continue
|
||||
raise last_exception
|
||||
|
||||
async def get_file_info(
|
||||
self, client: httpx.AsyncClient, task: DownloadTask
|
||||
) -> None:
|
||||
async def get_file_info(self, client: httpx.AsyncClient, task: DownloadTask) -> None:
|
||||
async def _get_info():
|
||||
response = await client.head(
|
||||
task.url, headers={"Accept-Encoding": "identity"}, **self.client_options
|
||||
)
|
||||
response = await client.head(task.url, headers={"Accept-Encoding": "identity"}, **self.client_options)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
|
@ -201,14 +193,10 @@ class ParallelDownloader:
|
|||
return False
|
||||
return os.path.getsize(task.output_file) == task.total_size
|
||||
|
||||
async def download_chunk(
|
||||
self, client: httpx.AsyncClient, task: DownloadTask, start: int, end: int
|
||||
) -> None:
|
||||
async def download_chunk(self, client: httpx.AsyncClient, task: DownloadTask, start: int, end: int) -> None:
|
||||
async def _download_chunk():
|
||||
headers = {"Range": f"bytes={start}-{end}"}
|
||||
async with client.stream(
|
||||
"GET", task.url, headers=headers, **self.client_options
|
||||
) as response:
|
||||
async with client.stream("GET", task.url, headers=headers, **self.client_options) as response:
|
||||
response.raise_for_status()
|
||||
|
||||
with open(task.output_file, "ab") as file:
|
||||
|
@ -225,8 +213,7 @@ class ParallelDownloader:
|
|||
await self.retry_with_exponential_backoff(task, _download_chunk)
|
||||
except Exception as e:
|
||||
raise DownloadError(
|
||||
f"Failed to download chunk {start}-{end} after "
|
||||
f"{task.max_retries} attempts: {str(e)}"
|
||||
f"Failed to download chunk {start}-{end} after {task.max_retries} attempts: {str(e)}"
|
||||
) from e
|
||||
|
||||
async def prepare_download(self, task: DownloadTask) -> None:
|
||||
|
@ -244,9 +231,7 @@ class ParallelDownloader:
|
|||
# Check if file is already downloaded
|
||||
if os.path.exists(task.output_file):
|
||||
if self.verify_file_integrity(task):
|
||||
self.console.print(
|
||||
f"[green]Already downloaded {task.output_file}[/green]"
|
||||
)
|
||||
self.console.print(f"[green]Already downloaded {task.output_file}[/green]")
|
||||
self.progress.update(task.task_id, completed=task.total_size)
|
||||
return
|
||||
|
||||
|
@ -259,9 +244,7 @@ class ParallelDownloader:
|
|||
|
||||
current_pos = task.downloaded_size
|
||||
while current_pos < task.total_size:
|
||||
chunk_end = min(
|
||||
current_pos + chunk_size - 1, task.total_size - 1
|
||||
)
|
||||
chunk_end = min(current_pos + chunk_size - 1, task.total_size - 1)
|
||||
chunks.append((current_pos, chunk_end))
|
||||
current_pos = chunk_end + 1
|
||||
|
||||
|
@ -273,18 +256,12 @@ class ParallelDownloader:
|
|||
raise DownloadError(f"Download failed: {str(e)}") from e
|
||||
|
||||
except Exception as e:
|
||||
self.progress.update(
|
||||
task.task_id, description=f"[red]Failed: {task.output_file}[/red]"
|
||||
)
|
||||
raise DownloadError(
|
||||
f"Download failed for {task.output_file}: {str(e)}"
|
||||
) from e
|
||||
self.progress.update(task.task_id, description=f"[red]Failed: {task.output_file}[/red]")
|
||||
raise DownloadError(f"Download failed for {task.output_file}: {str(e)}") from e
|
||||
|
||||
def has_disk_space(self, tasks: List[DownloadTask]) -> bool:
|
||||
try:
|
||||
total_remaining_size = sum(
|
||||
task.total_size - task.downloaded_size for task in tasks
|
||||
)
|
||||
total_remaining_size = sum(task.total_size - task.downloaded_size for task in tasks)
|
||||
dir_path = os.path.dirname(os.path.abspath(tasks[0].output_file))
|
||||
free_space = shutil.disk_usage(dir_path).free
|
||||
|
||||
|
@ -314,9 +291,7 @@ class ParallelDownloader:
|
|||
with self.progress:
|
||||
for task in tasks:
|
||||
desc = f"Downloading {Path(task.output_file).name}"
|
||||
task.task_id = self.progress.add_task(
|
||||
desc, total=task.total_size, completed=task.downloaded_size
|
||||
)
|
||||
task.task_id = self.progress.add_task(desc, total=task.total_size, completed=task.downloaded_size)
|
||||
|
||||
semaphore = asyncio.Semaphore(self.max_concurrent_downloads)
|
||||
|
||||
|
@ -332,9 +307,7 @@ class ParallelDownloader:
|
|||
if failed_tasks:
|
||||
self.console.print("\n[red]Some downloads failed:[/red]")
|
||||
for task, error in failed_tasks:
|
||||
self.console.print(
|
||||
f"[red]- {Path(task.output_file).name}: {error}[/red]"
|
||||
)
|
||||
self.console.print(f"[red]- {Path(task.output_file).name}: {error}[/red]")
|
||||
raise DownloadError(f"{len(failed_tasks)} downloads failed")
|
||||
|
||||
|
||||
|
@ -396,11 +369,7 @@ def _meta_download(
|
|||
output_file = str(output_dir / f)
|
||||
url = meta_url.replace("*", f"{info.folder}/{f}")
|
||||
total_size = info.pth_size if "consolidated" in f else 0
|
||||
tasks.append(
|
||||
DownloadTask(
|
||||
url=url, output_file=output_file, total_size=total_size, max_retries=3
|
||||
)
|
||||
)
|
||||
tasks.append(DownloadTask(url=url, output_file=output_file, total_size=total_size, max_retries=3))
|
||||
|
||||
# Initialize and run parallel downloader
|
||||
downloader = ParallelDownloader(max_concurrent_downloads=max_concurrent_downloads)
|
||||
|
@ -446,14 +415,10 @@ def _download_from_manifest(manifest_file: str, max_concurrent_downloads: int):
|
|||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
if any(output_dir.iterdir()):
|
||||
console.print(
|
||||
f"[yellow]Output directory {output_dir} is not empty.[/yellow]"
|
||||
)
|
||||
console.print(f"[yellow]Output directory {output_dir} is not empty.[/yellow]")
|
||||
|
||||
while True:
|
||||
resp = input(
|
||||
"Do you want to (C)ontinue download or (R)estart completely? (continue/restart): "
|
||||
)
|
||||
resp = input("Do you want to (C)ontinue download or (R)estart completely? (continue/restart): ")
|
||||
if resp.lower() in ["restart", "r"]:
|
||||
shutil.rmtree(output_dir)
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
@ -471,9 +436,7 @@ def _download_from_manifest(manifest_file: str, max_concurrent_downloads: int):
|
|||
]
|
||||
|
||||
# Initialize and run parallel downloader
|
||||
downloader = ParallelDownloader(
|
||||
max_concurrent_downloads=max_concurrent_downloads
|
||||
)
|
||||
downloader = ParallelDownloader(max_concurrent_downloads=max_concurrent_downloads)
|
||||
asyncio.run(downloader.download_all(tasks))
|
||||
|
||||
|
||||
|
|
|
@ -8,7 +8,6 @@ import argparse
|
|||
import json
|
||||
|
||||
from llama_models.sku_list import resolve_model
|
||||
|
||||
from termcolor import colored
|
||||
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
|
|
|
@ -38,7 +38,7 @@ class ModelList(Subcommand):
|
|||
|
||||
headers = [
|
||||
"Model Descriptor",
|
||||
"Hugging Face Repo",
|
||||
"Model ID",
|
||||
"Context Length",
|
||||
]
|
||||
|
||||
|
|
|
@ -11,7 +11,6 @@ from llama_stack.cli.model.download import ModelDownload
|
|||
from llama_stack.cli.model.list import ModelList
|
||||
from llama_stack.cli.model.prompt_format import ModelPromptFormat
|
||||
from llama_stack.cli.model.verify_download import ModelVerifyDownload
|
||||
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
|
||||
|
||||
|
@ -26,6 +25,8 @@ class ModelParser(Subcommand):
|
|||
description="Work with llama models",
|
||||
)
|
||||
|
||||
self.parser.set_defaults(func=lambda args: self.parser.print_help())
|
||||
|
||||
subparsers = self.parser.add_subparsers(title="model_subcommands")
|
||||
|
||||
# Add sub-commands
|
||||
|
|
|
@ -8,7 +8,7 @@ import argparse
|
|||
import textwrap
|
||||
from io import StringIO
|
||||
|
||||
from llama_models.datatypes import CoreModelId, is_multimodal, model_family, ModelFamily
|
||||
from llama_models.datatypes import CoreModelId, ModelFamily, is_multimodal, model_family
|
||||
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
|
||||
|
@ -47,33 +47,20 @@ class ModelPromptFormat(Subcommand):
|
|||
|
||||
# Only Llama 3.1 and 3.2 are supported
|
||||
supported_model_ids = [
|
||||
m
|
||||
for m in CoreModelId
|
||||
if model_family(m) in {ModelFamily.llama3_1, ModelFamily.llama3_2}
|
||||
m for m in CoreModelId if model_family(m) in {ModelFamily.llama3_1, ModelFamily.llama3_2}
|
||||
]
|
||||
model_str = "\n".join([m.value for m in supported_model_ids])
|
||||
try:
|
||||
model_id = CoreModelId(args.model_name)
|
||||
except ValueError:
|
||||
self.parser.error(
|
||||
f"{args.model_name} is not a valid Model. Choose one from --\n{model_str}"
|
||||
)
|
||||
self.parser.error(f"{args.model_name} is not a valid Model. Choose one from --\n{model_str}")
|
||||
|
||||
if model_id not in supported_model_ids:
|
||||
self.parser.error(
|
||||
f"{model_id} is not a valid Model. Choose one from --\n {model_str}"
|
||||
)
|
||||
self.parser.error(f"{model_id} is not a valid Model. Choose one from --\n {model_str}")
|
||||
|
||||
llama_3_1_file = (
|
||||
importlib.resources.files("llama_models") / "llama3_1/prompt_format.md"
|
||||
)
|
||||
llama_3_2_text_file = (
|
||||
importlib.resources.files("llama_models") / "llama3_2/text_prompt_format.md"
|
||||
)
|
||||
llama_3_2_vision_file = (
|
||||
importlib.resources.files("llama_models")
|
||||
/ "llama3_2/vision_prompt_format.md"
|
||||
)
|
||||
llama_3_1_file = importlib.resources.files("llama_models") / "llama3_1/prompt_format.md"
|
||||
llama_3_2_text_file = importlib.resources.files("llama_models") / "llama3_2/text_prompt_format.md"
|
||||
llama_3_2_vision_file = importlib.resources.files("llama_models") / "llama3_2/vision_prompt_format.md"
|
||||
if model_family(model_id) == ModelFamily.llama3_1:
|
||||
with importlib.resources.as_file(llama_3_1_file) as f:
|
||||
content = f.open("r").read()
|
||||
|
|
|
@ -9,7 +9,6 @@ from typing import Any, Dict, Optional
|
|||
from llama_models.datatypes import CheckpointQuantizationFormat
|
||||
from llama_models.llama3.api.datatypes import SamplingParams
|
||||
from llama_models.sku_list import LlamaDownloadInfo
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
|
||||
|
@ -17,16 +16,12 @@ class PromptGuardModel(BaseModel):
|
|||
"""Make a 'fake' Model-like object for Prompt Guard. Eventually this will be removed."""
|
||||
|
||||
model_id: str = "Prompt-Guard-86M"
|
||||
description: str = (
|
||||
"Prompt Guard. NOTE: this model will not be provided via `llama` CLI soon."
|
||||
)
|
||||
description: str = "Prompt Guard. NOTE: this model will not be provided via `llama` CLI soon."
|
||||
is_featured: bool = False
|
||||
huggingface_repo: str = "meta-llama/Prompt-Guard-86M"
|
||||
max_seq_length: int = 2048
|
||||
is_instruct_model: bool = False
|
||||
quantization_format: CheckpointQuantizationFormat = (
|
||||
CheckpointQuantizationFormat.bf16
|
||||
)
|
||||
quantization_format: CheckpointQuantizationFormat = CheckpointQuantizationFormat.bf16
|
||||
arch_args: Dict[str, Any] = Field(default_factory=dict)
|
||||
recommended_sampling_params: Optional[SamplingParams] = None
|
||||
|
||||
|
|
|
@ -21,8 +21,12 @@ from prompt_toolkit.validation import Validator
|
|||
from termcolor import cprint
|
||||
|
||||
from llama_stack.cli.table import print_table
|
||||
|
||||
from llama_stack.distribution.build import build_image, ImageType
|
||||
from llama_stack.distribution.build import (
|
||||
SERVER_DEPENDENCIES,
|
||||
ImageType,
|
||||
build_image,
|
||||
get_provider_dependencies,
|
||||
)
|
||||
from llama_stack.distribution.datatypes import (
|
||||
BuildConfig,
|
||||
DistributionSpec,
|
||||
|
@ -35,7 +39,6 @@ from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
|
|||
from llama_stack.distribution.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
|
||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
|
||||
|
||||
|
||||
|
@ -52,9 +55,7 @@ def available_templates_specs() -> Dict[str, BuildConfig]:
|
|||
return template_specs
|
||||
|
||||
|
||||
def run_stack_build_command(
|
||||
parser: argparse.ArgumentParser, args: argparse.Namespace
|
||||
) -> None:
|
||||
def run_stack_build_command(args: argparse.Namespace) -> None:
|
||||
if args.list_templates:
|
||||
return _run_template_list_cmd()
|
||||
|
||||
|
@ -74,18 +75,11 @@ def run_stack_build_command(
|
|||
build_config.image_type = args.image_type
|
||||
else:
|
||||
cprint(
|
||||
f"Please specify a image-type (docker | conda | venv) for {args.template}",
|
||||
f"Please specify a image-type (container | conda | venv) for {args.template}",
|
||||
color="red",
|
||||
)
|
||||
return
|
||||
_run_stack_build_command_from_build_config(
|
||||
build_config,
|
||||
image_name=image_name,
|
||||
template_name=args.template,
|
||||
)
|
||||
return
|
||||
|
||||
if not args.config and not args.template:
|
||||
elif not args.config and not args.template:
|
||||
name = prompt(
|
||||
"> Enter a name for your Llama Stack (e.g. my-local-stack): ",
|
||||
validator=Validator.from_callable(
|
||||
|
@ -95,10 +89,10 @@ def run_stack_build_command(
|
|||
)
|
||||
|
||||
image_type = prompt(
|
||||
"> Enter the image type you want your Llama Stack to be built as (docker or conda or venv): ",
|
||||
"> Enter the image type you want your Llama Stack to be built as (container or conda or venv): ",
|
||||
validator=Validator.from_callable(
|
||||
lambda x: x in ["docker", "conda", "venv"],
|
||||
error_message="Invalid image type, please enter conda or docker or venv",
|
||||
lambda x: x in ["container", "conda", "venv"],
|
||||
error_message="Invalid image type, please enter conda or container or venv",
|
||||
),
|
||||
default="conda",
|
||||
)
|
||||
|
@ -132,11 +126,7 @@ def run_stack_build_command(
|
|||
|
||||
providers = dict()
|
||||
for api, providers_for_api in get_provider_registry().items():
|
||||
available_providers = [
|
||||
x
|
||||
for x in providers_for_api.keys()
|
||||
if x not in ("remote", "remote::sample")
|
||||
]
|
||||
available_providers = [x for x in providers_for_api.keys() if x not in ("remote", "remote::sample")]
|
||||
api_provider = prompt(
|
||||
"> Enter provider for API {}: ".format(api.value),
|
||||
completer=WordCompleter(available_providers),
|
||||
|
@ -159,9 +149,7 @@ def run_stack_build_command(
|
|||
description=description,
|
||||
)
|
||||
|
||||
build_config = BuildConfig(
|
||||
image_type=image_type, distribution_spec=distribution_spec
|
||||
)
|
||||
build_config = BuildConfig(image_type=image_type, distribution_spec=distribution_spec)
|
||||
else:
|
||||
with open(args.config, "r") as f:
|
||||
try:
|
||||
|
@ -180,8 +168,20 @@ def run_stack_build_command(
|
|||
)
|
||||
return
|
||||
|
||||
if args.print_deps_only:
|
||||
print(f"# Dependencies for {args.template or args.config or image_name}")
|
||||
normal_deps, special_deps = get_provider_dependencies(build_config.distribution_spec.providers)
|
||||
normal_deps += SERVER_DEPENDENCIES
|
||||
print(f"uv pip install {' '.join(normal_deps)}")
|
||||
for special_dep in special_deps:
|
||||
print(f"uv pip install {special_dep}")
|
||||
return
|
||||
|
||||
_run_stack_build_command_from_build_config(
|
||||
build_config, image_name=image_name, config_path=args.config
|
||||
build_config,
|
||||
image_name=image_name,
|
||||
config_path=args.config,
|
||||
template_name=args.template,
|
||||
)
|
||||
|
||||
|
||||
|
@ -195,9 +195,7 @@ def _generate_run_config(
|
|||
"""
|
||||
apis = list(build_config.distribution_spec.providers.keys())
|
||||
run_config = StackRunConfig(
|
||||
container_image=(
|
||||
image_name if build_config.image_type == ImageType.container.value else None
|
||||
),
|
||||
container_image=(image_name if build_config.image_type == ImageType.container.value else None),
|
||||
image_name=image_name,
|
||||
apis=apis,
|
||||
providers={},
|
||||
|
@ -217,13 +215,9 @@ def _generate_run_config(
|
|||
if p.deprecation_error:
|
||||
raise InvalidProviderError(p.deprecation_error)
|
||||
|
||||
config_type = instantiate_class_type(
|
||||
provider_registry[Api(api)][provider_type].config_class
|
||||
)
|
||||
config_type = instantiate_class_type(provider_registry[Api(api)][provider_type].config_class)
|
||||
if hasattr(config_type, "sample_run_config"):
|
||||
config = config_type.sample_run_config(
|
||||
__distro_dir__=f"distributions/{image_name}"
|
||||
)
|
||||
config = config_type.sample_run_config(__distro_dir__=f"distributions/{image_name}")
|
||||
else:
|
||||
config = {}
|
||||
|
||||
|
@ -258,9 +252,7 @@ def _run_stack_build_command_from_build_config(
|
|||
image_name = f"distribution-{template_name}"
|
||||
else:
|
||||
if not image_name:
|
||||
raise ValueError(
|
||||
"Please specify an image name when building a docker image without a template"
|
||||
)
|
||||
raise ValueError("Please specify an image name when building a container image without a template")
|
||||
elif build_config.image_type == ImageType.conda.value:
|
||||
if not image_name:
|
||||
raise ValueError("Please specify an image name when building a conda image")
|
||||
|
@ -288,10 +280,7 @@ def _run_stack_build_command_from_build_config(
|
|||
|
||||
if template_name:
|
||||
# copy run.yaml from template to build_dir instead of generating it again
|
||||
template_path = (
|
||||
importlib.resources.files("llama_stack")
|
||||
/ f"templates/{template_name}/run.yaml"
|
||||
)
|
||||
template_path = importlib.resources.files("llama_stack") / f"templates/{template_name}/run.yaml"
|
||||
with importlib.resources.as_file(template_path) as path:
|
||||
run_config_file = build_dir / f"{template_name}-run.yaml"
|
||||
shutil.copy(path, run_config_file)
|
||||
|
|
|
@ -63,10 +63,16 @@ environment is active, you must specify a name.
|
|||
),
|
||||
default=None,
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--print-deps-only",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Print the dependencies for the stack only, without building the stack",
|
||||
)
|
||||
|
||||
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
|
||||
# always keep implementation completely silo-ed away from CLI so CLI
|
||||
# can be fast to load and reduces dependencies
|
||||
from ._build import run_stack_build_command
|
||||
|
||||
return run_stack_build_command(self.parser, args)
|
||||
return run_stack_build_command(args)
|
||||
|
|
|
@ -21,15 +21,19 @@ class StackListProviders(Subcommand):
|
|||
self._add_arguments()
|
||||
self.parser.set_defaults(func=self._run_providers_list_cmd)
|
||||
|
||||
def _add_arguments(self):
|
||||
from llama_stack.distribution.datatypes import Api
|
||||
@property
|
||||
def providable_apis(self):
|
||||
from llama_stack.distribution.distribution import providable_apis
|
||||
|
||||
api_values = [a.value for a in Api]
|
||||
return [api.value for api in providable_apis()]
|
||||
|
||||
def _add_arguments(self):
|
||||
self.parser.add_argument(
|
||||
"api",
|
||||
type=str,
|
||||
choices=api_values,
|
||||
help="API to list providers for (one of: {})".format(api_values),
|
||||
choices=self.providable_apis,
|
||||
nargs="?",
|
||||
help="API to list providers for. List all if not specified.",
|
||||
)
|
||||
|
||||
def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
|
||||
|
@ -37,20 +41,29 @@ class StackListProviders(Subcommand):
|
|||
from llama_stack.distribution.distribution import Api, get_provider_registry
|
||||
|
||||
all_providers = get_provider_registry()
|
||||
providers_for_api = all_providers[Api(args.api)]
|
||||
if args.api:
|
||||
providers = [(args.api, all_providers[Api(args.api)])]
|
||||
else:
|
||||
providers = [(k.value, prov) for k, prov in all_providers.items()]
|
||||
|
||||
providers = [p for api, p in providers if api in self.providable_apis]
|
||||
|
||||
# eventually, this should query a registry at llama.meta.com/llamastack/distributions
|
||||
headers = [
|
||||
"API Type",
|
||||
"Provider Type",
|
||||
"PIP Package Dependencies",
|
||||
]
|
||||
|
||||
rows = []
|
||||
for spec in providers_for_api.values():
|
||||
if spec.provider_type == "sample":
|
||||
|
||||
specs = [spec for p in providers for spec in p.values()]
|
||||
for spec in specs:
|
||||
if spec.is_sample:
|
||||
continue
|
||||
rows.append(
|
||||
[
|
||||
spec.api.value,
|
||||
spec.provider_type,
|
||||
",".join(spec.pip_packages),
|
||||
]
|
||||
|
@ -59,4 +72,5 @@ class StackListProviders(Subcommand):
|
|||
rows,
|
||||
headers,
|
||||
separate_rows=True,
|
||||
sort_by=(0, 1),
|
||||
)
|
||||
|
|
|
@ -55,6 +55,23 @@ class StackRun(Subcommand):
|
|||
default=[],
|
||||
metavar="KEY=VALUE",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--tls-keyfile",
|
||||
type=str,
|
||||
help="Path to TLS key file for HTTPS",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--tls-certfile",
|
||||
type=str,
|
||||
help="Path to TLS certificate file for HTTPS",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--image-type",
|
||||
type=str,
|
||||
help="Image Type used during the build. This can be either conda or container or venv.",
|
||||
choices=["conda", "container", "venv"],
|
||||
default="conda",
|
||||
)
|
||||
|
||||
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
|
||||
import importlib.resources
|
||||
|
@ -82,31 +99,21 @@ class StackRun(Subcommand):
|
|||
|
||||
if not config_file.exists() and not has_yaml_suffix:
|
||||
# check if this is a template
|
||||
config_file = (
|
||||
Path(REPO_ROOT) / "llama_stack" / "templates" / args.config / "run.yaml"
|
||||
)
|
||||
config_file = Path(REPO_ROOT) / "llama_stack" / "templates" / args.config / "run.yaml"
|
||||
if config_file.exists():
|
||||
template_name = args.config
|
||||
|
||||
if not config_file.exists() and not has_yaml_suffix:
|
||||
# check if it's a build config saved to conda dir
|
||||
config_file = Path(
|
||||
BUILDS_BASE_DIR / ImageType.conda.value / f"{args.config}-run.yaml"
|
||||
)
|
||||
config_file = Path(BUILDS_BASE_DIR / ImageType.conda.value / f"{args.config}-run.yaml")
|
||||
|
||||
if not config_file.exists() and not has_yaml_suffix:
|
||||
# check if it's a build config saved to container dir
|
||||
config_file = Path(
|
||||
BUILDS_BASE_DIR / ImageType.container.value / f"{args.config}-run.yaml"
|
||||
)
|
||||
config_file = Path(BUILDS_BASE_DIR / ImageType.container.value / f"{args.config}-run.yaml")
|
||||
|
||||
if not config_file.exists() and not has_yaml_suffix:
|
||||
# check if it's a build config saved to ~/.llama dir
|
||||
config_file = Path(
|
||||
DISTRIBS_BASE_DIR
|
||||
/ f"llamastack-{args.config}"
|
||||
/ f"{args.config}-run.yaml"
|
||||
)
|
||||
config_file = Path(DISTRIBS_BASE_DIR / f"llamastack-{args.config}" / f"{args.config}-run.yaml")
|
||||
|
||||
if not config_file.exists():
|
||||
self.parser.error(
|
||||
|
@ -118,18 +125,11 @@ class StackRun(Subcommand):
|
|||
config_dict = yaml.safe_load(config_file.read_text())
|
||||
config = parse_and_maybe_upgrade_config(config_dict)
|
||||
|
||||
if config.container_image:
|
||||
script = (
|
||||
importlib.resources.files("llama_stack")
|
||||
/ "distribution/start_container.sh"
|
||||
)
|
||||
image_name = (
|
||||
f"distribution-{template_name}"
|
||||
if template_name
|
||||
else config.container_image
|
||||
)
|
||||
if args.image_type == ImageType.container.value or config.container_image:
|
||||
script = importlib.resources.files("llama_stack") / "distribution/start_container.sh"
|
||||
image_name = f"distribution-{template_name}" if template_name else config.container_image
|
||||
run_args = [script, image_name]
|
||||
else:
|
||||
elif args.image_type == ImageType.conda.value:
|
||||
current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
|
||||
image_name = args.image_name or current_conda_env
|
||||
if not image_name:
|
||||
|
@ -140,12 +140,12 @@ class StackRun(Subcommand):
|
|||
return
|
||||
|
||||
def get_conda_prefix(env_name):
|
||||
# Conda "base" environment does not end with "base" in the
|
||||
# prefix, so should be handled separately.
|
||||
if env_name == "base":
|
||||
return os.environ.get("CONDA_PREFIX")
|
||||
# Get conda environments info
|
||||
conda_env_info = json.loads(
|
||||
subprocess.check_output(
|
||||
["conda", "info", "--envs", "--json"]
|
||||
).decode()
|
||||
)
|
||||
conda_env_info = json.loads(subprocess.check_output(["conda", "info", "--envs", "--json"]).decode())
|
||||
envs = conda_env_info["envs"]
|
||||
for envpath in envs:
|
||||
if envpath.endswith(env_name):
|
||||
|
@ -169,14 +169,20 @@ class StackRun(Subcommand):
|
|||
)
|
||||
return
|
||||
|
||||
script = (
|
||||
importlib.resources.files("llama_stack")
|
||||
/ "distribution/start_conda_env.sh"
|
||||
)
|
||||
script = importlib.resources.files("llama_stack") / "distribution/start_conda_env.sh"
|
||||
run_args = [
|
||||
script,
|
||||
image_name,
|
||||
]
|
||||
else:
|
||||
# else must be venv since that is the only valid option left.
|
||||
current_venv = os.environ.get("VIRTUAL_ENV")
|
||||
venv = args.image_name or current_venv
|
||||
script = importlib.resources.files("llama_stack") / "distribution/start_venv.sh"
|
||||
run_args = [
|
||||
script,
|
||||
venv,
|
||||
]
|
||||
|
||||
run_args.extend([str(config_file), str(args.port)])
|
||||
if args.disable_ipv6:
|
||||
|
@ -198,4 +204,7 @@ class StackRun(Subcommand):
|
|||
return
|
||||
run_args.extend(["--env", f"{key}={value}"])
|
||||
|
||||
if args.tls_keyfile and args.tls_certfile:
|
||||
run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
|
||||
|
||||
run_with_pty(run_args)
|
||||
|
|
|
@ -31,6 +31,8 @@ class StackParser(Subcommand):
|
|||
version=f"{version('llama-stack')}",
|
||||
)
|
||||
|
||||
self.parser.set_defaults(func=lambda args: self.parser.print_help())
|
||||
|
||||
subparsers = self.parser.add_subparsers(title="stack_subcommands")
|
||||
|
||||
# Add sub-commands
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
import re
|
||||
import textwrap
|
||||
from typing import Iterable
|
||||
|
||||
from termcolor import cprint
|
||||
|
||||
|
@ -22,11 +23,7 @@ def format_row(row, col_widths):
|
|||
if line.strip() == "":
|
||||
lines.append("")
|
||||
else:
|
||||
lines.extend(
|
||||
textwrap.wrap(
|
||||
line, width, break_long_words=False, replace_whitespace=False
|
||||
)
|
||||
)
|
||||
lines.extend(textwrap.wrap(line, width, break_long_words=False, replace_whitespace=False))
|
||||
return lines
|
||||
|
||||
wrapped = [wrap(item, width) for item, width in zip(row, col_widths)]
|
||||
|
@ -43,11 +40,15 @@ def format_row(row, col_widths):
|
|||
return "\n".join(lines)
|
||||
|
||||
|
||||
def print_table(rows, headers=None, separate_rows: bool = False):
|
||||
def print_table(rows, headers=None, separate_rows: bool = False, sort_by: Iterable[int] = tuple()):
|
||||
def itemlen(item):
|
||||
return max([len(line) for line in strip_ansi_colors(item).split("\n")])
|
||||
|
||||
rows = [[x or "" for x in row] for row in rows]
|
||||
|
||||
if sort_by:
|
||||
rows.sort(key=lambda x: tuple(x[i] for i in sort_by))
|
||||
|
||||
if not headers:
|
||||
col_widths = [max(itemlen(item) for item in col) for col in zip(*rows)]
|
||||
else:
|
||||
|
|
|
@ -8,6 +8,7 @@ from datetime import datetime
|
|||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
from llama_stack.distribution.configure import (
|
||||
LLAMA_STACK_RUN_CONFIG_VERSION,
|
||||
parse_and_maybe_upgrade_config,
|
||||
|
@ -41,9 +42,7 @@ def up_to_date_config():
|
|||
- provider_id: provider1
|
||||
provider_type: inline::meta-reference
|
||||
config: {{}}
|
||||
""".format(
|
||||
version=LLAMA_STACK_RUN_CONFIG_VERSION, built_at=datetime.now().isoformat()
|
||||
)
|
||||
""".format(version=LLAMA_STACK_RUN_CONFIG_VERSION, built_at=datetime.now().isoformat())
|
||||
)
|
||||
|
||||
|
||||
|
@ -83,9 +82,7 @@ def old_config():
|
|||
telemetry:
|
||||
provider_type: noop
|
||||
config: {{}}
|
||||
""".format(
|
||||
built_at=datetime.now().isoformat()
|
||||
)
|
||||
""".format(built_at=datetime.now().isoformat())
|
||||
)
|
||||
|
||||
|
||||
|
@ -108,10 +105,7 @@ def test_parse_and_maybe_upgrade_config_up_to_date(up_to_date_config):
|
|||
def test_parse_and_maybe_upgrade_config_old_format(old_config):
|
||||
result = parse_and_maybe_upgrade_config(old_config)
|
||||
assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION
|
||||
assert all(
|
||||
api in result.providers
|
||||
for api in ["inference", "safety", "memory", "telemetry"]
|
||||
)
|
||||
assert all(api in result.providers for api in ["inference", "safety", "memory", "telemetry"])
|
||||
safety_provider = result.providers["safety"][0]
|
||||
assert safety_provider.provider_type == "meta-reference"
|
||||
assert "llama_guard_shield" in safety_provider.config
|
||||
|
|