Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-15 14:08:00 +00:00)

Commit 970d0f307f: Merge branch 'main' into openai-vector-store/qdrant
338 changed files with 15301 additions and 15997 deletions
.github/actions/run-and-record-tests/action.yml (new file, 198 lines)
@@ -0,0 +1,198 @@

name: 'Run and Record Tests'
description: 'Run integration tests and handle recording/artifact upload'

inputs:
  test-types:
    description: 'JSON array of test types to run'
    required: true
  stack-config:
    description: 'Stack configuration to use'
    required: true
  provider:
    description: 'Provider to use for tests'
    required: true
  inference-mode:
    description: 'Inference mode (record or replay)'
    required: true
  run-vision-tests:
    description: 'Whether to run vision tests'
    required: false
    default: 'false'

runs:
  using: 'composite'
  steps:
    - name: Check Storage and Memory Available Before Tests
      if: ${{ always() }}
      shell: bash
      run: |
        free -h
        df -h

    - name: Set environment variables
      shell: bash
      run: |
        echo "LLAMA_STACK_CLIENT_TIMEOUT=300" >> $GITHUB_ENV
        echo "LLAMA_STACK_TEST_INFERENCE_MODE=${{ inputs.inference-mode }}" >> $GITHUB_ENV

        # Configure provider-specific settings
        if [ "${{ inputs.provider }}" == "ollama" ]; then
          echo "OLLAMA_URL=http://0.0.0.0:11434" >> $GITHUB_ENV
          echo "TEXT_MODEL=ollama/llama3.2:3b-instruct-fp16" >> $GITHUB_ENV
          echo "SAFETY_MODEL=ollama/llama-guard3:1b" >> $GITHUB_ENV
        else
          echo "VLLM_URL=http://localhost:8000/v1" >> $GITHUB_ENV
          echo "TEXT_MODEL=vllm/meta-llama/Llama-3.2-1B-Instruct" >> $GITHUB_ENV
        fi

        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
          echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings/vision" >> $GITHUB_ENV
        else
          echo "LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings" >> $GITHUB_ENV
        fi

    - name: Run Llama Stack Server
      if: ${{ contains(inputs.stack-config, 'server:') }}
      shell: bash
      run: |
        # Run this so pytest in a loop doesn't start-stop servers in a loop
        echo "Starting Llama Stack Server"
        nohup uv run llama stack run ci-tests --image-type venv > server.log 2>&1 &

        echo "Waiting for Llama Stack Server to start"
        for i in {1..30}; do
          if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
            echo "Llama Stack Server started"
            exit 0
          fi
          sleep 1
        done

        echo "Llama Stack Server failed to start"
        cat server.log
        exit 1

    - name: Run Integration Tests
      shell: bash
      run: |
        stack_config="${{ inputs.stack-config }}"
        EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"

        # Configure provider-specific settings
        if [ "${{ inputs.provider }}" == "ollama" ]; then
          EXTRA_PARAMS="--safety-shield=llama-guard"
        else
          EXTRA_PARAMS=""
          EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
        fi

        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
          if uv run pytest -s -v tests/integration/inference/test_vision_inference.py --stack-config=${stack_config} \
            -k "not( ${EXCLUDE_TESTS} )" \
            --vision-model=ollama/llama3.2-vision:11b \
            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
            --color=yes ${EXTRA_PARAMS} \
            --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-vision.log; then
            echo "✅ Tests completed for vision"
          else
            echo "❌ Tests failed for vision"
            exit 1
          fi

          exit 0
        fi

        # Run non-vision tests
        TEST_TYPES='${{ inputs.test-types }}'
        echo "Test types to run: $TEST_TYPES"

        # Collect all test files for the specified test types
        TEST_FILES=""
        for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
          # if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
          if [ "${{ inputs.provider }}" == "vllm" ]; then
            if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
              echo "Skipping $test_type for vllm provider"
              continue
            fi
          fi

          if [ -d "tests/integration/$test_type" ]; then
            # Find all Python test files in this directory
            test_files=$(find tests/integration/$test_type -name "test_*.py" -o -name "*_test.py")
            if [ -n "$test_files" ]; then
              TEST_FILES="$TEST_FILES $test_files"
              echo "Added test files from $test_type: $(echo $test_files | wc -w) files"
            fi
          else
            echo "Warning: Directory tests/integration/$test_type does not exist"
          fi
        done

        if [ -z "$TEST_FILES" ]; then
          echo "No test files found for the specified test types"
          exit 1
        fi

        echo "=== Running all collected tests in a single pytest command ==="
        echo "Total test files: $(echo $TEST_FILES | wc -w)"

        if uv run pytest -s -v $TEST_FILES --stack-config=${stack_config} \
          -k "not( ${EXCLUDE_TESTS} )" \
          --text-model=$TEXT_MODEL \
          --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
          --color=yes ${EXTRA_PARAMS} \
          --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-all.log; then
          echo "✅ All tests completed successfully"
        else
          echo "❌ Tests failed"
          exit 1
        fi

    - name: Check Storage and Memory Available After Tests
      if: ${{ always() }}
      shell: bash
      run: |
        free -h
        df -h

    - name: Commit and push recordings
      if: ${{ inputs.inference-mode == 'record' }}
      shell: bash
      run: |
        echo "Checking for recording changes"
        git status --porcelain tests/integration/recordings/

        if [[ -n $(git status --porcelain tests/integration/recordings/) ]]; then
          echo "New recordings detected, committing and pushing"
          git add tests/integration/recordings/

          if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
            git commit -m "Recordings update from CI (vision)"
          else
            git commit -m "Recordings update from CI"
          fi

          git fetch origin ${{ github.event.pull_request.head.ref }}
          git rebase origin/${{ github.event.pull_request.head.ref }}
          echo "Rebased successfully"
          git push origin HEAD:${{ github.event.pull_request.head.ref }}
          echo "Pushed successfully"
        else
          echo "No recording changes"
        fi

    - name: Write inference logs to file
      if: ${{ always() }}
      shell: bash
      run: |
        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true

    - name: Upload logs
      if: ${{ always() }}
      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
      with:
        name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
        path: |
          *.log
        retention-days: 1
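For orientation (not part of the commit): a workflow job invokes this composite action roughly as in the sketch below. The input values shown are placeholders; the actual workflow later in this diff supplies them from its job matrix and the discover-tests job.

```yaml
# Sketch only: calling the composite action from a job (placeholder values).
jobs:
  replay-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4  # placeholder ref; the real workflow pins a SHA

      - name: Run tests
        uses: ./.github/actions/run-and-record-tests
        with:
          test-types: '["inference", "agents"]'  # JSON array, matching the test-types input
          stack-config: 'server:ci-tests'        # a 'server:' prefix makes the action start a server first
          provider: 'ollama'
          inference-mode: 'replay'
          run-vision-tests: 'false'
```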
.github/actions/run-integration-tests/action.yml (deleted file, 73 lines)
@@ -1,73 +0,0 @@

name: 'Run Integration Tests'
description: 'Run integration tests with configurable execution mode and provider settings'

inputs:
  test-types:
    description: 'Test types to run (JSON array)'
    required: true
  stack-config:
    description: 'Stack configuration: "ci-tests" or "server:ci-tests"'
    required: true
  provider:
    description: 'Provider to use: "ollama" or "vllm"'
    required: true
  inference-mode:
    description: 'Inference mode: "record" or "replay"'
    required: true

outputs:
  logs-path:
    description: 'Path to generated log files'
    value: '*.log'

runs:
  using: 'composite'
  steps:
    - name: Run Integration Tests
      env:
        LLAMA_STACK_CLIENT_TIMEOUT: "300"
        LLAMA_STACK_TEST_RECORDING_DIR: "tests/integration/recordings"
        LLAMA_STACK_TEST_INFERENCE_MODE: ${{ inputs.inference-mode }}
      shell: bash
      run: |
        stack_config="${{ inputs.stack-config }}"
        EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"

        # Configure provider-specific settings
        if [ "${{ inputs.provider }}" == "ollama" ]; then
          export OLLAMA_URL="http://0.0.0.0:11434"
          export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
          export SAFETY_MODEL="ollama/llama-guard3:1b"
          EXTRA_PARAMS="--safety-shield=llama-guard"
        else
          export VLLM_URL="http://localhost:8000/v1"
          export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
          EXTRA_PARAMS=""
          EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
        fi

        TEST_TYPES='${{ inputs.test-types }}'
        echo "Test types to run: $TEST_TYPES"

        for test_type in $(echo "$TEST_TYPES" | jq -r '.[]'); do
          # if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
          if [ "${{ inputs.provider }}" == "vllm" ]; then
            if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
              continue
            fi
          fi

          echo "=== Running tests for: $test_type ==="

          if uv run pytest -s -v tests/integration/$test_type --stack-config=${stack_config} \
            -k "not( ${EXCLUDE_TESTS} )" \
            --text-model=$TEXT_MODEL \
            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
            --color=yes ${EXTRA_PARAMS} \
            --capture=tee-sys | tee pytest-${{ inputs.inference-mode }}-$test_type.log; then
            echo "✅ Tests completed for $test_type"
          else
            echo "❌ Tests failed for $test_type"
            exit 1
          fi
        done
.github/actions/setup-ollama/action.yml
@@ -1,11 +1,23 @@
 name: Setup Ollama
 description: Start Ollama
+inputs:
+  run-vision-tests:
+    description: 'Run vision tests: "true" or "false"'
+    required: false
+    default: 'false'
 runs:
   using: "composite"
   steps:
     - name: Start Ollama
       shell: bash
       run: |
-        docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
+        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
+          image="ollama-with-vision-model"
+        else
+          image="ollama-with-models"
+        fi
+
+        echo "Starting Ollama with image: $image"
+        docker run -d --name ollama -p 11434:11434 docker.io/llamastack/$image
         echo "Verifying Ollama status..."
         timeout 30 bash -c 'while ! curl -s -L http://127.0.0.1:11434; do sleep 1 && echo "."; done'
.github/actions/setup-test-environment/action.yml (new file, 51 lines)
@@ -0,0 +1,51 @@

name: 'Setup Test Environment'
description: 'Common setup steps for integration tests including dependencies, providers, and build'

inputs:
  python-version:
    description: 'Python version to use'
    required: true
  client-version:
    description: 'Client version (latest or published)'
    required: true
  provider:
    description: 'Provider to setup (ollama or vllm)'
    required: true
    default: 'ollama'
  run-vision-tests:
    description: 'Whether to setup provider for vision tests'
    required: false
    default: 'false'
  inference-mode:
    description: 'Inference mode (record or replay)'
    required: true

runs:
  using: 'composite'
  steps:
    - name: Install dependencies
      uses: ./.github/actions/setup-runner
      with:
        python-version: ${{ inputs.python-version }}
        client-version: ${{ inputs.client-version }}

    - name: Setup ollama
      if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
      uses: ./.github/actions/setup-ollama
      with:
        run-vision-tests: ${{ inputs.run-vision-tests }}

    - name: Setup vllm
      if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
      uses: ./.github/actions/setup-vllm

    - name: Build Llama Stack
      shell: bash
      run: |
        uv run llama stack build --template ci-tests --image-type venv

    - name: Configure git for commits
      shell: bash
      run: |
        git config --local user.email "github-actions[bot]@users.noreply.github.com"
        git config --local user.name "github-actions[bot]"
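Usage note (not part of the commit): a job typically runs this action before `run-and-record-tests`. A minimal sketch with placeholder values is below; with `inference-mode: 'replay'` the "Setup ollama" / "Setup vllm" steps above are skipped entirely, since both are gated on `inference-mode == 'record'`.

```yaml
# Sketch only: a recording job prepares its environment like this (placeholder values).
- name: Setup test environment
  uses: ./.github/actions/setup-test-environment
  with:
    python-version: '3.12'
    client-version: 'latest'
    provider: 'ollama'
    run-vision-tests: 'false'
    inference-mode: 'record'   # 'record' starts the provider; 'replay' skips provider setup
```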
.github/workflows/README.md
@@ -1,6 +1,6 @@
 # Llama Stack CI
 
-Llama Stack uses GitHub Actions for Continous Integration (CI). Below is a table detailing what CI the project includes and the purpose.
+Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a table detailing what CI the project includes and the purpose.
 
 | Name | File | Purpose |
 | ---- | ---- | ------- |
@@ -8,7 +8,7 @@
 | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
 | Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
 | SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
-| Integration Tests | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration |
+| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration in replay mode |
 | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
.github/workflows/integration-tests.yml
@@ -1,22 +1,22 @@
-name: Integration Tests
+name: Integration Tests (Replay)
 
-run-name: Run the integration test suite from tests/integration
+run-name: Run the integration test suite from tests/integration in replay mode
 
 on:
   push:
     branches: [ main ]
   pull_request:
     branches: [ main ]
-    types: [opened, synchronize, labeled]
+    types: [opened, synchronize, reopened]
     paths:
       - 'llama_stack/**'
       - 'tests/**'
       - 'uv.lock'
       - 'pyproject.toml'
-      - 'requirements.txt'
       - '.github/workflows/integration-tests.yml' # This workflow
       - '.github/actions/setup-ollama/action.yml'
-      - '.github/actions/run-integration-tests/action.yml'
+      - '.github/actions/setup-test-environment/action.yml'
+      - '.github/actions/run-and-record-tests/action.yml'
   schedule:
     # If changing the cron schedule, update the provider in the test-matrix job
     - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
@@ -33,31 +33,15 @@ on:
       default: 'ollama'
 
 concurrency:
-  # This creates three concurrency groups:
-  # ${{ github.workflow }}-${{ github.ref }}-rerecord (for valid triggers with re-record-tests label)
-  # ${{ github.workflow }}-${{ github.ref }}-replay (for valid triggers without re-record-tests label)
-  # ${{ github.workflow }}-${{ github.ref }}-no-run (for invalid triggers that will be skipped)
-  # The "no-run" group ensures that irrelevant label events don't interfere with the real workflows.
-  group: >-
-    ${{ github.workflow }}-${{ github.ref }}-${{
-      (github.event.action == 'opened' ||
       github.event.action == 'synchronize' ||
-       (github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))) &&
-      (contains(github.event.pull_request.labels.*.name, 're-record-tests') && 'rerecord' || 'replay') ||
-      'no-run'
-    }}
+  # Skip concurrency for pushes to main - each commit should be tested independently
+  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
   cancel-in-progress: true
 
 jobs:
   discover-tests:
-    if: |
-      github.event.action == 'opened' ||
-      github.event.action == 'synchronize' ||
-      (github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))
     runs-on: ubuntu-latest
     outputs:
       test-types: ${{ steps.generate-test-types.outputs.test-types }}
-      rerecord-tests: ${{ steps.check-rerecord-tests.outputs.rerecord-tests }}
 
     steps:
       - name: Checkout repository
@@ -67,94 +51,13 @@ jobs:
         id: generate-test-types
         run: |
           # Get test directories dynamically, excluding non-test directories
+          # NOTE: we are excluding post_training since the tests take too long
           TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
-            grep -Ev "^(__pycache__|fixtures|test_cases|recordings)$" |
+            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
             sort | jq -R -s -c 'split("\n")[:-1]')
           echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
 
-      - name: Check if re-record-tests label exists
-        id: check-rerecord-tests
-        run: |
-          if [[ "${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') }}" == "true" ]]; then
-            echo "rerecord-tests=true" >> $GITHUB_OUTPUT
-          else
-            echo "rerecord-tests=false" >> $GITHUB_OUTPUT
-          fi
-
-  record-tests:
-    # Sequential job for recording to avoid SQLite conflicts
-    if: ${{ needs.discover-tests.outputs.rerecord-tests == 'true' }}
-    needs: discover-tests
-    runs-on: ubuntu-latest
-
-    permissions:
-      contents: write
-      pull-requests: write
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-      - name: Install dependencies
-        uses: ./.github/actions/setup-runner
-        with:
-          python-version: "3.12" # Use single Python version for recording
-          client-version: "latest"
-
-      - name: Setup ollama
-        if: ${{ inputs.test-provider == 'ollama' }}
-        uses: ./.github/actions/setup-ollama
-
-      - name: Setup vllm
-        if: ${{ inputs.test-provider == 'vllm' }}
-        uses: ./.github/actions/setup-vllm
-
-      - name: Build Llama Stack
-        run: |
-          uv run llama stack build --template ci-tests --image-type venv
-
-      - name: Configure git for commits
-        run: |
-          git config --local user.email "github-actions[bot]@users.noreply.github.com"
-          git config --local user.name "github-actions[bot]"
-
-      - name: Run Integration Tests for All Types (Recording Mode)
-        uses: ./.github/actions/run-integration-tests
-        with:
-          test-types: ${{ needs.discover-tests.outputs.test-types }}
-          stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
-          provider: ${{ inputs.test-provider }}
-          inference-mode: 'record'
-
-      - name: Commit and push recordings
-        run: |
-          if ! git diff --quiet tests/integration/recordings/; then
-            echo "Committing recordings"
-            git add tests/integration/recordings/
-            git commit -m "Update recordings"
-            echo "Pushing all recording commits to PR"
-            git push origin HEAD:${{ github.head_ref }}
-          else
-            echo "No recording changes"
-          fi
-
-      - name: Write inference logs to file
-        if: ${{ always() }}
-        run: |
-          sudo docker logs ollama > ollama-recording.log || true
-
-      - name: Upload recording logs
-        if: ${{ always() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
-        with:
-          name: recording-logs-${{ github.run_id }}
-          path: |
-            *.log
-          retention-days: 1
-
-  run-tests:
-    # Skip this job if we're in recording mode (handled by record-tests job)
-    if: ${{ needs.discover-tests.outputs.rerecord-tests != 'true' }}
+  run-replay-mode-tests:
     needs: discover-tests
     runs-on: ubuntu-latest
 
@@ -164,48 +67,29 @@ jobs:
         client-type: [library, server]
         # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
         provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
-        python-version: ["3.12", "3.13"]
-        client-version: ${{ (github.event.schedule == '0 0 * * 0' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
+        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
+        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        run-vision-tests: ['true', 'false']
 
     steps:
       - name: Checkout repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 
-      - name: Install dependencies
-        uses: ./.github/actions/setup-runner
+      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
         with:
           python-version: ${{ matrix.python-version }}
           client-version: ${{ matrix.client-version }}
+          provider: ${{ matrix.provider }}
+          run-vision-tests: ${{ matrix.run-vision-tests }}
+          inference-mode: 'replay'
 
-      - name: Build Llama Stack
-        run: |
-          uv run llama stack build --template ci-tests --image-type venv
-
-      - name: Check Storage and Memory Available Before Tests
-        if: ${{ always() }}
-        run: |
-          free -h
-          df -h
-
-      - name: Run Integration Tests (Replay Mode)
-        uses: ./.github/actions/run-integration-tests
+      - name: Run tests
+        uses: ./.github/actions/run-and-record-tests
         with:
           test-types: ${{ needs.discover-tests.outputs.test-types }}
           stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
           provider: ${{ matrix.provider }}
           inference-mode: 'replay'
-
-      - name: Check Storage and Memory Available After Tests
-        if: ${{ always() }}
-        run: |
-          free -h
-          df -h
-
-      - name: Upload test logs on failure
-        if: ${{ failure() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
-        with:
-          name: test-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.provider }}-${{ matrix.client-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
-          path: |
-            *.log
-          retention-days: 1
+          run-vision-tests: ${{ matrix.run-vision-tests }}
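A brief aside on the concurrency change above (not part of the commit): the single-expression group resolves differently for pushes to main than for pull requests. A sketch, with the resulting group values shown as comments (run and PR numbers are placeholders):

```yaml
# Sketch: how the new concurrency expression behaves (placeholder run/PR numbers).
concurrency:
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
  cancel-in-progress: true
  # push to main:       group = "Integration Tests (Replay)-<run_id>"             -> unique per run, main builds are never cancelled
  # pull_request event: group = "Integration Tests (Replay)-refs/pull/<n>/merge"  -> a new push cancels the in-flight run for that PR
```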
.github/workflows/providers-build.yml
@@ -9,8 +9,8 @@ on:
     paths:
       - 'llama_stack/cli/stack/build.py'
       - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/distribution/build.*'
-      - 'llama_stack/distribution/*.sh'
+      - 'llama_stack/core/build.*'
+      - 'llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
       - 'llama_stack/templates/**'
       - 'pyproject.toml'
@@ -19,8 +19,8 @@ on:
     paths:
       - 'llama_stack/cli/stack/build.py'
       - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/distribution/build.*'
-      - 'llama_stack/distribution/*.sh'
+      - 'llama_stack/core/build.*'
+      - 'llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
       - 'llama_stack/templates/**'
       - 'pyproject.toml'
@@ -108,7 +108,7 @@ jobs:
           IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi
@@ -142,7 +142,7 @@ jobs:
           IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi

.github/workflows/test-external-provider-module.yml
@@ -12,12 +12,13 @@ on:
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
-      - 'requirements.txt'
       - 'tests/external/*'
       - '.github/workflows/test-external-provider-module.yml' # This workflow
 
 jobs:
   test-external-providers-from-module:
+    # This workflow is disabled. See https://github.com/meta-llama/llama-stack/pull/2975#issuecomment-3138702984 for details
+    if: false
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -47,7 +48,7 @@ jobs:
 
       - name: Build distro from config file
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/ramalama-stack/build.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml
 
       - name: Start Llama Stack server in background
         if: ${{ matrix.image-type }} == 'venv'
.github/workflows/test-external.yml
@@ -43,11 +43,11 @@ jobs:
 
       - name: Print distro dependencies
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml --print-deps-only
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml --print-deps-only
 
       - name: Build distro from config file
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml
 
       - name: Start Llama Stack server in background
         if: ${{ matrix.image-type }} == 'venv'

@@ -1,7 +1,7 @@
 include pyproject.toml
 include llama_stack/models/llama/llama3/tokenizer.model
 include llama_stack/models/llama/llama4/tokenizer.model
-include llama_stack/distribution/*.sh
+include llama_stack.core/*.sh
 include llama_stack/cli/scripts/*.sh
 include llama_stack/templates/*/*.yaml
 include llama_stack/providers/tests/test_cases/inference/*.json

@@ -6,7 +6,6 @@
 [](https://discord.gg/llama-stack)
 [](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
 [](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
-(one badge/image line removed here; its markup was not preserved in this text mirror)
 
 [**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
 

docs/_static/llama-stack-spec.html
@@ -15078,22 +15078,6 @@
             "DPOAlignmentConfig": {
                 "type": "object",
                 "properties": {
-                    "reward_scale": {
-                        "type": "number",
-                        "description": "Scaling factor for the reward signal"
-                    },
-                    "reward_clip": {
-                        "type": "number",
-                        "description": "Maximum absolute value for reward clipping"
-                    },
-                    "epsilon": {
-                        "type": "number",
-                        "description": "Small value added for numerical stability"
-                    },
-                    "gamma": {
-                        "type": "number",
-                        "description": "Discount factor for future rewards"
-                    },
                     "beta": {
                         "type": "number",
                         "description": "Temperature parameter for the DPO loss"
@@ -15106,10 +15090,6 @@
             },
             "additionalProperties": false,
             "required": [
-                "reward_scale",
-                "reward_clip",
-                "epsilon",
-                "gamma",
                 "beta",
                 "loss_type"
             ],
docs/_static/llama-stack-spec.yaml
@@ -11163,20 +11163,6 @@ components:
     DPOAlignmentConfig:
       type: object
       properties:
-        reward_scale:
-          type: number
-          description: Scaling factor for the reward signal
-        reward_clip:
-          type: number
-          description: >-
-            Maximum absolute value for reward clipping
-        epsilon:
-          type: number
-          description: >-
-            Small value added for numerical stability
-        gamma:
-          type: number
-          description: Discount factor for future rewards
         beta:
           type: number
           description: Temperature parameter for the DPO loss
@@ -11186,10 +11172,6 @@ components:
         description: The type of loss function to use for DPO
       additionalProperties: false
       required:
-        - reward_scale
-        - reward_clip
-        - epsilon
-        - gamma
         - beta
         - loss_type
       title: DPOAlignmentConfig
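For readers of the spec diff: after these removals the DPOAlignmentConfig schema reduces to roughly the following. This is a sketch reconstructed only from the unchanged context lines above; the full `loss_type` definition (for example its allowed values) is not visible in these hunks.

```yaml
# Sketch of the resulting schema, reconstructed from the context lines above.
DPOAlignmentConfig:
  type: object
  properties:
    beta:
      type: number
      description: Temperature parameter for the DPO loss
    loss_type:
      description: The type of loss function to use for DPO   # full definition not shown in the hunk
  additionalProperties: false
  required:
    - beta
    - loss_type
  title: DPOAlignmentConfig
```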
|
@ -165,7 +165,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -275,7 +275,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -265,7 +265,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -3216,19 +3216,19 @@
|
||||||
"INFO:datasets:Duckdb version 1.1.3 available.\n",
|
"INFO:datasets:Duckdb version 1.1.3 available.\n",
|
||||||
"INFO:datasets:TensorFlow version 2.18.0 available.\n",
|
"INFO:datasets:TensorFlow version 2.18.0 available.\n",
|
||||||
"INFO:datasets:JAX version 0.4.33 available.\n",
|
"INFO:datasets:JAX version 0.4.33 available.\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::equality served by basic\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: basic::equality served by basic\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::subset_of served by basic\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: basic::subset_of served by basic\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:\n"
|
"INFO:llama_stack.core.stack:\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -3448,7 +3448,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
|
"os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"experimental-post-training\")\n",
|
"client = LlamaStackAsLibraryClient(\"experimental-post-training\")\n",
|
||||||
"_ = client.initialize()"
|
"_ = client.initialize()"
|
||||||
]
|
]
|
||||||
|
|
|
@ -48,7 +48,7 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack_client import LlamaStackClient, Agent\n",
|
"from llama_stack_client import LlamaStackClient, Agent\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"from rich.pretty import pprint\n",
|
"from rich.pretty import pprint\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import uuid\n",
|
"import uuid\n",
|
||||||
|
|
|
@ -661,7 +661,7 @@
|
||||||
"except ImportError:\n",
|
"except ImportError:\n",
|
||||||
" print(\"Not in Google Colab environment\")\n",
|
" print(\"Not in Google Colab environment\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"together\")\n",
|
"client = LlamaStackAsLibraryClient(\"together\")\n",
|
||||||
"_ = client.initialize()"
|
"_ = client.initialize()"
|
||||||
|
|
|
@ -35,7 +35,7 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack_client import LlamaStackClient, Agent\n",
|
"from llama_stack_client import LlamaStackClient, Agent\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"from rich.pretty import pprint\n",
|
"from rich.pretty import pprint\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import uuid\n",
|
"import uuid\n",
|
||||||
|
|
|
@ -194,7 +194,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.
|
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack.core/server/endpoints.py` using the `generate.py` utility.
|
||||||
|
|
|
@ -17,7 +17,7 @@ import fire
|
||||||
import ruamel.yaml as yaml
|
import ruamel.yaml as yaml
|
||||||
|
|
||||||
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
|
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
|
||||||
from llama_stack.distribution.stack import LlamaStack # noqa: E402
|
from llama_stack.core.stack import LlamaStack # noqa: E402
|
||||||
|
|
||||||
from .pyopenapi.options import Options # noqa: E402
|
from .pyopenapi.options import Options # noqa: E402
|
||||||
from .pyopenapi.specification import Info, Server # noqa: E402
|
from .pyopenapi.specification import Info, Server # noqa: E402
|
||||||
|
|
|
@ -12,7 +12,7 @@ from typing import TextIO
|
||||||
from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args
|
from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args
|
||||||
|
|
||||||
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
|
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
|
||||||
from llama_stack.distribution.resolver import api_protocol_map
|
from llama_stack.core.resolver import api_protocol_map
|
||||||
|
|
||||||
from .generator import Generator
|
from .generator import Generator
|
||||||
from .options import Options
|
from .options import Options
|
||||||
|
|
|
@ -73,7 +73,7 @@ The API is defined in the [YAML](_static/llama-stack-spec.yaml) and [HTML](_stat
|
||||||
|
|
||||||
To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repository contains [6 different examples](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) ranging from very basic to a multi turn agent.
|
To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repository contains [6 different examples](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) ranging from very basic to a multi turn agent.
|
||||||
|
|
||||||
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distribution/server/server.py) repository.
|
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack.core/server/server.py) repository.
|
||||||
|
|
||||||
## Limitations
|
## Limitations
|
||||||
|
|
||||||
|
|
|
@ -187,7 +187,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -355,7 +355,7 @@ server:
|
||||||
8. Run the server:
|
8. Run the server:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m llama_stack.distribution.server.server --yaml-config ~/.llama/run-byoa.yaml
|
python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
9. Test the API:
|
9. Test the API:
|
||||||
|
|
|
@ -103,5 +103,5 @@ llama stack run together
|
||||||
|
|
||||||
2. Start Streamlit UI
|
2. Start Streamlit UI
|
||||||
```bash
|
```bash
|
||||||
uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
|
uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py
|
||||||
```
|
```
|
||||||
|
|
|
@ -174,7 +174,7 @@ spec:
|
||||||
- name: llama-stack
|
- name: llama-stack
|
||||||
image: localhost/llama-stack-run-k8s:latest
|
image: localhost/llama-stack-run-k8s:latest
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/config.yaml"]
|
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 5000
|
- containerPort: 5000
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
|
|
|
@ -59,7 +59,7 @@ Build a Llama stack container
|
||||||
|
|
||||||
options:
|
options:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will
|
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will
|
||||||
be prompted to enter information interactively (default: None)
|
be prompted to enter information interactively (default: None)
|
||||||
--template TEMPLATE Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
|
--template TEMPLATE Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
|
||||||
--list-templates Show the available templates for building a Llama Stack distribution (default: False)
|
--list-templates Show the available templates for building a Llama Stack distribution (default: False)
|
||||||
|
|
|
@ -10,7 +10,7 @@ llama stack build --template starter --image-type venv
|
||||||
```
|
```
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
|
from llama_stack.core.library_client import LlamaStackAsLibraryClient
|
||||||
|
|
||||||
client = LlamaStackAsLibraryClient(
|
client = LlamaStackAsLibraryClient(
|
||||||
"starter",
|
"starter",
|
||||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
||||||
value: "${SAFETY_MODEL}"
|
value: "${SAFETY_MODEL}"
|
||||||
- name: TAVILY_SEARCH_API_KEY
|
- name: TAVILY_SEARCH_API_KEY
|
||||||
value: "${TAVILY_SEARCH_API_KEY}"
|
value: "${TAVILY_SEARCH_API_KEY}"
|
||||||
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
|
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8321
|
- containerPort: 8321
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
|
|
|
@ -1,9 +1,4 @@
|
||||||
# External Providers Guide
|
# Creating External Providers
|
||||||
|
|
||||||
Llama Stack supports external providers that live outside of the main codebase. This allows you to:
|
|
||||||
- Create and maintain your own providers independently
|
|
||||||
- Share providers with others without contributing to the main codebase
|
|
||||||
- Keep provider-specific code separate from the core Llama Stack code
|
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
|
@@ -55,17 +50,6 @@ Llama Stack supports two types of external providers:
 1. **Remote Providers**: Providers that communicate with external services (e.g., cloud APIs)
 2. **Inline Providers**: Providers that run locally within the Llama Stack process
 
-## Known External Providers
-
-Here's a list of known external providers that you can use with Llama Stack:
-
-| Name | Description | API | Type | Repository |
-|------|-------------|-----|------|------------|
-| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
-| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
-| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
-| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |
-
 ### Remote Provider Specification
 
 Remote providers are used when you need to communicate with external services. Here's an example for a custom Ollama provider:
@@ -119,9 +103,9 @@ container_image: custom-vector-store:latest # optional
 - `provider_data_validator`: Optional validator for provider data
 - `container_image`: Optional container image to use instead of pip packages
 
-## Required Implementation
+## Required Fields
 
-## All Providers
+### All Providers
 
 All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:
 
@@ -146,7 +130,7 @@ def get_provider_spec() -> ProviderSpec:
 )
 ```
 
-### Remote Providers
+#### Remote Providers
 
 Remote providers must expose a `get_adapter_impl()` function in their module that takes two arguments:
 1. `config`: An instance of the provider's config class
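Since the hunk above only shows the tail of the guide's `get_provider_spec` example, here is a hedged, self-contained sketch of what such a function typically looks like for a remote inference provider; the package name, adapter type, and config class path are illustrative assumptions, not taken from this diff:

```python
# Sketch of a provider package's provider.get_provider_spec(); names below are assumptions.
from llama_stack.providers.datatypes import AdapterSpec, Api, ProviderSpec, remote_provider_spec


def get_provider_spec() -> ProviderSpec:
    return remote_provider_spec(
        api=Api.inference,
        adapter=AdapterSpec(
            adapter_type="custom_ollama",  # hypothetical adapter type
            pip_packages=["ollama", "aiohttp"],
            config_class="llama_stack_provider_ollama.config.OllamaImplConfig",  # hypothetical path
            module="llama_stack_provider_ollama",  # hypothetical package name
        ),
    )
```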
@@ -162,7 +146,7 @@ async def get_adapter_impl(
 return OllamaInferenceAdapter(config)
 ```
 
-### Inline Providers
+#### Inline Providers
 
 Inline providers must expose a `get_provider_impl()` function in their module that takes two arguments:
 1. `config`: An instance of the provider's config class
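As a quick reference for the two entry points described above, here is a hedged sketch; the config and implementation classes are local stand-ins so the shape is clear, and the second argument name (`deps`) plus the `initialize()` hook are assumptions for illustration:

```python
# Sketch only: the factory functions an external provider package is expected to expose.
from typing import Any

from pydantic import BaseModel


class MyProviderConfig(BaseModel):  # stand-in for the provider's config class
    url: str = "http://localhost:11434"


class MyAdapter:  # stand-in for the provider's adapter/implementation class
    def __init__(self, config: MyProviderConfig):
        self.config = config

    async def initialize(self) -> None:  # assumed async setup hook
        pass


async def get_adapter_impl(config: MyProviderConfig, deps: dict[Any, Any]):
    # Remote providers: build and initialize the adapter from its config.
    impl = MyAdapter(config)
    await impl.initialize()
    return impl


async def get_provider_impl(config: MyProviderConfig, deps: dict[Any, Any]):
    # Inline providers: same shape, but the implementation runs inside the Llama Stack process.
    impl = MyAdapter(config)
    await impl.initialize()
    return impl
```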
@@ -189,7 +173,40 @@ Version: 0.1.0
 Location: /path/to/venv/lib/python3.10/site-packages
 ```
 
-## Example using `external_providers_dir`: Custom Ollama Provider
+## Best Practices
 
+1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
+
+2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
+
+3. **Dependencies**: Only include the minimum required dependencies in your provider package.
+
+4. **Documentation**: Include clear documentation in your provider package about:
+- Installation requirements
+- Configuration options
+- Usage examples
+- Any limitations or known issues
+
+5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
+You can refer to the [integration tests
+guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
+information. Execute the test for the Provider type you are developing.
+
+## Troubleshooting
+
+If your external provider isn't being loaded:
+
+1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
+1. Check that the `external_providers_dir` path is correct and accessible.
+2. Verify that the YAML files are properly formatted.
+3. Ensure all required Python packages are installed.
+4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
+information using `LLAMA_STACK_LOGGING=all=debug`.
+5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.
+
+## Examples
+
+### Example using `external_providers_dir`: Custom Ollama Provider
+
 Here's a complete example of creating and using a custom Ollama provider:
 
@@ -241,7 +258,7 @@ external_providers_dir: ~/.llama/providers.d/
 The provider will now be available in Llama Stack with the type `remote::custom_ollama`.
 
 
-## Example using `module`: ramalama-stack
+### Example using `module`: ramalama-stack
 
 [ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module.
 
@@ -266,35 +283,4 @@ additional_pip_packages:
 
 No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc.
 
 The provider will now be available in Llama Stack with the type `remote::ramalama`.
-
-## Best Practices
-
-1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
-
-2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
-
-3. **Dependencies**: Only include the minimum required dependencies in your provider package.
-
-4. **Documentation**: Include clear documentation in your provider package about:
-- Installation requirements
-- Configuration options
-- Usage examples
-- Any limitations or known issues
-
-5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
-You can refer to the [integration tests
-guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
-information. Execute the test for the Provider type you are developing.
-
-## Troubleshooting
-
-If your external provider isn't being loaded:
-
-1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
-1. Check that the `external_providers_dir` path is correct and accessible.
-2. Verify that the YAML files are properly formatted.
-3. Ensure all required Python packages are installed.
-4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
-information using `LLAMA_STACK_LOGGING=all=debug`.
-5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.
10
docs/source/providers/external/external-providers-list.md
vendored
Normal file

@@ -0,0 +1,10 @@
+# Known External Providers
+
+Here's a list of known external providers that you can use with Llama Stack:
+
+| Name | Description | API | Type | Repository |
+|------|-------------|-----|------|------------|
+| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
+| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
+| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
+| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |
13
docs/source/providers/external/index.md
vendored
Normal file

@@ -0,0 +1,13 @@
+# External Providers
+
+Llama Stack supports external providers that live outside of the main codebase. This allows you to:
+- Create and maintain your own providers independently
+- Share providers with others without contributing to the main codebase
+- Keep provider-specific code separate from the core Llama Stack code
+
+```{toctree}
+:maxdepth: 1
+
+external-providers-list
+external-providers-guide
+```
@@ -15,7 +15,7 @@ Importantly, Llama Stack always strives to provide at least one fully inline pro
 ```{toctree}
 :maxdepth: 1
 
-external
+external/index
 openai
 inference/index
 agents/index
@@ -24,6 +24,10 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin
 | `weight_decay` | `<class 'float'>` | No | 0.01 | |
 | `dataloader_num_workers` | `<class 'int'>` | No | 4 | |
 | `dataloader_pin_memory` | `<class 'bool'>` | No | True | |
+| `dpo_beta` | `<class 'float'>` | No | 0.1 | |
+| `use_reference_model` | `<class 'bool'>` | No | True | |
+| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair'` | No | sigmoid | |
+| `dpo_output_dir` | `<class 'str'>` | No | ./checkpoints/dpo | |
 
 ## Sample Configuration
 
@@ -66,7 +66,7 @@
 "from pydantic import BaseModel\n",
 "from termcolor import cprint\n",
 "\n",
-"from llama_stack.distribution.datatypes import RemoteProviderConfig\n",
+"from llama_stack.core.datatypes import RemoteProviderConfig\n",
 "from llama_stack.apis.safety import Safety\n",
 "from llama_stack_client import LlamaStackClient\n",
 "\n",
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.distribution.library_client import ( # noqa: F401
+from llama_stack.core.library_client import ( # noqa: F401
 AsyncLlamaStackAsLibraryClient,
 LlamaStackAsLibraryClient,
 )
@@ -193,18 +193,10 @@ class DPOLossType(Enum):
 class DPOAlignmentConfig(BaseModel):
 """Configuration for Direct Preference Optimization (DPO) alignment.
 
-:param reward_scale: Scaling factor for the reward signal
-:param reward_clip: Maximum absolute value for reward clipping
-:param epsilon: Small value added for numerical stability
-:param gamma: Discount factor for future rewards
 :param beta: Temperature parameter for the DPO loss
 :param loss_type: The type of loss function to use for DPO
 """
 
-reward_scale: float
-reward_clip: float
-epsilon: float
-gamma: float
 beta: float
 loss_type: DPOLossType = DPOLossType.sigmoid
 
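To make the effect of this hunk concrete, a self-contained sketch of the slimmed-down model, re-declared locally rather than imported from llama_stack; the enum string values mirror the `dpo_loss_type` literals documented earlier and are otherwise an assumption:

```python
# Stand-in mirror of DPOAlignmentConfig after this change: only the fields DPO actually uses remain.
from enum import Enum

from pydantic import BaseModel


class DPOLossType(Enum):
    sigmoid = "sigmoid"
    hinge = "hinge"
    ipo = "ipo"
    kto_pair = "kto_pair"


class DPOAlignmentConfig(BaseModel):
    beta: float
    loss_type: DPOLossType = DPOLossType.sigmoid


# reward_scale / reward_clip / epsilon / gamma no longer need to be supplied.
config = DPOAlignmentConfig(beta=0.1)
print(config.loss_type)  # DPOLossType.sigmoid
```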
@@ -323,7 +323,7 @@ def _hf_download(
 from huggingface_hub import snapshot_download
 from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
 
-from llama_stack.distribution.utils.model_utils import model_local_dir
+from llama_stack.core.utils.model_utils import model_local_dir
 
 repo_id = model.huggingface_repo
 if repo_id is None:
@@ -361,7 +361,7 @@ def _meta_download(
 info: "LlamaDownloadInfo",
 max_concurrent_downloads: int,
 ):
-from llama_stack.distribution.utils.model_utils import model_local_dir
+from llama_stack.core.utils.model_utils import model_local_dir
 
 output_dir = Path(model_local_dir(model.descriptor()))
 os.makedirs(output_dir, exist_ok=True)
@@ -403,7 +403,7 @@ class Manifest(BaseModel):
 
 
 def _download_from_manifest(manifest_file: str, max_concurrent_downloads: int):
-from llama_stack.distribution.utils.model_utils import model_local_dir
+from llama_stack.core.utils.model_utils import model_local_dir
 
 with open(manifest_file) as f:
 d = json.load(f)
@@ -11,7 +11,7 @@ from pathlib import Path
 
 from llama_stack.cli.subcommand import Subcommand
 from llama_stack.cli.table import print_table
-from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
+from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
 from llama_stack.models.llama.sku_list import all_registered_models
 
 
@@ -9,7 +9,7 @@ import os
 import shutil
 
 from llama_stack.cli.subcommand import Subcommand
-from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
+from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
 from llama_stack.models.llama.sku_list import resolve_model
 
 
@@ -23,27 +23,27 @@ from termcolor import colored, cprint
 
 from llama_stack.cli.stack.utils import ImageType
 from llama_stack.cli.table import print_table
-from llama_stack.distribution.build import (
+from llama_stack.core.build import (
 SERVER_DEPENDENCIES,
 build_image,
 get_provider_dependencies,
 )
-from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
+from llama_stack.core.configure import parse_and_maybe_upgrade_config
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 BuildConfig,
 BuildProvider,
 DistributionSpec,
 Provider,
 StackRunConfig,
 )
-from llama_stack.distribution.distribution import get_provider_registry
+from llama_stack.core.distribution import get_provider_registry
-from llama_stack.distribution.external import load_external_apis
+from llama_stack.core.external import load_external_apis
-from llama_stack.distribution.resolver import InvalidProviderError
+from llama_stack.core.resolver import InvalidProviderError
-from llama_stack.distribution.stack import replace_env_vars
+from llama_stack.core.stack import replace_env_vars
-from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.distribution.utils.exec import formulate_run_args, run_command
+from llama_stack.core.utils.exec import formulate_run_args, run_command
-from llama_stack.distribution.utils.image_types import LlamaStackImageType
+from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
 
 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
@@ -27,7 +27,7 @@ class StackBuild(Subcommand):
 "--config",
 type=str,
 default=None,
-help="Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
+help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
 )
 
 self.parser.add_argument(
@@ -26,7 +26,7 @@ class StackListApis(Subcommand):
 
 def _run_apis_list_cmd(self, args: argparse.Namespace) -> None:
 from llama_stack.cli.table import print_table
-from llama_stack.distribution.distribution import stack_apis
+from llama_stack.core.distribution import stack_apis
 
 # eventually, this should query a registry at llama.meta.com/llamastack/distributions
 headers = [
@@ -23,7 +23,7 @@ class StackListProviders(Subcommand):
 
 @property
 def providable_apis(self):
-from llama_stack.distribution.distribution import providable_apis
+from llama_stack.core.distribution import providable_apis
 
 return [api.value for api in providable_apis()]
 
@@ -38,7 +38,7 @@ class StackListProviders(Subcommand):
 
 def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
 from llama_stack.cli.table import print_table
-from llama_stack.distribution.distribution import Api, get_provider_registry
+from llama_stack.core.distribution import Api, get_provider_registry
 
 all_providers = get_provider_registry()
 if args.api:
@@ -85,8 +85,8 @@ class StackRun(Subcommand):
 def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
 import yaml
 
-from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
+from llama_stack.core.configure import parse_and_maybe_upgrade_config
-from llama_stack.distribution.utils.exec import formulate_run_args, run_command
+from llama_stack.core.utils.exec import formulate_run_args, run_command
 
 if args.enable_ui:
 self._start_ui_development_server(args.port)
@@ -94,7 +94,7 @@ class StackRun(Subcommand):
 
 if args.config:
 try:
-from llama_stack.distribution.utils.config_resolution import Mode, resolve_config_or_template
+from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template
 
 config_file = resolve_config_or_template(args.config, Mode.RUN)
 except ValueError as e:
|
||||||
# using the current environment packages.
|
# using the current environment packages.
|
||||||
if not image_type and not image_name:
|
if not image_type and not image_name:
|
||||||
logger.info("No image type or image name provided. Assuming environment packages.")
|
logger.info("No image type or image name provided. Assuming environment packages.")
|
||||||
from llama_stack.distribution.server.server import main as server_main
|
from llama_stack.core.server.server import main as server_main
|
||||||
|
|
||||||
# Build the server args from the current args passed to the CLI
|
# Build the server args from the current args passed to the CLI
|
||||||
server_args = argparse.Namespace()
|
server_args = argparse.Namespace()
|
||||||
|
|
|
@@ -107,7 +107,7 @@ def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -
 
 
 def run_verify_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
-from llama_stack.distribution.utils.model_utils import model_local_dir
+from llama_stack.core.utils.model_utils import model_local_dir
 
 console = Console()
 model_dir = Path(model_local_dir(args.model_id))
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from llama_stack.distribution.datatypes import User
+from llama_stack.core.datatypes import User
 
 from .conditions import (
 Condition,
@@ -12,11 +12,11 @@ from pathlib import Path
 from pydantic import BaseModel
 from termcolor import cprint
 
-from llama_stack.distribution.datatypes import BuildConfig
+from llama_stack.core.datatypes import BuildConfig
-from llama_stack.distribution.distribution import get_provider_registry
+from llama_stack.core.distribution import get_provider_registry
-from llama_stack.distribution.external import load_external_apis
+from llama_stack.core.external import load_external_apis
-from llama_stack.distribution.utils.exec import run_command
+from llama_stack.core.utils.exec import run_command
-from llama_stack.distribution.utils.image_types import LlamaStackImageType
+from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
 from llama_stack.templates.template import DistributionTemplate
 
@@ -122,7 +122,7 @@ def build_image(
 normal_deps.extend(api_spec.pip_packages)
 
 if build_config.image_type == LlamaStackImageType.CONTAINER.value:
-script = str(importlib.resources.files("llama_stack") / "distribution/build_container.sh")
+script = str(importlib.resources.files("llama_stack") / "core/build_container.sh")
 args = [
 script,
 "--template-or-config",
@@ -139,7 +139,7 @@ def build_image(
 if run_config is not None:
 args.extend(["--run-config", run_config])
 elif build_config.image_type == LlamaStackImageType.CONDA.value:
-script = str(importlib.resources.files("llama_stack") / "distribution/build_conda_env.sh")
+script = str(importlib.resources.files("llama_stack") / "core/build_conda_env.sh")
 args = [
 script,
 "--env-name",
@@ -150,7 +150,7 @@ def build_image(
 " ".join(normal_deps),
 ]
 elif build_config.image_type == LlamaStackImageType.VENV.value:
-script = str(importlib.resources.files("llama_stack") / "distribution/build_venv.sh")
+script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh")
 args = [
 script,
 "--env-name",
@@ -327,12 +327,12 @@ EOF
 # If a run config is provided, we use the --config flag
 if [[ -n "$run_config" ]]; then
 add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "$RUN_CONFIG_PATH"]
+ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--config", "$RUN_CONFIG_PATH"]
 EOF
 # If a template is provided (not a yaml file), we use the --template flag
 elif [[ "$template_or_config" != *.yaml ]]; then
 add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "$template_or_config"]
+ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--template", "$template_or_config"]
 EOF
 fi
 
@@ -7,20 +7,20 @@ import logging
 import textwrap
 from typing import Any
 
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 LLAMA_STACK_RUN_CONFIG_VERSION,
 DistributionSpec,
 Provider,
 StackRunConfig,
 )
-from llama_stack.distribution.distribution import (
+from llama_stack.core.distribution import (
 builtin_automatically_routed_apis,
 get_provider_registry,
 )
-from llama_stack.distribution.stack import cast_image_name_to_string, replace_env_vars
+from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
-from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
+from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
+from llama_stack.core.utils.prompt_for_config import prompt_for_config
 from llama_stack.providers.datatypes import Api, ProviderSpec
 
 logger = logging.getLogger(__name__)
@@ -24,7 +24,7 @@ from llama_stack.apis.shields import Shield, ShieldInput
 from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
 from llama_stack.apis.vector_io import VectorIO
-from llama_stack.distribution.access_control.datatypes import AccessRule
+from llama_stack.core.access_control.datatypes import AccessRule
 from llama_stack.providers.datatypes import Api, ProviderSpec
 from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
@@ -12,8 +12,8 @@ from typing import Any
 import yaml
 from pydantic import BaseModel
 
-from llama_stack.distribution.datatypes import BuildConfig, DistributionSpec
+from llama_stack.core.datatypes import BuildConfig, DistributionSpec
-from llama_stack.distribution.external import load_external_apis
+from llama_stack.core.external import load_external_apis
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
 AdapterSpec,
@@ -8,7 +8,7 @@
 import yaml
 
 from llama_stack.apis.datatypes import Api, ExternalApiSpec
-from llama_stack.distribution.datatypes import BuildConfig, StackRunConfig
+from llama_stack.core.datatypes import BuildConfig, StackRunConfig
 from llama_stack.log import get_logger
 
 logger = get_logger(name=__name__, category="core")
@@ -15,9 +15,9 @@ from llama_stack.apis.inspect import (
 RouteInfo,
 VersionInfo,
 )
-from llama_stack.distribution.datatypes import StackRunConfig
+from llama_stack.core.datatypes import StackRunConfig
-from llama_stack.distribution.external import load_external_apis
+from llama_stack.core.external import load_external_apis
-from llama_stack.distribution.server.routes import get_all_api_routes
+from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack.providers.datatypes import HealthStatus
 
 
@@ -31,23 +31,23 @@ from pydantic import BaseModel, TypeAdapter
 from rich.console import Console
 from termcolor import cprint
 
-from llama_stack.distribution.build import print_pip_install_help
+from llama_stack.core.build import print_pip_install_help
-from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
+from llama_stack.core.configure import parse_and_maybe_upgrade_config
-from llama_stack.distribution.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
+from llama_stack.core.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
-from llama_stack.distribution.request_headers import (
+from llama_stack.core.request_headers import (
 PROVIDER_DATA_VAR,
 request_provider_data_context,
 )
-from llama_stack.distribution.resolver import ProviderRegistry
+from llama_stack.core.resolver import ProviderRegistry
-from llama_stack.distribution.server.routes import RouteImpls, find_matching_route, initialize_route_impls
+from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
-from llama_stack.distribution.stack import (
+from llama_stack.core.stack import (
 construct_stack,
 get_stack_run_config_from_template,
 replace_env_vars,
 )
-from llama_stack.distribution.utils.config import redact_sensitive_fields
+from llama_stack.core.utils.config import redact_sensitive_fields
-from llama_stack.distribution.utils.context import preserve_contexts_async_generator
+from llama_stack.core.utils.context import preserve_contexts_async_generator
-from llama_stack.distribution.utils.exec import in_notebook
+from llama_stack.core.utils.exec import in_notebook
 from llama_stack.providers.utils.telemetry.tracing import (
 CURRENT_TRACE_CONTEXT,
 end_trace,
@@ -10,7 +10,7 @@ import logging
 from contextlib import AbstractContextManager
 from typing import Any
 
-from llama_stack.distribution.datatypes import User
+from llama_stack.core.datatypes import User
 
 from .utils.dynamic import instantiate_class_type
 
@@ -27,18 +27,18 @@ from llama_stack.apis.telemetry import Telemetry
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDBs
 from llama_stack.apis.vector_io import VectorIO
-from llama_stack.distribution.client import get_client_impl
+from llama_stack.core.client import get_client_impl
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 AccessRule,
 AutoRoutedProviderSpec,
 Provider,
 RoutingTableProviderSpec,
 StackRunConfig,
 )
-from llama_stack.distribution.distribution import builtin_automatically_routed_apis
+from llama_stack.core.distribution import builtin_automatically_routed_apis
-from llama_stack.distribution.external import load_external_apis
+from llama_stack.core.external import load_external_apis
-from llama_stack.distribution.store import DistributionRegistry
+from llama_stack.core.store import DistributionRegistry
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
 Api,
@@ -183,7 +183,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
 spec=RoutingTableProviderSpec(
 api=info.routing_table_api,
 router_api=info.router_api,
-module="llama_stack.distribution.routers",
+module="llama_stack.core.routers",
 api_dependencies=[],
 deps__=[f"inner-{info.router_api.value}"],
 ),
@@ -197,7 +197,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
 config={},
 spec=AutoRoutedProviderSpec(
 api=info.router_api,
-module="llama_stack.distribution.routers",
+module="llama_stack.core.routers",
 routing_table_api=info.routing_table_api,
 api_dependencies=[info.routing_table_api],
 # Add telemetry as an optional dependency to all auto-routed providers
@@ -6,9 +6,9 @@
 
 from typing import Any
 
-from llama_stack.distribution.datatypes import AccessRule, RoutedProtocol
+from llama_stack.core.datatypes import AccessRule, RoutedProtocol
-from llama_stack.distribution.stack import StackRunConfig
+from llama_stack.core.stack import StackRunConfig
-from llama_stack.distribution.store import DistributionRegistry
+from llama_stack.core.store import DistributionRegistry
 from llama_stack.providers.datatypes import Api, RoutingTable
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
 
@@ -7,7 +7,7 @@
 from typing import Any
 
 from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 BenchmarkWithOwner,
 )
 from llama_stack.log import get_logger
@@ -10,16 +10,16 @@ from llama_stack.apis.common.errors import ModelNotFoundError
 from llama_stack.apis.models import Model
 from llama_stack.apis.resource import ResourceType
 from llama_stack.apis.scoring_functions import ScoringFn
-from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed
+from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
-from llama_stack.distribution.access_control.datatypes import Action
+from llama_stack.core.access_control.datatypes import Action
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 AccessRule,
 RoutableObject,
 RoutableObjectWithProvider,
 RoutedProtocol,
 )
-from llama_stack.distribution.request_headers import get_authenticated_user
+from llama_stack.core.request_headers import get_authenticated_user
-from llama_stack.distribution.store import DistributionRegistry
+from llama_stack.core.store import DistributionRegistry
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api, RoutingTable
 
@@ -19,7 +19,7 @@ from llama_stack.apis.datasets import (
 URIDataSource,
 )
 from llama_stack.apis.resource import ResourceType
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 DatasetWithOwner,
 )
 from llama_stack.log import get_logger
@@ -9,7 +9,7 @@ from typing import Any
 
 from llama_stack.apis.common.errors import ModelNotFoundError
 from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 ModelWithOwner,
 RegistryEntrySource,
 )
@@ -12,7 +12,7 @@ from llama_stack.apis.scoring_functions import (
 ScoringFnParams,
 ScoringFunctions,
 )
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 ScoringFnWithOwner,
 )
 from llama_stack.log import get_logger
@@ -8,7 +8,7 @@ from typing import Any
 
 from llama_stack.apis.resource import ResourceType
 from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 ShieldWithOwner,
 )
 from llama_stack.log import get_logger
@@ -8,7 +8,7 @@ from typing import Any
 
 from llama_stack.apis.common.content_types import URL
 from llama_stack.apis.tools import ListToolGroupsResponse, ListToolsResponse, Tool, ToolGroup, ToolGroups
-from llama_stack.distribution.datatypes import ToolGroupWithOwner
+from llama_stack.core.datatypes import ToolGroupWithOwner
 from llama_stack.log import get_logger
 
 from .common import CommonRoutingTableImpl
@@ -23,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import (
 VectorStoreObject,
 VectorStoreSearchResponsePage,
 )
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 VectorDBWithOwner,
 )
 from llama_stack.log import get_logger
@@ -84,8 +84,6 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
 
 async def unregister_vector_db(self, vector_db_id: str) -> None:
 existing_vector_db = await self.get_vector_db(vector_db_id)
-if existing_vector_db is None:
-raise VectorStoreNotFoundError(vector_db_id)
 await self.unregister_object(existing_vector_db)
 
 async def openai_retrieve_vector_store(
@@ -9,10 +9,10 @@ import json
 import httpx
 from aiohttp import hdrs
 
-from llama_stack.distribution.datatypes import AuthenticationConfig, User
+from llama_stack.core.datatypes import AuthenticationConfig, User
-from llama_stack.distribution.request_headers import user_from_scope
+from llama_stack.core.request_headers import user_from_scope
-from llama_stack.distribution.server.auth_providers import create_auth_provider
+from llama_stack.core.server.auth_providers import create_auth_provider
-from llama_stack.distribution.server.routes import find_matching_route, initialize_route_impls
+from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
 from llama_stack.log import get_logger
 
 logger = get_logger(name=__name__, category="auth")
@@ -14,7 +14,7 @@ import httpx
 from jose import jwt
 from pydantic import BaseModel, Field
 
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 AuthenticationConfig,
 CustomAuthConfig,
 GitHubTokenAuthConfig,
@@ -15,7 +15,7 @@ from starlette.routing import Route
 from llama_stack.apis.datatypes import Api, ExternalApiSpec
 from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup
 from llama_stack.apis.version import LLAMA_STACK_API_VERSION
-from llama_stack.distribution.resolver import api_protocol_map
+from llama_stack.core.resolver import api_protocol_map
 from llama_stack.schema_utils import WebMethod
 
 EndpointFunc = Callable[..., Any]
@@ -33,35 +33,35 @@ from pydantic import BaseModel, ValidationError
 
 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.cli.utils import add_config_template_args, get_config_from_args
-from llama_stack.distribution.access_control.access_control import AccessDeniedError
+from llama_stack.core.access_control.access_control import AccessDeniedError
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
 AuthenticationRequiredError,
 LoggingConfig,
 StackRunConfig,
 )
-from llama_stack.distribution.distribution import builtin_automatically_routed_apis
+from llama_stack.core.distribution import builtin_automatically_routed_apis
-from llama_stack.distribution.external import ExternalApiSpec, load_external_apis
+from llama_stack.core.external import ExternalApiSpec, load_external_apis
-from llama_stack.distribution.request_headers import (
+from llama_stack.core.request_headers import (
 PROVIDER_DATA_VAR,
 request_provider_data_context,
 user_from_scope,
 )
-from llama_stack.distribution.resolver import InvalidProviderError
+from llama_stack.core.resolver import InvalidProviderError
-from llama_stack.distribution.server.routes import (
+from llama_stack.core.server.routes import (
 find_matching_route,
 get_all_api_routes,
 initialize_route_impls,
 )
-from llama_stack.distribution.stack import (
+from llama_stack.core.stack import (
 cast_image_name_to_string,
 construct_stack,
 replace_env_vars,
 shutdown_stack,
 validate_env_pair,
 )
-from llama_stack.distribution.utils.config import redact_sensitive_fields
+from llama_stack.core.utils.config import redact_sensitive_fields
-from llama_stack.distribution.utils.config_resolution import Mode, resolve_config_or_template
+from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template
-from llama_stack.distribution.utils.context import preserve_contexts_async_generator
+from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
 from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
@@ -34,14 +34,14 @@ from llama_stack.apis.telemetry import Telemetry
 from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDBs
 from llama_stack.apis.vector_io import VectorIO
-from llama_stack.distribution.datatypes import Provider, StackRunConfig
+from llama_stack.core.datatypes import Provider, StackRunConfig
-from llama_stack.distribution.distribution import get_provider_registry
+from llama_stack.core.distribution import get_provider_registry
-from llama_stack.distribution.inspect import DistributionInspectConfig, DistributionInspectImpl
+from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
-from llama_stack.distribution.providers import ProviderImpl, ProviderImplConfig
+from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
-from llama_stack.distribution.resolver import ProviderRegistry, resolve_impls
+from llama_stack.core.resolver import ProviderRegistry, resolve_impls
-from llama_stack.distribution.routing_tables.common import CommonRoutingTableImpl
+from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
-from llama_stack.distribution.store.registry import create_dist_registry
+from llama_stack.core.store.registry import create_dist_registry
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
 
@@ -122,7 +122,7 @@ if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then
 yaml_config_arg=""
 fi
 
-$PYTHON_BINARY -m llama_stack.distribution.server.server \
+$PYTHON_BINARY -m llama_stack.core.server.server \
 $yaml_config_arg \
 --port "$port" \
 $env_vars \
@@ -10,8 +10,8 @@ from typing import Protocol
 
 import pydantic
 
-from llama_stack.distribution.datatypes import RoutableObjectWithProvider
+from llama_stack.core.datatypes import RoutableObjectWithProvider
-from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
 from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
@@ -36,7 +36,7 @@ llama-stack-client benchmarks register \
 3. Start Streamlit UI
 
 ```bash
-uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
+uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py
 ```
 
 ## Environment Variables