diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 9c5c5486f..8097d5f7c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,4 +2,4 @@ # These owners will be the default owners for everything in # the repo. Unless a later match takes precedence, -* @ashwinb @yanxi0830 @hardikjshah @dltn @raghotham @dineshyv @vladimirivic @sixianyi0721 @ehhuang @terrytangyuan +* @ashwinb @yanxi0830 @hardikjshah @dltn @raghotham @dineshyv @vladimirivic @sixianyi0721 @ehhuang @terrytangyuan @SLR722 diff --git a/.github/TRIAGERS.md b/.github/TRIAGERS.md new file mode 100644 index 000000000..d4ef6d1ac --- /dev/null +++ b/.github/TRIAGERS.md @@ -0,0 +1,2 @@ +# This file documents Triage members in the Llama Stack community +@franciscojavierarceo @leseb diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..d68af5615 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,23 @@ +# GitHub Dependabot configuration +version: 2 +updates: + # Enable version updates for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" # Will use the default workflow location of `.github/workflows` + schedule: + interval: "weekly" + day: "saturday" + commit-message: + prefix: chore(github-deps) + - package-ecosystem: "uv" + directory: "/" + schedule: + interval: "weekly" + day: "saturday" + # ignore all non-security updates: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#open-pull-requests-limit + open-pull-requests-limit: 0 + labels: + - type/dependencies + - python + commit-message: + prefix: chore(python-deps) diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml new file mode 100644 index 000000000..5b63e231c --- /dev/null +++ b/.github/workflows/changelog.yml @@ -0,0 +1,29 @@ +name: Update Changelog + +on: + release: + types: [published, unpublished, created, edited, deleted, released] + +permissions: + contents: read + 
+jobs: + generate_changelog: + name: Generate changelog + permissions: + contents: write # for peter-evans/create-pull-request to create branch + pull-requests: write # for peter-evans/create-pull-request to create a PR + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + ref: main + fetch-depth: 0 + - run: | + python ./scripts/gen-changelog.py + - uses: peter-evans/create-pull-request@v7 + with: + title: 'docs: update CHANGELOG.md for ${{ github.ref_name }}' + commit-message: 'docs: update CHANGELOG.md for ${{ github.ref_name }}' + branch: create-pull-request/changelog + signoff: true diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows/gha_workflow_llama_stack_tests.yml index 89e5edf71..b10a40974 100644 --- a/.github/workflows/gha_workflow_llama_stack_tests.yml +++ b/.github/workflows/gha_workflow_llama_stack_tests.yml @@ -310,7 +310,7 @@ jobs: - name: "PR - Upload Test Summary" id: pr_test_summary_upload if: github.event_name == 'pull_request_target' - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: test-summary path: test-summary.md @@ -320,7 +320,7 @@ jobs: - name: "PR - Update comment" id: pr_update_comment if: github.event_name == 'pull_request_target' - uses: thollander/actions-comment-pull-request@v2 + uses: thollander/actions-comment-pull-request@v3 with: filePath: test-summary.md diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 000000000..475b26d0a --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,97 @@ +name: Integration Tests + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - 'distributions/**' + - 'llama_stack/**' + - 'tests/integration/**' + - 'uv.lock' + - 'pyproject.toml' + - 'requirements.txt' + - '.github/workflows/integration-tests.yml' # This workflow + +jobs: + test-matrix: + runs-on: ubuntu-latest + strategy: + matrix: + # Listing 
tests manually since some of them currently fail + # TODO: generate matrix list from tests/integration when fixed + test-type: [inference, datasets, inspect, scoring, post_training, providers] + fail-fast: false # we want to run all tests regardless of failure + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + python-version: "3.10" + + - name: Install Ollama + run: | + curl -fsSL https://ollama.com/install.sh | sh + + - name: Pull Ollama image + run: | + ollama pull llama3.2:3b-instruct-fp16 + + - name: Start Ollama in background + run: | + nohup ollama run llama3.2:3b-instruct-fp16 > ollama.log 2>&1 & + + - name: Set Up Environment and Install Dependencies + run: | + uv sync --extra dev --extra test + uv pip install ollama faiss-cpu + # always test against the latest version of the client + uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main + uv pip install -e . + llama stack build --template ollama --image-type venv + + - name: Wait for Ollama to start + run: | + echo "Waiting for Ollama..." + for i in {1..30}; do + if curl -s http://localhost:11434 | grep -q "Ollama is running"; then + echo "Ollama is running!" + exit 0 + fi + sleep 1 + done + echo "Ollama failed to start" + cat ollama.log # dump the captured Ollama output first so it is shown even if the next command fails + ollama ps + exit 1 + + - name: Start Llama Stack server in background + env: + INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" + run: | + source .venv/bin/activate + nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 & + + - name: Wait for Llama Stack server to be ready + run: | + echo "Waiting for Llama Stack server..." + for i in {1..30}; do + if curl -s http://localhost:8321/v1/health | grep -q "OK"; then + echo "Llama Stack server is up!" 
+ exit 0 + fi + sleep 1 + done + echo "Llama Stack server failed to start" + cat server.log + exit 1 + + - name: Run Integration Tests + env: + INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" + run: | + uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2 diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml new file mode 100644 index 000000000..e6871bf99 --- /dev/null +++ b/.github/workflows/providers-build.yml @@ -0,0 +1,79 @@ +name: Test Llama Stack Build + +on: + push: + branches: + - main + paths: + - 'llama_stack/cli/stack/build.py' + - 'llama_stack/cli/stack/_build.py' + - 'llama_stack/distribution/build.*' + - 'llama_stack/distribution/*.sh' + - '.github/workflows/providers-build.yml' + pull_request: + paths: + - 'llama_stack/cli/stack/build.py' + - 'llama_stack/cli/stack/_build.py' + - 'llama_stack/distribution/build.*' + - 'llama_stack/distribution/*.sh' + - '.github/workflows/providers-build.yml' + +jobs: + generate-matrix: + runs-on: ubuntu-latest + outputs: + templates: ${{ steps.set-matrix.outputs.templates }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Generate Template List + id: set-matrix + run: | + templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]') + echo "templates=$templates" >> "$GITHUB_OUTPUT" + + build: + needs: generate-matrix + runs-on: ubuntu-latest + strategy: + matrix: + template: ${{ fromJson(needs.generate-matrix.outputs.templates) }} + image-type: [venv, container] + fail-fast: false # We want to run all jobs even if some fail + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + python-version: "3.10" + + - name: Install 
LlamaStack + run: | + uv venv + source .venv/bin/activate + uv pip install -e . + + - name: Print build dependencies + run: | + uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only + + - name: Run Llama Stack Build + run: | + # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead + # LLAMA_STACK_DIR is set to the current directory so we are building from the source + USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test + + - name: Print dependencies in the image + if: matrix.image-type == 'venv' + run: | + source test/bin/activate + uv pip list diff --git a/.github/workflows/stale_bot.yml b/.github/workflows/stale_bot.yml new file mode 100644 index 000000000..2039fcbb4 --- /dev/null +++ b/.github/workflows/stale_bot.yml @@ -0,0 +1,45 @@ +name: Close stale issues and PRs + +on: + schedule: + - cron: '0 0 * * *' # every day at midnight + +env: + LC_ALL: en_US.UTF-8 + +defaults: + run: + shell: bash + +permissions: + contents: read + +jobs: + stale: + permissions: + issues: write + pull-requests: write + runs-on: ubuntu-latest + steps: + - name: Stale Action + uses: actions/stale@v9 + with: + stale-issue-label: 'stale' + stale-issue-message: > + This issue has been automatically marked as stale because it has not had activity within 60 days. + It will be automatically closed if no further activity occurs within 30 days. + close-issue-message: > + This issue has been automatically closed due to inactivity. + Please feel free to reopen if you feel it is still relevant! + days-before-issue-stale: 60 + days-before-issue-close: 30 + stale-pr-label: 'stale' + stale-pr-message: > + This pull request has been automatically marked as stale because it has not had activity within 60 days. 
+ It will be automatically closed if no further activity occurs within 30 days. + close-pr-message: > + This pull request has been automatically closed due to inactivity. + Please feel free to reopen if you intend to continue working on it! + days-before-pr-stale: 60 + days-before-pr-close: 30 + operations-per-run: 300 diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml new file mode 100644 index 000000000..6d6e91f22 --- /dev/null +++ b/.github/workflows/unit-tests.yml @@ -0,0 +1,55 @@ +name: Unit Tests + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - 'distributions/**' + - 'llama_stack/**' + - 'tests/unit/**' + - 'uv.lock' + - 'pyproject.toml' + - 'requirements.txt' + - '.github/workflows/unit-tests.yml' # This workflow + workflow_dispatch: + +jobs: + unit-tests: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python: + - "3.10" + - "3.11" + - "3.12" + - "3.13" + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + + - uses: astral-sh/setup-uv@v5 + with: + python-version: ${{ matrix.python }} + enable-cache: false + + - name: Run unit tests + run: | + PYTHON_VERSION=${{ matrix.python }} ./scripts/unit-tests.sh --cov=llama_stack --junitxml=pytest-report-${{ matrix.python }}.xml --cov-report=html:htmlcov-${{ matrix.python }} + + - name: Upload test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results-${{ matrix.python }} + path: | + .pytest_cache/ + pytest-report-${{ matrix.python }}.xml + htmlcov-${{ matrix.python }}/ + retention-days: 7 diff --git a/.github/workflows/update-readthedocs.yml b/.github/workflows/update-readthedocs.yml index 23bafa1e5..e8f14dbba 100644 --- a/.github/workflows/update-readthedocs.yml +++ b/.github/workflows/update-readthedocs.yml @@ -12,12 +12,14 @@ on: - main paths: - 'docs/**' + - 'pyproject.toml' - 
'.github/workflows/update-readthedocs.yml' pull_request: branches: - main paths: - 'docs/**' + - 'pyproject.toml' - '.github/workflows/update-readthedocs.yml' jobs: diff --git a/.gitignore b/.gitignore index f54d1563d..0ef25cdf1 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,6 @@ _build docs/src pyrightconfig.json venv/ +pytest-report.xml +.coverage +.python-version diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 611875287..000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "llama_stack/providers/impls/ios/inference/executorch"] - path = llama_stack/providers/inline/ios/inference/executorch - url = https://github.com/pytorch/executorch diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 70af72a62..e83e64672 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,15 +8,14 @@ repos: rev: v5.0.0 # Latest stable version hooks: - id: check-merge-conflict + args: ['--assume-in-merge'] + - id: trailing-whitespace + exclude: '\.py$' # Exclude Python files as Ruff already handles them - id: check-added-large-files args: ['--maxkb=1000'] - id: end-of-file-fixer exclude: '^(.*\.svg)$' -# Temporarily disabling this -# - id: no-commit-to-branch -# args: ['--branch=main'] - - repo: https://github.com/Lucas-C/pre-commit-hooks rev: v1.5.4 hooks: @@ -42,8 +41,9 @@ repos: - black==24.3.0 - repo: https://github.com/astral-sh/uv-pre-commit - rev: 0.5.26 + rev: 0.6.3 hooks: + - id: uv-lock - id: uv-export args: [ "--frozen", @@ -51,8 +51,6 @@ repos: "--no-emit-project", "--output-file=requirements.txt" ] - files: ^pyproject\.toml$ - - id: uv-sync - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.15.0 @@ -67,12 +65,6 @@ repos: - pydantic pass_filenames: false -# - repo: https://github.com/jsh9/pydoclint -# rev: d88180a8632bb1602a4d81344085cf320f288c5a -# hooks: -# - id: pydoclint -# args: [--config=pyproject.toml] - # - repo: https://github.com/tcort/markdown-link-check # rev: v3.11.2 # hooks: @@ 
-84,15 +76,23 @@ repos: - id: distro-codegen name: Distribution Template Codegen additional_dependencies: - - rich - - pydantic - uv==0.6.0 - entry: uv run python -m llama_stack.scripts.distro_codegen + entry: uv run --extra codegen ./scripts/distro_codegen.py + language: python + pass_filenames: false + require_serial: true + files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$ + +- repo: local + hooks: + - id: openapi-codegen + name: API Spec Codegen + additional_dependencies: + - uv==0.6.2 + entry: sh -c 'uv run --with ".[dev]" ./docs/openapi_generator/run_openapi_generator.sh > /dev/null 2>&1' language: python pass_filenames: false require_serial: true - files: ^llama_stack/templates/.*$ - files: ^llama_stack/providers/.*/inference/.*/models\.py$ ci: autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..62862ebdc --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,304 @@ +# Changelog + +# v0.1.6 +Published on: 2025-03-08T04:35:08Z + +## 0.1.6 Release Notes + +### Build and Test Agents +* Inference: Fixed support for inline vllm provider +* (**New**) Agent: Build & Monitor Agent Workflows with Llama Stack + Anthropic's Best Practice [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb) +* (**New**) Agent: Revamped agent [documentation](https://llama-stack.readthedocs.io/en/latest/building_applications/agent.html) with more details and examples +* Agent: Unify tools and Python SDK Agents API +* Agent: AsyncAgent Python SDK wrapper supporting async client tool calls +* Agent: Support python functions without @client_tool decorator as client tools +* Agent: deprecation for allow_resume_turn flag, and remove need to specify tool_prompt_format +* VectorIO: MilvusDB support added + +### Agent Evals and Model Customization +* (**New**) Agent: Llama Stack RAG Lifecycle 
[Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb) +* Eval: Documentation for eval, scoring, adding new benchmarks +* Eval: Distribution template to run benchmarks on llama & non-llama models +* Eval: Ability to register new custom LLM-as-judge scoring functions +* (**New**) Looking for contributors for open benchmarks. See [documentation](https://llama-stack.readthedocs.io/en/latest/references/evals_reference/index.html#open-benchmark-contributing-guide) for details. + +### Deploy and Monitoring of Agents +* Better support for different log levels across all components for better monitoring + +### Better Engineering +* Enhance OpenAPI spec to include Error types across all APIs +* Moved all tests to /tests and created unit tests to run on each PR +* Removed all dependencies on llama-models repo + + +--- + +# v0.1.5.1 +Published on: 2025-02-28T22:37:44Z + +## 0.1.5.1 Release Notes +* Fixes for security risk in https://github.com/meta-llama/llama-stack/pull/1327 and https://github.com/meta-llama/llama-stack/pull/1328 + +**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.5...v0.1.5.1 + +--- + +# v0.1.5 +Published on: 2025-02-28T18:14:01Z + +## 0.1.5 Release Notes +### Build Agents +* Inference: Support more non-llama models (openai, anthropic, gemini) +* Inference: Can use the provider's model name in addition to the HF alias +* Inference: Fixed issues with calling tools that weren't specified in the prompt +* RAG: Improved system prompt for RAG and no more need for hard-coded rag-tool calling +* Embeddings: Added support for Nemo retriever embedding models +* Tools: Added support for MCP tools in Ollama Distribution +* Distributions: Added new Groq distribution + +### Customize Models +* Save post-trained checkpoint in SafeTensor format to allow Ollama inference provider to use the post-trained model + +### Monitor agents +* More comprehensive logging of agent steps including client 
tools +* Telemetry inputs/outputs are now structured and queryable +* Ability to retrieve agents session, turn, step by ids + +### Better Engineering +* Moved executorch Swift code out of this repo into the llama-stack-client-swift repo, similar to kotlin +* Move most logging to use logger instead of prints +* Completed text /chat-completion and /completion tests + + +--- + +# v0.1.4 +Published on: 2025-02-25T00:02:43Z + +## v0.1.4 Release Notes +Here are the key changes coming as part of this release: + +### Build and Test Agents +* Inference: Added support for non-llama models +* Inference: Added option to list all downloaded models and remove models +* Agent: Introduce new api agents.resume_turn to include client side tool execution in the same turn +* Agent: AgentConfig introduces new variable “tool_config” that allows for better tool configuration and system prompt overrides +* Agent: Added logging for agent step start and completion times +* Agent: Added support for logging for tool execution metadata +* Embedding: Updated /inference/embeddings to support asymmetric models, truncation and variable sized outputs +* Embedding: Updated embedding models for Ollama, Together, and Fireworks with available defaults +* VectorIO: Improved performance of sqlite-vec using chunked writes +### Agent Evals and Model Customization +* Deprecated api /eval-tasks. Use /eval/benchmark instead +* Added CPU training support for TorchTune +### Deploy and Monitoring of Agents +* Consistent view of client and server tool calls in telemetry +### Better Engineering +* Made tests more data-driven for consistent evaluation +* Fixed documentation links and improved API reference generation +* Various small fixes for build scripts and system reliability + + + +--- + +# v0.1.3 +Published on: 2025-02-14T20:24:32Z + +## v0.1.3 Release + +Here are some key changes that are coming as part of this release. 
+ +### Build and Test Agents +Streamlined the initial development experience +- Added support for llama stack run --image-type venv +- Enhanced vector store options with new sqlite-vec provider and improved Qdrant integration +- vLLM improvements for tool calling and logprobs +- Better handling of sporadic code_interpreter tool calls + +### Agent Evals +Better benchmarking and Agent performance assessment +- Renamed eval API /eval-task to /benchmarks +- Improved documentation and notebooks for RAG and evals + +### Deploy and Monitoring of Agents +Improved production readiness +- Added usage metrics collection for chat completions +- CLI improvements for provider information +- Improved error handling and system reliability +- Better model endpoint handling and accessibility +- Improved signal handling on distro server + +### Better Engineering +Infrastructure and code quality improvements +- Faster text-based chat completion tests +- Improved testing for non-streaming agent apis +- Standardized import formatting with ruff linter +- Added conventional commits standard +- Fixed documentation parsing issues + + +--- + +# v0.1.2 +Published on: 2025-02-07T22:06:49Z + +# TL;DR +- Several stabilizations to development flows after the switch to `uv` +- Migrated CI workflows to new OSS repo - [llama-stack-ops](https://github.com/meta-llama/llama-stack-ops) +- Added automated rebuilds for ReadTheDocs +- Llama Stack server supports HTTPS +- Added system prompt overrides support +- Several bug fixes and improvements to documentation (check out Kubernetes deployment guide by @terrytangyuan ) + + +--- + +# v0.1.1 +Published on: 2025-02-02T02:29:24Z + +A bunch of small / big improvements everywhere including support for Windows, switching to `uv` and many provider improvements. 
+ + +--- + +# v0.1.0 +Published on: 2025-01-24T17:47:47Z + +We are excited to announce a stable API release of Llama Stack, which enables developers to build RAG applications and Agents using tools and safety shields, monitor those agents with telemetry, and evaluate the agent with scoring functions. + +## Context +GenAI application developers need more than just an LLM - they need to integrate tools, connect with their data sources, establish guardrails, and ground the LLM responses effectively. Currently, developers must piece together various tools and APIs, complicating the development lifecycle and increasing costs. The result is that developers are spending more time on these integrations rather than focusing on the application logic itself. The bespoke coupling of components also makes it challenging to adopt state-of-the-art solutions in the rapidly evolving GenAI space. This is particularly difficult for open models like Llama, as best practices are not widely established in the open. + +Llama Stack was created to provide developers with a comprehensive and coherent interface that simplifies AI application development and codifies best practices across the Llama ecosystem. Since our launch in September 2024, we have seen a huge uptick in interest in Llama Stack APIs from both AI developers and partners building AI services with Llama models. Partners like Nvidia, Fireworks, and Ollama have collaborated with us to develop implementations across various APIs, including inference, memory, and safety. + +With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and perform custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. 
And with Llama Stack’s plugin architecture and prepackaged distributions, you can choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv, conda, or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience. + +## Release +After iterating on the APIs for the last 3 months, today we’re launching a stable release (V1) of the Llama Stack APIs and the corresponding llama-stack server and client packages (v0.1.0). We now have automated tests for providers. These tests make sure that all provider implementations are verified. Developers can now easily and reliably select distributions or providers based on their specific requirements. + +There are example standalone apps in llama-stack-apps. + + +## Key Features of this release + +- **Unified API Layer** + - Inference: Run LLM models + - RAG: Store and retrieve knowledge for RAG + - Agents: Build multi-step agentic workflows + - Tools: Register tools that can be called by the agent + - Safety: Apply content filtering and safety policies + - Evaluation: Test model and agent quality + - Telemetry: Collect and analyze usage data and complex agentic traces + - Post Training (Coming Soon): Fine-tune models for specific use cases + +- **Rich Provider Ecosystem** + - Local Development: Meta's Reference, Ollama + - Cloud: Fireworks, Together, Nvidia, AWS Bedrock, Groq, Cerebras + - On-premises: Nvidia NIM, vLLM, TGI, Dell-TGI + - On-device: iOS and Android support + +- **Built for Production** + - Pre-packaged distributions for common deployment scenarios + - Backwards compatibility across model versions + - Comprehensive evaluation capabilities + - Full observability and monitoring + +- **Multiple developer interfaces** + - CLI: Command line interface + - Python SDK + - Swift iOS SDK + - Kotlin Android SDK + +- **Sample llama stack applications** + - Python + - 
iOS + - Android + + + +--- + +# v0.1.0rc12 +Published on: 2025-01-22T22:24:01Z + + + +--- + +# v0.0.63 +Published on: 2024-12-18T07:17:43Z + +A small but important bug-fix release to update the URL datatype for the client SDKs. The issue affected multimodal agentic turns especially. + +**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.0.62...v0.0.63 + +--- + +# v0.0.62 +Published on: 2024-12-18T02:39:43Z + + + +--- + +# v0.0.61 +Published on: 2024-12-10T20:50:33Z + + + +--- + +# v0.0.55 +Published on: 2024-11-23T17:14:07Z + + + +--- + +# v0.0.54 +Published on: 2024-11-22T00:36:09Z + + + +--- + +# v0.0.53 +Published on: 2024-11-20T22:18:00Z + +🚀 Initial Release Notes for Llama Stack! + +### Added +- Resource-oriented design for models, shields, memory banks, datasets and eval tasks +- Persistence for registered objects with distribution +- Ability to persist memory banks created for FAISS +- PostgreSQL KVStore implementation +- Environment variable placeholder support in run.yaml files +- Comprehensive Zero-to-Hero notebooks and quickstart guides +- Support for quantized models in Ollama +- Vision models support for Together, Fireworks, Meta-Reference, Ollama, and vLLM +- Bedrock distribution with safety shields support +- Evals API with task registration and scoring functions +- MMLU and SimpleQA benchmark scoring functions +- Hugging Face dataset provider integration for benchmarks +- Support for custom dataset registration from local paths +- Benchmark evaluation CLI tools with visualization tables +- RAG evaluation scoring functions and metrics +- Local persistence for datasets and eval tasks + +### Changed +- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner) +- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`) +- Updated API signatures for dataset and eval task registration +- Restructured folder organization for providers +- Enhanced Docker build configuration +- Added version 
prefixing for REST API routes +- Enhanced evaluation task registration workflow +- Improved benchmark evaluation output formatting +- Restructured evals folder organization for better modularity + +### Removed +- `llama stack configure` command + + +--- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1e4a88f13..505d6b162 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -61,13 +61,32 @@ outlined on that page and do not file a public issue. We use [uv](https://github.com/astral-sh/uv) to manage python dependencies and virtual environments. You can install `uv` by following this [guide](https://docs.astral.sh/uv/getting-started/installation/). + You can install the dependencies by running: ```bash -$ cd llama-stack -$ uv sync --extra dev -$ uv pip install -e . -$ source .venv/bin/activate +cd llama-stack +uv sync --extra dev +uv pip install -e . +source .venv/bin/activate +``` + +> [!NOTE] +> You can pin a specific version of Python to use for `uv` by adding a `.python-version` file in the root project directory. +> Otherwise, `uv` will automatically select a Python version according to the `requires-python` section of the `pyproject.toml`. +> For more info, see the [uv docs around Python versions](https://docs.astral.sh/uv/concepts/python-versions/). + +Note that you can create a dotenv file `.env` that includes necessary environment variables: +``` +LLAMA_STACK_BASE_URL=http://localhost:8321 +LLAMA_STACK_CLIENT_LOG=debug +LLAMA_STACK_PORT=8321 +LLAMA_STACK_CONFIG= +``` + +And then use this dotenv file when running client SDK tests via the following: +```bash +uv run --env-file .env -- pytest -v tests/integration/inference/test_text_inference.py ``` ## Pre-commit Hooks @@ -75,7 +94,7 @@ $ source .venv/bin/activate We use [pre-commit](https://pre-commit.com/) to run linting and formatting checks on your code. 
You can install the pre-commit hooks by running: ```bash -$ uv run pre-commit install +uv run pre-commit install ``` After that, pre-commit hooks will run automatically before each commit. @@ -83,19 +102,35 @@ After that, pre-commit hooks will run automatically before each commit. Alternatively, if you don't want to install the pre-commit hooks, you can run the checks manually by running: ```bash -$ uv run pre-commit run --all-files +uv run pre-commit run --all-files ``` > [!CAUTION] > Before pushing your changes, make sure that the pre-commit hooks have passed successfully. +## Running unit tests + +You can run the unit tests by running: + +```bash +source .venv/bin/activate +./scripts/unit-tests.sh +``` + +If you'd like to run for a non-default version of Python (currently 3.10), pass `PYTHON_VERSION` variable as follows: + +``` +source .venv/bin/activate +PYTHON_VERSION=3.13 ./scripts/unit-tests.sh +``` + ## Adding a new dependency to the project To add a new dependency to the project, you can use the `uv` command. For example, to add `foo` to the project, you can run: ```bash -$ uv add foo -$ uv sync +uv add foo +uv sync ``` ## Coding Style @@ -110,35 +145,35 @@ Some tips about common tasks you work on while contributing to Llama Stack: ### Using `llama stack build` -Building a stack image (conda / docker) will use the production version of the `llama-stack`, `llama-models` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_MODELS_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands. +Building a stack image (conda / docker) will use the production version of the `llama-stack` and `llama-stack-client` packages. 
If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands. Example: ```bash -$ cd work/ -$ git clone https://github.com/meta-llama/llama-stack.git -$ git clone https://github.com/meta-llama/llama-models.git -$ cd llama-stack -$ LLAMA_STACK_DIR=$(pwd) LLAMA_MODELS_DIR=../llama-models llama stack build --template <...> +cd work/ +git clone https://github.com/meta-llama/llama-stack.git +git clone https://github.com/meta-llama/llama-stack-client-python.git +cd llama-stack +LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --template <...> ``` ### Updating Provider Configurations -If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `python llama_stack/scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated. +If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated. ### Building the Documentation If you are making changes to the documentation at [https://llama-stack.readthedocs.io/en/latest/](https://llama-stack.readthedocs.io/en/latest/), you can use the following command to build the documentation and preview your changes. You will need [Sphinx](https://www.sphinx-doc.org/en/master/) and the readthedocs theme. ```bash -$ cd llama-stack/docs -$ uv sync --extra docs +cd llama-stack/docs +uv sync --extra docs # This rebuilds the documentation pages. 
-$ uv run make html +uv run make html # This will start a local server (usually at http://127.0.0.1:8000) that automatically rebuilds and refreshes when you make changes to the documentation. -$ uv run sphinx-autobuild source build/html --write-all +uv run sphinx-autobuild source build/html --write-all ``` ### Update API Documentation @@ -146,8 +181,7 @@ $ uv run sphinx-autobuild source build/html --write-all If you modify or add new API endpoints, update the API documentation accordingly. You can do this by running the following command: ```bash -$ uv sync --extra dev -$ uv run ./docs/openapi_generator/run_openapi_generator.sh +uv run --with ".[dev]" ./docs/openapi_generator/run_openapi_generator.sh ``` The generated API documentation will be available in `docs/_static/`. Make sure to review the changes before committing. diff --git a/MANIFEST.in b/MANIFEST.in index ec45d8f08..572a9ac0a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,8 @@ include pyproject.toml include distributions/dependencies.json +include llama_stack/models/llama/llama3/tokenizer.model include llama_stack/distribution/*.sh include llama_stack/cli/scripts/*.sh include llama_stack/templates/*/*.yaml -include llama_stack/providers/tests/test_cases/*.json +include llama_stack/providers/tests/test_cases/inference/*.json +include llama_stack/models/llama/*/*.md diff --git a/README.md b/README.md index 3946deea6..918433d51 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,8 @@ [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/) [![License](https://img.shields.io/pypi/l/llama_stack.svg)](https://github.com/meta-llama/llama-stack/blob/main/LICENSE) [![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack) +[![Unit 
Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain) +[![Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain) [**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) @@ -32,7 +34,7 @@ Llama Stack standardizes the core building blocks that simplify AI application d By reducing friction and complexity, Llama Stack empowers developers to focus on what they do best: building transformative generative AI applications. ### API Providers -Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack. +Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack. 
| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | |:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:| @@ -50,6 +52,10 @@ Here is a list of the various API providers and available distributions that can | PG Vector | Single Node | | | ✅ | | | | PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | vLLM | Hosted and Single Node | | ✅ | | | | +| OpenAI | Hosted | | ✅ | | | | +| Anthropic | Hosted | | ✅ | | | | +| Gemini | Hosted | | ✅ | | | | + ### Distributions @@ -67,26 +73,6 @@ A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider | Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) | | vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) | -### Installation - -You have two ways to install this repository: - -* **Install as a package**: - You can install the repository directly from [PyPI](https://pypi.org/project/llama-stack/) by running the following command: - ```bash - pip install llama-stack - ``` - -* **Install from source**: - If you prefer to install from the source code, we recommend using [uv](https://github.com/astral-sh/uv). - Then, run the following commands: - ```bash - git clone git@github.com:meta-llama/llama-stack.git - cd llama-stack - - uv sync - uv pip install -e . 
- ``` ### Documentation diff --git a/distributions/dependencies.json b/distributions/dependencies.json index 18a2484f2..da0de2820 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -7,10 +7,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", "mcp", "nltk", @@ -23,6 +25,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -30,6 +33,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn" ], "cerebras": [ @@ -40,10 +44,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", "nltk", "numpy", @@ -55,6 +61,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -62,6 +69,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -73,10 +81,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", "fastapi", "fire", "fireworks-ai", "httpx", + "langdetect", "matplotlib", "mcp", "nltk", @@ -89,6 +99,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -97,6 +108,7 @@ "sqlite-vec", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -109,11 +121,13 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", "huggingface_hub", + "langdetect", "matplotlib", "nltk", "numpy", @@ -125,6 +139,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -132,6 +147,47 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + 
], + "dev": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "fastapi", + "fire", + "fireworks-ai", + "httpx", + "langdetect", + "litellm", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlite-vec", + "tqdm", + "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -143,11 +199,13 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "fireworks-ai", "httpx", + "langdetect", "matplotlib", "mcp", "nltk", @@ -160,6 +218,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -167,10 +226,46 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ], + "groq": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "litellm", + "matplotlib", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn" + ], "hf-endpoint": [ "aiohttp", "aiosqlite", @@ -179,11 +274,13 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", "huggingface_hub", + "langdetect", "matplotlib", "mcp", "nltk", @@ -196,6 +293,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -203,6 +301,7 
@@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn" ], "hf-serverless": [ @@ -213,11 +312,13 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", "huggingface_hub", + "langdetect", "matplotlib", "mcp", "nltk", @@ -230,6 +331,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -237,6 +339,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -249,11 +352,13 @@ "chardet", "chromadb-client", "datasets", + "emoji", "fairscale", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "lm-format-enforcer", "matplotlib", "mcp", @@ -267,6 +372,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -277,6 +383,7 @@ "torchvision", "tqdm", "transformers", + "tree_sitter", "uvicorn", "zmq" ], @@ -288,12 +395,14 @@ "chardet", "chromadb-client", "datasets", + "emoji", "fairscale", "faiss-cpu", "fastapi", "fbgemm-gpu", "fire", "httpx", + "langdetect", "lm-format-enforcer", "matplotlib", "mcp", @@ -307,6 +416,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -318,21 +428,21 @@ "torchvision", "tqdm", "transformers", + "tree_sitter", "uvicorn", "zmq" ], "nvidia": [ "aiosqlite", - "autoevals", "blobfile", "chardet", - "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", - "mcp", "nltk", "numpy", "openai", @@ -343,6 +453,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -350,6 +461,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn" ], "ollama": [ @@ -360,10 +472,14 @@ "chardet", "chromadb-client", "datasets", + "emoji", + "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", + "mcp", "nltk", "numpy", "ollama", @@ -375,27 +491,30 @@ 
"psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", "scipy", "sentencepiece", - "sqlite-vec", "tqdm", "transformers", + "tree_sitter", "uvicorn" ], - "remote-vllm": [ + "open-benchmark": [ "aiosqlite", "autoevals", "blobfile", "chardet", "chromadb-client", "datasets", - "faiss-cpu", + "emoji", "fastapi", "fire", "httpx", + "langdetect", + "litellm", "matplotlib", "mcp", "nltk", @@ -408,6 +527,45 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlite-vec", + "together", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn" + ], + "passthrough": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -415,6 +573,45 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "remote-vllm": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -456,11 +653,13 @@ "chardet", "chromadb-client", "datasets", + 
"emoji", "faiss-cpu", "fastapi", "fire", "httpx", "huggingface_hub", + "langdetect", "matplotlib", "mcp", "nltk", @@ -473,6 +672,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -480,6 +680,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -491,10 +692,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", "mcp", "nltk", @@ -507,6 +710,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -515,6 +719,7 @@ "together", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -526,10 +731,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", "mcp", "nltk", @@ -542,6 +749,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -549,6 +757,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn", "vllm", "sentence-transformers --no-deps", diff --git a/distributions/ramalama/faiss_store.db b/distributions/ramalama/faiss_store.db new file mode 100644 index 000000000..573e60e90 Binary files /dev/null and b/distributions/ramalama/faiss_store.db differ diff --git a/distributions/remote-vllm/compose.yaml b/distributions/remote-vllm/compose.yaml index c387e1049..9c21a4c13 100644 --- a/distributions/remote-vllm/compose.yaml +++ b/distributions/remote-vllm/compose.yaml @@ -71,7 +71,6 @@ services: condition: service_healthy - vllm-${VLLM_SAFETY_MODEL:+safety}: condition: service_healthy - # image: llamastack/distribution-remote-vllm image: llamastack/distribution-remote-vllm:test-0.0.52rc3 volumes: - ~/.llama:/root/.llama diff --git 
a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 2a9f4b6f7..4bb58c8dd 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -6,8 +6,8 @@ OpenAPI specification - - + +