diff --git a/.cursor/rules/general.mdc b/.cursor/rules/general.mdc
deleted file mode 100644
index 24daef2ba..000000000
--- a/.cursor/rules/general.mdc
+++ /dev/null
@@ -1,9 +0,0 @@
----
-description: General rules always applicable across the project
-globs:
-alwaysApply: true
----
-# Style
-
-- Comments must add value to code. Don't write filler comments explaining what you are doing next; they just add noise.
-- Add a comment to clarify surprising behavior which would not be obvious. Good variable naming and clear code organization is more important.
diff --git a/.github/TRIAGERS.md b/.github/TRIAGERS.md
new file mode 100644
index 000000000..d4ef6d1ac
--- /dev/null
+++ b/.github/TRIAGERS.md
@@ -0,0 +1,2 @@
+# This file documents Triage members in the Llama Stack community
+@franciscojavierarceo @leseb
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 4aba604dd..d68af5615 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -5,4 +5,19 @@ updates:
   - package-ecosystem: "github-actions"
     directory: "/" # Will use the default workflow location of `.github/workflows`
     schedule:
-      interval: "daily"
+      interval: "weekly"
+      day: "saturday"
+    commit-message:
+      prefix: chore(github-deps)
+  - package-ecosystem: "uv"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      day: "saturday"
+    # ignore all non-security updates: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#open-pull-requests-limit
+    open-pull-requests-limit: 0
+    labels:
+      - type/dependencies
+      - python
+    commit-message:
+      prefix: chore(python-deps)
diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml
new file mode 100644
index 000000000..5b63e231c
--- /dev/null
+++ b/.github/workflows/changelog.yml
@@ -0,0 +1,29 @@
+name: Update Changelog
+
+on:
+  release:
+    types: [published, unpublished, created, edited, deleted, released]
+
+permissions:
+  contents: read
+
+jobs:
+  generate_changelog:
+    name: Generate changelog
+    permissions:
+      contents: write # for peter-evans/create-pull-request to create branch
+      pull-requests: write # for peter-evans/create-pull-request to create a PR
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: main
+          fetch-depth: 0
+      - run: |
+          python ./scripts/gen-changelog.py
+      - uses: peter-evans/create-pull-request@v7
+        with:
+          title: 'docs: update CHANGELOG.md for ${{ github.ref_name }}'
+          commit-message: 'docs: update CHANGELOG.md for ${{ github.ref_name }}'
+          branch: create-pull-request/changelog
+          signoff: true
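The changelog workflow above delegates the actual generation to `./scripts/gen-changelog.py`, which is not part of this diff. As a rough sketch of what such a script might do (the output format and the use of the public GitHub releases API are assumptions, not code from this PR):

```python
#!/usr/bin/env python3
# Hypothetical sketch only: scripts/gen-changelog.py is referenced by the workflow
# above but not included in this diff, so the real implementation may differ.
import json
import urllib.request

REPO = "meta-llama/llama-stack"


def fetch_releases(repo: str) -> list[dict]:
    # Pull the release list from the public GitHub API.
    url = f"https://api.github.com/repos/{repo}/releases?per_page=100"
    with urllib.request.urlopen(url) as resp:
        return json.load(resp)


def main() -> None:
    lines = ["# Changelog", ""]
    for release in fetch_releases(REPO):
        lines.append(f"# {release['tag_name']}")
        lines.append(f"Published on: {release['published_at']}")
        lines.append("")
        lines.append(release.get("body") or "")
        lines.append("---")
        lines.append("")
    with open("CHANGELOG.md", "w") as f:
        f.write("\n".join(lines))


if __name__ == "__main__":
    main()
```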
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
new file mode 100644
index 000000000..475b26d0a
--- /dev/null
+++ b/.github/workflows/integration-tests.yml
@@ -0,0 +1,97 @@
+name: Integration Tests
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+    paths:
+      - 'distributions/**'
+      - 'llama_stack/**'
+      - 'tests/integration/**'
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - '.github/workflows/integration-tests.yml' # This workflow
+
+jobs:
+  test-matrix:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Listing tests manually since some of them currently fail
+        # TODO: generate matrix list from tests/integration when fixed
+        test-type: [inference, datasets, inspect, scoring, post_training, providers]
+      fail-fast: false # we want to run all tests regardless of failure
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install Ollama
+        run: |
+          curl -fsSL https://ollama.com/install.sh | sh
+
+      - name: Pull Ollama image
+        run: |
+          ollama pull llama3.2:3b-instruct-fp16
+
+      - name: Start Ollama in background
+        run: |
+          nohup ollama run llama3.2:3b-instruct-fp16 > ollama.log 2>&1 &
+
+      - name: Set Up Environment and Install Dependencies
+        run: |
+          uv sync --extra dev --extra test
+          uv pip install ollama faiss-cpu
+          # always test against the latest version of the client
+          uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
+          uv pip install -e .
+          llama stack build --template ollama --image-type venv
+
+      - name: Wait for Ollama to start
+        run: |
+          echo "Waiting for Ollama..."
+          for i in {1..30}; do
+            if curl -s http://localhost:11434 | grep -q "Ollama is running"; then
+              echo "Ollama is running!"
+              exit 0
+            fi
+            sleep 1
+          done
+          echo "Ollama failed to start"
+          ollama ps
+          cat ollama.log
+          exit 1
+
+      - name: Start Llama Stack server in background
+        env:
+          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
+        run: |
+          source .venv/bin/activate
+          nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 &
+
+      - name: Wait for Llama Stack server to be ready
+        run: |
+          echo "Waiting for Llama Stack server..."
+          for i in {1..30}; do
+            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
+              echo "Llama Stack server is up!"
+              exit 0
+            fi
+            sleep 1
+          done
+          echo "Llama Stack server failed to start"
+          cat server.log
+          exit 1
+
+      - name: Run Integration Tests
+        env:
+          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
+        run: |
+          uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2
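The two wait steps above poll the Ollama and Llama Stack endpoints with a shell loop and `curl`. Purely as an illustration, the same readiness check can be written in Python; the URLs, the expected response fragments, and the 30-second budget mirror the workflow, while the helper itself is an assumption:

```python
# Sketch of the readiness checks above, using only the Python standard library.
import time
import urllib.error
import urllib.request


def wait_for(url: str, expected: str, timeout_s: int = 30) -> bool:
    """Poll `url` once per second until its body contains `expected`."""
    for _ in range(timeout_s):
        try:
            with urllib.request.urlopen(url, timeout=2) as resp:
                if expected in resp.read().decode(errors="ignore"):
                    return True
        except (urllib.error.URLError, OSError):
            pass  # server not accepting connections yet
        time.sleep(1)
    return False


if __name__ == "__main__":
    assert wait_for("http://localhost:11434", "Ollama is running")
    assert wait_for("http://localhost:8321/v1/health", "OK")
```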
diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml
new file mode 100644
index 000000000..e6871bf99
--- /dev/null
+++ b/.github/workflows/providers-build.yml
@@ -0,0 +1,79 @@
+name: Test Llama Stack Build
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'llama_stack/cli/stack/build.py'
+      - 'llama_stack/cli/stack/_build.py'
+      - 'llama_stack/distribution/build.*'
+      - 'llama_stack/distribution/*.sh'
+      - '.github/workflows/providers-build.yml'
+  pull_request:
+    paths:
+      - 'llama_stack/cli/stack/build.py'
+      - 'llama_stack/cli/stack/_build.py'
+      - 'llama_stack/distribution/build.*'
+      - 'llama_stack/distribution/*.sh'
+      - '.github/workflows/providers-build.yml'
+
+jobs:
+  generate-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      templates: ${{ steps.set-matrix.outputs.templates }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Generate Template List
+        id: set-matrix
+        run: |
+          templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          echo "templates=$templates" >> "$GITHUB_OUTPUT"
+
+  build:
+    needs: generate-matrix
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        template: ${{ fromJson(needs.generate-matrix.outputs.templates) }}
+        image-type: [venv, container]
+      fail-fast: false # We want to run all jobs even if some fail
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install LlamaStack
+        run: |
+          uv venv
+          source .venv/bin/activate
+          uv pip install -e .
+
+      - name: Print build dependencies
+        run: |
+          uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
+
+      - name: Run Llama Stack Build
+        run: |
+          # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
+          # LLAMA_STACK_DIR is set to the current directory so we are building from the source
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test
+
+      - name: Print dependencies in the image
+        if: matrix.image-type == 'venv'
+        run: |
+          source test/bin/activate
+          uv pip list
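The `Generate Template List` step above builds the matrix with an `ls | awk | jq` pipeline. A rough Python equivalent of that pipeline, shown only for clarity (it is not part of the repository), would be:

```python
# Rough Python equivalent of the shell pipeline in the Generate Template List step:
# collect the directory name of every llama_stack/templates/*/*build.yaml file and
# print it as a compact JSON list suitable for the build matrix.
import glob
import json
import pathlib

templates = sorted({pathlib.Path(p).parent.name for p in glob.glob("llama_stack/templates/*/*build.yaml")})
print(json.dumps(templates, separators=(",", ":")))
```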
diff --git a/.github/workflows/stale_bot.yml b/.github/workflows/stale_bot.yml
new file mode 100644
index 000000000..2039fcbb4
--- /dev/null
+++ b/.github/workflows/stale_bot.yml
@@ -0,0 +1,45 @@
+name: Close stale issues and PRs
+
+on:
+  schedule:
+    - cron: '0 0 * * *' # every day at midnight
+
+env:
+  LC_ALL: en_US.UTF-8
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+  contents: read
+
+jobs:
+  stale:
+    permissions:
+      issues: write
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+      - name: Stale Action
+        uses: actions/stale@v9
+        with:
+          stale-issue-label: 'stale'
+          stale-issue-message: >
+            This issue has been automatically marked as stale because it has not had activity within 60 days.
+            It will be automatically closed if no further activity occurs within 30 days.
+          close-issue-message: >
+            This issue has been automatically closed due to inactivity.
+            Please feel free to reopen if you feel it is still relevant!
+          days-before-issue-stale: 60
+          days-before-issue-close: 30
+          stale-pr-label: 'stale'
+          stale-pr-message: >
+            This pull request has been automatically marked as stale because it has not had activity within 60 days.
+            It will be automatically closed if no further activity occurs within 30 days.
+          close-pr-message: >
+            This pull request has been automatically closed due to inactivity.
+            Please feel free to reopen if you intend to continue working on it!
+          days-before-pr-stale: 60
+          days-before-pr-close: 30
+          operations-per-run: 300
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 28e749aff..6d6e91f22 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -1,36 +1,55 @@
 name: Unit Tests
 
 on:
+  push:
+    branches: [ main ]
   pull_request:
     branches: [ main ]
+    paths:
+      - 'distributions/**'
+      - 'llama_stack/**'
+      - 'tests/unit/**'
+      - 'uv.lock'
+      - 'pyproject.toml'
+      - 'requirements.txt'
+      - '.github/workflows/unit-tests.yml' # This workflow
   workflow_dispatch:
 
 jobs:
   unit-tests:
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python:
+          - "3.10"
+          - "3.11"
+          - "3.12"
+          - "3.13"
     steps:
     - uses: actions/checkout@v4
 
-    - name: Set up Python
+    - name: Set up Python ${{ matrix.python }}
      uses: actions/setup-python@v5
      with:
-        python-version: '3.10.16'
+        python-version: ${{ matrix.python }}
 
    - uses: astral-sh/setup-uv@v5
      with:
-        python-version: '3.10.16'
+        python-version: ${{ matrix.python }}
        enable-cache: false
 
    - name: Run unit tests
      run: |
-        uv run -p 3.10.16 --with . --with ".[dev]" --with ".[test]" pytest -s -v tests/unit/ --junitxml=pytest-report.xml
+        PYTHON_VERSION=${{ matrix.python }} ./scripts/unit-tests.sh --cov=llama_stack --junitxml=pytest-report-${{ matrix.python }}.xml --cov-report=html:htmlcov-${{ matrix.python }}
 
    - name: Upload test results
      if: always()
      uses: actions/upload-artifact@v4
      with:
-        name: test-results
+        name: test-results-${{ matrix.python }}
        path: |
          .pytest_cache/
-          pytest-report.xml
+          pytest-report-${{ matrix.python }}.xml
+          htmlcov-${{ matrix.python }}/
        retention-days: 7
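The matrix job now delegates to `./scripts/unit-tests.sh`, which is referenced here but not included in this diff. A hypothetical Python stand-in for what that script presumably wraps (the exact flags and contents may differ) could look like:

```python
# Hypothetical stand-in for scripts/unit-tests.sh (not part of this diff): run the
# unit test suite and emit a per-Python-version JUnit report, as the workflow expects.
import os
import sys

import pytest

python_version = os.environ.get("PYTHON_VERSION", "3.10")
sys.exit(
    pytest.main(
        [
            "-s",
            "-v",
            "tests/unit/",
            f"--junitxml=pytest-report-{python_version}.xml",
        ]
    )
)
```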
diff --git a/.gitignore b/.gitignore
index 163b65947..0ef25cdf1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,5 @@ docs/src
 pyrightconfig.json
 venv/
 pytest-report.xml
+.coverage
+.python-version
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 926ae21cc..e83e64672 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,6 +8,7 @@ repos:
    rev: v5.0.0  # Latest stable version
    hooks:
    - id: check-merge-conflict
+      args: ['--assume-in-merge']
    - id: trailing-whitespace
      exclude: '\.py$'  # Exclude Python files as Ruff already handles them
    - id: check-added-large-files
@@ -76,12 +77,23 @@ repos:
      name: Distribution Template Codegen
      additional_dependencies:
        - uv==0.6.0
-      entry: uv run --extra codegen python -m llama_stack.scripts.distro_codegen
+      entry: uv run --extra codegen ./scripts/distro_codegen.py
      language: python
      pass_filenames: false
      require_serial: true
      files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
 
+- repo: local
+  hooks:
+    - id: openapi-codegen
+      name: API Spec Codegen
+      additional_dependencies:
+        - uv==0.6.2
+      entry: sh -c 'uv run --with ".[dev]" ./docs/openapi_generator/run_openapi_generator.sh > /dev/null 2>&1'
+      language: python
+      pass_filenames: false
+      require_serial: true
+
 ci:
    autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
    autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
diff --git a/.python-version b/.python-version
deleted file mode 100644
index c8cfe3959..000000000
--- a/.python-version
+++ /dev/null
@@ -1 +0,0 @@
-3.10
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e639328f0..505d6b162 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -61,6 +61,7 @@ outlined on that page and do not file a public issue.
 We use [uv](https://github.com/astral-sh/uv) to manage python dependencies and virtual environments.
 You can install `uv` by following this [guide](https://docs.astral.sh/uv/getting-started/installation/).
+
 You can install the dependencies by running:
 
 ```bash
@@ -70,6 +71,11 @@ uv pip install -e .
 source .venv/bin/activate
 ```
 
+> [!NOTE]
+> You can pin a specific version of Python to use for `uv` by adding a `.python-version` file in the root project directory.
+> Otherwise, `uv` will automatically select a Python version according to the `requires-python` section of the `pyproject.toml`.
+> For more info, see the [uv docs around Python versions](https://docs.astral.sh/uv/concepts/python-versions/).
+
 Note that you can create a dotenv file `.env` that includes necessary environment variables:
 ```
 LLAMA_STACK_BASE_URL=http://localhost:8321
@@ -80,7 +86,7 @@ LLAMA_STACK_CONFIG=
 And then use this dotenv file when running client SDK tests via the following:
 
 ```bash
-uv run --env-file .env -- pytest -v tests/api/inference/test_text_inference.py
+uv run --env-file .env -- pytest -v tests/integration/inference/test_text_inference.py
 ```
 
 ## Pre-commit Hooks
@@ -102,6 +108,22 @@ uv run pre-commit run --all-files
 > [!CAUTION]
 > Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
 
+## Running unit tests
+
+You can run the unit tests by running:
+
+```bash
+source .venv/bin/activate
+./scripts/unit-tests.sh
+```
+
+If you'd like to run for a non-default version of Python (currently 3.10), pass `PYTHON_VERSION` variable as follows:
+
+```
+source .venv/bin/activate
+PYTHON_VERSION=3.13 ./scripts/unit-tests.sh
+```
+
 ## Adding a new dependency to the project
 
 To add a new dependency to the project, you can use the `uv` command. For example, to add `foo` to the project, you can run:
@@ -137,7 +159,7 @@ LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama
 
 ### Updating Provider Configurations
 
-If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `python llama_stack/scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated.
+If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated.
 
 ### Building the Documentation
 
@@ -159,8 +181,7 @@ uv run sphinx-autobuild source build/html --write-all
 If you modify or add new API endpoints, update the API documentation accordingly. You can do this by running the following command:
 
 ```bash
-uv sync --extra dev
-uv run ./docs/openapi_generator/run_openapi_generator.sh
+uv run --with ".[dev]" ./docs/openapi_generator/run_openapi_generator.sh
 ```
 
 The generated API documentation will be available in `docs/_static/`. Make sure to review the changes before committing.
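The `.env` variables described in CONTRIBUTING.md are consumed by the client SDK tests under `tests/integration/`. As a purely illustrative example (not code from this repository), a test could pick up `LLAMA_STACK_BASE_URL` and talk to a running stack like this, assuming the documented `LlamaStackClient` constructor and `models.list()` call:

```python
# Illustrative only: a minimal check that the environment configured in .env is
# usable from the llama-stack-client SDK. The real test layout may differ.
import os

from llama_stack_client import LlamaStackClient


def test_server_is_reachable():
    base_url = os.environ.get("LLAMA_STACK_BASE_URL", "http://localhost:8321")
    client = LlamaStackClient(base_url=base_url)
    # If the stack is up, at least one model should be registered.
    assert client.models.list()
```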
diff --git a/README.md b/README.md
index b24e69514..918433d51 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,8 @@
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/)
 [![License](https://img.shields.io/pypi/l/llama_stack.svg)](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
 [![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack)
+[![Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
+[![Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
 
 [**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb)
 
@@ -50,6 +52,10 @@ Here is a list of the various API providers and available distributions that can
 | PG Vector | Single Node | | | ✅ | | |
 | PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | |
 | vLLM | Hosted and Single Node | | ✅ | | | |
+| OpenAI | Hosted | | ✅ | | | |
+| Anthropic | Hosted | | ✅ | | | |
+| Gemini | Hosted | | ✅ | | | |
+
 
 ### Distributions
 
@@ -67,26 +73,6 @@ A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider
 | Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) |
 | vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) |
 
-### Installation
-
-You have two ways to install this repository:
-
-* **Install as a package**:
-   You can install the repository directly from [PyPI](https://pypi.org/project/llama-stack/) by running the following command:
-   ```bash
-   pip install llama-stack
-   ```
-
-* **Install from source**:
-   If you prefer to install from the source code, we recommend using [uv](https://github.com/astral-sh/uv).
-   Then, run the following commands:
-   ```bash
-    git clone git@github.com:meta-llama/llama-stack.git
-    cd llama-stack
-
-    uv sync
-    uv pip install -e .
- ``` ### Documentation diff --git a/distributions/dependencies.json b/distributions/dependencies.json index 3defd082a..4439a9aae 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -7,10 +7,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", "mcp", "nltk", @@ -23,6 +25,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -30,6 +33,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn" ], "cerebras": [ @@ -40,10 +44,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", "nltk", "numpy", @@ -55,6 +61,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -62,6 +69,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -73,10 +81,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", "fastapi", "fire", "fireworks-ai", "httpx", + "langdetect", "matplotlib", "mcp", "nltk", @@ -89,6 +99,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -97,6 +108,7 @@ "sqlite-vec", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -109,11 +121,13 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", "huggingface_hub", + "langdetect", "matplotlib", "nltk", "numpy", @@ -125,6 +139,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -132,6 +147,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -143,10 +159,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", "fastapi", "fire", "fireworks-ai", "httpx", + "langdetect", "litellm", "matplotlib", "mcp", @@ -160,6 +178,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -168,6 +187,7 @@ "sqlite-vec", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -179,11 +199,13 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "fireworks-ai", "httpx", + "langdetect", "matplotlib", "mcp", "nltk", @@ -196,6 +218,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -203,6 +226,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -213,10 +237,12 @@ "blobfile", "chardet", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "litellm", "matplotlib", "nltk", @@ -229,6 +255,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -236,6 +263,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn" ], "hf-endpoint": [ @@ -246,11 +274,13 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", "huggingface_hub", + "langdetect", "matplotlib", "mcp", "nltk", @@ -263,6 +293,7 @@ "psycopg2-binary", "pymongo", "pypdf", 
+ "pythainlp", "redis", "requests", "scikit-learn", @@ -270,6 +301,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn" ], "hf-serverless": [ @@ -280,11 +312,13 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", "huggingface_hub", + "langdetect", "matplotlib", "mcp", "nltk", @@ -297,6 +331,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -304,6 +339,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -316,11 +352,13 @@ "chardet", "chromadb-client", "datasets", + "emoji", "fairscale", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "lm-format-enforcer", "matplotlib", "mcp", @@ -334,6 +372,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -344,6 +383,7 @@ "torchvision", "tqdm", "transformers", + "tree_sitter", "uvicorn", "zmq" ], @@ -355,12 +395,14 @@ "chardet", "chromadb-client", "datasets", + "emoji", "fairscale", "faiss-cpu", "fastapi", "fbgemm-gpu", "fire", "httpx", + "langdetect", "lm-format-enforcer", "matplotlib", "mcp", @@ -374,6 +416,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -385,21 +428,21 @@ "torchvision", "tqdm", "transformers", + "tree_sitter", "uvicorn", "zmq" ], "nvidia": [ "aiosqlite", - "autoevals", "blobfile", "chardet", - "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", - "mcp", "nltk", "numpy", "openai", @@ -410,6 +453,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -417,6 +461,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn" ], "ollama": [ @@ -427,9 +472,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", + "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", "mcp", "nltk", @@ -443,27 +491,30 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", "scipy", "sentencepiece", - "sqlite-vec", "tqdm", "transformers", + "tree_sitter", "uvicorn" ], - "remote-vllm": [ + "open-benchmark": [ "aiosqlite", "autoevals", "blobfile", "chardet", "chromadb-client", "datasets", - "faiss-cpu", + "emoji", "fastapi", "fire", "httpx", + "langdetect", + "litellm", "matplotlib", "mcp", "nltk", @@ -476,6 +527,45 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlite-vec", + "together", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn" + ], + "passthrough": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -483,6 +573,45 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "remote-vllm": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", 
+ "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -524,11 +653,13 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", "huggingface_hub", + "langdetect", "matplotlib", "mcp", "nltk", @@ -541,6 +672,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -548,6 +680,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -559,10 +692,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", "mcp", "nltk", @@ -575,6 +710,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -583,6 +719,7 @@ "together", "tqdm", "transformers", + "tree_sitter", "uvicorn", "sentence-transformers --no-deps", "torch torchvision --index-url https://download.pytorch.org/whl/cpu" @@ -594,10 +731,12 @@ "chardet", "chromadb-client", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "langdetect", "matplotlib", "mcp", "nltk", @@ -610,6 +749,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -617,6 +757,7 @@ "sentencepiece", "tqdm", "transformers", + "tree_sitter", "uvicorn", "vllm", "sentence-transformers --no-deps", diff --git a/distributions/ramalama/faiss_store.db b/distributions/ramalama/faiss_store.db new file mode 100644 index 000000000..573e60e90 Binary files /dev/null and b/distributions/ramalama/faiss_store.db differ diff --git a/distributions/remote-vllm/compose.yaml b/distributions/remote-vllm/compose.yaml index c387e1049..9c21a4c13 100644 --- a/distributions/remote-vllm/compose.yaml +++ b/distributions/remote-vllm/compose.yaml @@ -71,7 +71,6 @@ services: condition: service_healthy - vllm-${VLLM_SAFETY_MODEL:+safety}: condition: service_healthy - # image: llamastack/distribution-remote-vllm image: llamastack/distribution-remote-vllm:test-0.0.52rc3 volumes: - ~/.llama:/root/.llama diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 1a8169090..c3c18774e 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -6,8 +6,8 @@ OpenAPI specification - - + +