diff --git a/.github/workflows/Dockerfile b/.github/workflows/Dockerfile
new file mode 100644
index 000000000..f661aa36b
--- /dev/null
+++ b/.github/workflows/Dockerfile
@@ -0,0 +1 @@
+FROM distribution-kvant:dev
\ No newline at end of file
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 000000000..ea9e90c4a
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,86 @@
+# Builds the kvant llama-stack distribution image and pushes it to the
+# git.kvant.cloud registry (push is skipped for pull requests).
+name: Build and Push container
+run-name: Build and Push container
+on:
+  workflow_dispatch:
+  #schedule:
+  #  - cron: "0 10 * * *"
+  push:
+    branches:
+      - 'main'
+    tags:
+      - 'v*'
+  pull_request:
+    branches:
+      - 'main'
+env:
+  IMAGE: git.kvant.cloud/${{github.repository}}
+jobs:
+  build_concierge_backend:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # Computed inline instead of via a third-party action tracked at a
+      # mutable @master ref: this job holds registry write credentials.
+      - name: Set current time
+        id: current_time
+        run: echo "time=$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$GITHUB_OUTPUT"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to git.kvant.cloud registry
+        uses: docker/login-action@v3
+        with:
+          registry: git.kvant.cloud
+          username: ${{ vars.ORG_PACKAGE_WRITER_USERNAME }}
+          password: ${{ secrets.ORG_PACKAGE_WRITER_TOKEN }}
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          # list of Docker images to use as base name for tags
+          images: |
+            ${{env.IMAGE}}
+          # generate Docker tags based on the following events/attributes
+          tags: |
+            type=schedule
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+
+      - name: Install uv
+        run: pipx install uv
+
+      # NOTE(review): presumably produces the distribution-kvant:dev image that
+      # .github/workflows/Dockerfile builds FROM - confirm.
+      - name: Build
+        env:
+          USE_COPY_NOT_MOUNT: "true"
+          LLAMA_STACK_DIR: .
+        run: uvx --from . llama stack build --template kvant --image-type container
+
+      - name: Build and push to gitea registry
+        uses: docker/build-push-action@v6
+        with:
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          context: .github/workflows
+          provenance: mode=max
+          sbom: true
+          build-args: |
+            BUILD_DATE=${{ steps.current_time.outputs.time }}
+          # steps.meta.outputs.version is the sanitized tag (e.g. pr-12);
+          # github.ref_name can contain "/" and is then not a valid image tag.
+          cache-from: |
+            type=registry,ref=${{ env.IMAGE }}:buildcache
+            type=registry,ref=${{ env.IMAGE }}:${{ steps.meta.outputs.version }}
+            type=registry,ref=${{ env.IMAGE }}:main
+          cache-to: type=registry,ref=${{ env.IMAGE }}:buildcache,mode=max,image-manifest=true
diff --git a/.github/workflows/changelog.yml b/.github/workflows_upstream/changelog.yml
similarity index 100%
rename from .github/workflows/changelog.yml
rename to .github/workflows_upstream/changelog.yml
diff --git a/.github/workflows/gha_workflow_llama_stack_tests.yml b/.github/workflows_upstream/gha_workflow_llama_stack_tests.yml
similarity index 100%
rename from .github/workflows/gha_workflow_llama_stack_tests.yml
rename to .github/workflows_upstream/gha_workflow_llama_stack_tests.yml
diff --git a/.github/workflows/install-script-ci.yml b/.github/workflows_upstream/install-script-ci.yml
similarity index 100%
rename from .github/workflows/install-script-ci.yml
rename to .github/workflows_upstream/install-script-ci.yml
diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows_upstream/integration-auth-tests.yml
similarity index 100%
rename from .github/workflows/integration-auth-tests.yml
rename to .github/workflows_upstream/integration-auth-tests.yml
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows_upstream/integration-tests.yml
similarity index 100%
rename from .github/workflows/integration-tests.yml
rename to .github/workflows_upstream/integration-tests.yml
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows_upstream/pre-commit.yml
similarity index 
100% rename from .github/workflows/pre-commit.yml rename to .github/workflows_upstream/pre-commit.yml diff --git a/.github/workflows/providers-build.yml b/.github/workflows_upstream/providers-build.yml similarity index 100% rename from .github/workflows/providers-build.yml rename to .github/workflows_upstream/providers-build.yml diff --git a/.github/workflows/semantic-pr.yml b/.github/workflows_upstream/semantic-pr.yml similarity index 100% rename from .github/workflows/semantic-pr.yml rename to .github/workflows_upstream/semantic-pr.yml diff --git a/.github/workflows/stale_bot.yml b/.github/workflows_upstream/stale_bot.yml similarity index 100% rename from .github/workflows/stale_bot.yml rename to .github/workflows_upstream/stale_bot.yml diff --git a/.github/workflows/test-external-providers.yml b/.github/workflows_upstream/test-external-providers.yml similarity index 100% rename from .github/workflows/test-external-providers.yml rename to .github/workflows_upstream/test-external-providers.yml diff --git a/.github/workflows/tests.yml b/.github/workflows_upstream/tests.yml similarity index 100% rename from .github/workflows/tests.yml rename to .github/workflows_upstream/tests.yml diff --git a/.github/workflows/unit-tests.yml b/.github/workflows_upstream/unit-tests.yml similarity index 100% rename from .github/workflows/unit-tests.yml rename to .github/workflows_upstream/unit-tests.yml diff --git a/.github/workflows/update-readthedocs.yml b/.github/workflows_upstream/update-readthedocs.yml similarity index 100% rename from .github/workflows/update-readthedocs.yml rename to .github/workflows_upstream/update-readthedocs.yml diff --git a/llama_stack/templates/dependencies.json b/llama_stack/templates/dependencies.json new file mode 100644 index 000000000..47a35edc0 --- /dev/null +++ b/llama_stack/templates/dependencies.json @@ -0,0 +1,904 @@ +{ + "bedrock": [ + "aiosqlite", + "autoevals", + "blobfile", + "boto3", + "chardet", + "chromadb-client", + "datasets", + 
"emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn" + ], + "cerebras": [ + "aiosqlite", + "autoevals", + "blobfile", + "cerebras_cloud_sdk", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "ci-tests": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "fastapi", + "fire", + "fireworks-ai", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "sqlite-vec", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "dell": [ + "aiohttp", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "huggingface_hub", 
+ "langdetect", + "matplotlib", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "fireworks": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "fireworks-ai", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "groq": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "litellm", + "matplotlib", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn" + ], + "hf-endpoint": [ + "aiohttp", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "huggingface_hub", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + 
"opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn" + ], + "hf-serverless": [ + "aiohttp", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "huggingface_hub", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "kvant": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "llama_api": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "fastapi", + "fire", + "httpx", + "langdetect", + "litellm", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + 
"pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "sqlite-vec", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "meta-reference-gpu": [ + "accelerate", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "fairscale", + "faiss-cpu", + "fastapi", + "fbgemm-gpu-genai==1.1.2", + "fire", + "httpx", + "langdetect", + "lm-format-enforcer", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentence-transformers", + "sentencepiece", + "sqlalchemy[asyncio]", + "torch", + "torchao==0.8.0", + "torchvision", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "zmq" + ], + "nvidia": [ + "aiohttp", + "aiosqlite", + "blobfile", + "chardet", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "uvicorn" + ], + "ollama": [ + "aiohttp", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "ollama", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "peft", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + 
"scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "torch", + "tqdm", + "transformers", + "tree_sitter", + "trl", + "uvicorn" + ], + "open-benchmark": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "fastapi", + "fire", + "httpx", + "langdetect", + "litellm", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "sqlite-vec", + "together", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn" + ], + "passthrough": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "remote-vllm": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + 
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "sambanova": [ + "aiosqlite", + "blobfile", + "chardet", + "chromadb-client", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "litellm", + "matplotlib", + "mcp", + "nltk", + "numpy", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "starter": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "fastapi", + "fire", + "fireworks-ai", + "httpx", + "langdetect", + "litellm", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "sqlite-vec", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "tgi": [ + "aiohttp", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "huggingface_hub", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url 
https://download.pytorch.org/whl/cpu" + ], + "together": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "together", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "verification": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "fastapi", + "fire", + "httpx", + "langdetect", + "litellm", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "sqlite-vec", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "vllm-gpu": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "vllm", + "sentence-transformers --no-deps", + 
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "watsonx": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "datasets", + "emoji", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "ibm_watson_machine_learning", + "langdetect", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pymongo", + "pypdf", + "pythainlp", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "sqlalchemy[asyncio]", + "tqdm", + "transformers", + "tree_sitter", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ] +} diff --git a/llama_stack/templates/kvant/__init__.py b/llama_stack/templates/kvant/__init__.py new file mode 100644 index 000000000..61706f7f6 --- /dev/null +++ b/llama_stack/templates/kvant/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+
+from .kvant import get_distribution_template  # noqa: F401
diff --git a/llama_stack/templates/kvant/build.yaml b/llama_stack/templates/kvant/build.yaml
new file mode 100644
index 000000000..25afc1f4d
--- /dev/null
+++ b/llama_stack/templates/kvant/build.yaml
@@ -0,0 +1,35 @@
+version: '2'
+distribution_spec:
+  description: distribution for kvant cloud
+  providers:
+    inference:
+    - remote::vllm
+    - inline::sentence-transformers
+    vector_io:
+    - inline::faiss
+    - remote::chromadb
+    - remote::pgvector
+    safety:
+    - inline::llama-guard
+    agents:
+    - inline::meta-reference
+    telemetry:
+    - inline::meta-reference
+    eval:
+    - inline::meta-reference
+    datasetio:
+    - remote::huggingface
+    - inline::localfs
+    scoring:
+    - inline::basic
+    - inline::llm-as-judge
+    - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - remote::wolfram-alpha
+    - inline::rag-runtime
+    - remote::model-context-protocol
+image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
diff --git a/llama_stack/templates/kvant/kvant.py b/llama_stack/templates/kvant/kvant.py
new file mode 100644
index 000000000..44cfc7016
--- /dev/null
+++ b/llama_stack/templates/kvant/kvant.py
@@ -0,0 +1,172 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pathlib import Path
+
+from llama_stack.apis.models.models import ModelType
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
+from llama_stack.providers.inline.inference.sentence_transformers import (
+    SentenceTransformersInferenceConfig,
+)
+from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+
+
+def get_distribution_template() -> DistributionTemplate:
+    """Build the ``kvant`` distribution template.
+
+    The inference provider, default models and documented environment
+    variables mirror this template's checked-in ``build.yaml`` and
+    ``run.yaml``: a ``remote::vllm`` provider registered as ``kvant``.
+
+    Returns:
+        The template with its providers, default models, shield, tool groups
+        and ``run.yaml`` settings.
+    """
+    name = "kvant"
+    # Provider id of the remote inference endpoint; models are registered under it.
+    inference_provider_id = "kvant"
+
+    providers = {
+        "inference": ["remote::vllm", "inline::sentence-transformers"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "telemetry": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "remote::wolfram-alpha",
+            "inline::rag-runtime",
+            "remote::model-context-protocol",
+        ],
+    }
+
+    inference_provider = Provider(
+        provider_id=inference_provider_id,
+        provider_type="remote::vllm",
+        # Written out literally so the generated config matches run.yaml.
+        config={
+            "url": "${env.VLLM_URL:https://maas.kvant.cloud/v1}",
+            "max_tokens": "${env.VLLM_MAX_TOKENS:400000}",
+            "api_token": "${env.VLLM_API_TOKEN:fake}",
+            "tls_verify": "${env.VLLM_TLS_VERIFY:true}",
+        },
+    )
+    embedding_provider = Provider(
+        provider_id="sentence-transformers",
+        provider_type="inline::sentence-transformers",
+        config=SentenceTransformersInferenceConfig.sample_run_config(),
+    )
+    vector_io_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    )
+
+    default_models = [
+        ModelInput(
+            metadata={},
+            model_id="Llama-4-Maverick-17B-128E-Instruct-FP8",
+            provider_id=inference_provider_id,
+            provider_model_id="inference-llama4-maverick",
+            model_type=ModelType.llm,
+        ),
+        ModelInput(
+            metadata={
+                "embedding_dimension": 1024,
+                "context_length": 8192,
+            },
+            model_id="inference-bge-m3",
+            provider_id=inference_provider_id,
+            model_type=ModelType.embedding,
+        ),
+    ]
+
+    embedding_model = ModelInput(
+        model_id="all-MiniLM-L6-v2",
+        provider_id="sentence-transformers",
+        model_type=ModelType.embedding,
+        metadata={
+            "embedding_dimension": 384,
+        },
+    )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::wolfram_alpha",
+            provider_id="wolfram-alpha",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::rag",
+            provider_id="rag-runtime",
+        ),
+    ]
+
+    return DistributionTemplate(
+        name=name,
+        distro_type="self_hosted",
+        description="distribution for kvant cloud",
+        container_image=None,
+        providers=providers,
+        available_models_by_provider={
+            inference_provider_id: [
+                ProviderModelEntry(
+                    provider_model_id="inference-llama4-maverick",
+                    model_type=ModelType.llm,
+                ),
+            ],
+        },
+        run_configs={
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [inference_provider, embedding_provider],
+                    "vector_io": [vector_io_provider],
+                },
+                default_models=default_models + [embedding_model],
+                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
+                default_tool_groups=default_tool_groups,
+            ),
+        },
+        run_config_env_vars={
+            "LLAMA_STACK_PORT": (
+                "8321",
+                "Port for the Llama Stack distribution server",
+            ),
+            "VLLM_URL": (
+                "https://maas.kvant.cloud/v1",
+                "kvant maas URL",
+            ),
+            "VLLM_API_TOKEN": (
+                "fake",
+                "kvant maas API token",
+            ),
+            "VLLM_MAX_TOKENS": (
+                "400000",
+                "Value for the inference provider's max_tokens setting",
+            ),
+            "VLLM_TLS_VERIFY": (
+                "true",
+                "Value for the inference provider's tls_verify setting",
+            ),
+            "OPENAI_API_KEY": (
+                "",
+                "API key passed to the braintrust scoring provider",
+            ),
+        },
+    )
diff --git a/llama_stack/templates/kvant/run.yaml b/llama_stack/templates/kvant/run.yaml
new file mode 100644
index 000000000..12e5c902d
--- /dev/null
+++ b/llama_stack/templates/kvant/run.yaml
@@ -0,0 +1,153 @@
+version: '2'
+image_name: kvant
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: kvant
+    provider_type: 
remote::vllm
+    config:
+      url: ${env.VLLM_URL:https://maas.kvant.cloud/v1}
+      max_tokens: ${env.VLLM_MAX_TOKENS:400000}
+      api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/trace_store.db
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/meta_reference_eval.db
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: wolfram-alpha
+    provider_type: remote::wolfram-alpha
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/inference_store.db
+models:
+- metadata: {}
+  model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
+  provider_id: kvant
+  provider_model_id: inference-llama4-maverick
+  model_type: llm
+- metadata:
+    embedding_dimension: 1024
+    context_length: 8192
+  model_id: inference-bge-m3
+  provider_id: kvant
+  model_type: embedding
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
+shields:
+- shield_id: meta-llama/Llama-Guard-3-8B
+vector_dbs: []
+# - vector_db_id: test-bge
+#   embedding_model: inference-bge-m3
+#   embedding_dimension: 1024
+#   provider_id: faiss
+# - vector_db_id: test-MiniLM-L6-v2
+#   embedding_model: all-MiniLM-L6-v2
+#   embedding_dimension: 384
+#   provider_id: faiss
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::wolfram_alpha
+  provider_id: wolfram-alpha
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+server:
+  port: 8321