add kvant

2025-05-26 05:53:04 +02:00 · 2025-05-26 05:53:04 +02:00 · 1c1a54d5d4
commit 1c1a54d5d4
parent 9623d5d230
20 changed files with 449 additions and 0 deletions
--- a/.github/workflows/Dockerfile
+++ b/.github/workflows/Dockerfile
@ -0,0 +1 @@
+# Re-tags an existing local image so the CI workflow can push it with its own
+# tags/labels. NOTE(review): distribution-kvant:dev is presumably the image
+# produced by the workflow's `llama stack build` step — confirm.
+FROM distribution-kvant:dev
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@ -0,0 +1,78 @@
+# Builds the kvant llama-stack distribution container and pushes it to the
+# git.kvant.cloud registry. Images are pushed for every event except
+# pull requests (see the `push:` input of the final step).
+name: Build and Push container
+run-name: Build and Push container
+on:
+  workflow_dispatch:
+  #schedule:
+  #  - cron: "0 10 * * *"
+  push:
+    branches:
+      - 'main'
+    tags:
+      - 'v*'
+  pull_request:
+    branches:
+      - 'main'
+env:
+  IMAGE: git.kvant.cloud/${{github.repository}}
+jobs:
+  # NOTE(review): the job id looks copied from another project; it is left
+  # unchanged here in case anything references it.
+  build_concierge_backend:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0

+      # The timestamp is produced with `date` rather than a third-party action
+      # referenced at a mutable branch: this job holds registry write
+      # credentials, so it should not run code that can change without review.
+      # The step id and the `time` output name are kept for the build-args below.
+      - name: Set current time
+        id: current_time
+        run: |
+          echo "time=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" >> "$GITHUB_OUTPUT"

+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3

+      - name: Login to git.kvant.cloud registry
+        uses: docker/login-action@v3
+        with:
+          registry: git.kvant.cloud
+          username: ${{ vars.ORG_PACKAGE_WRITER_USERNAME }}
+          password: ${{ secrets.ORG_PACKAGE_WRITER_TOKEN }}

+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          # list of Docker images to use as base name for tags
+          images: |
+            ${{env.IMAGE}}
+          # generate Docker tags based on the following events/attributes
+          tags: |
+            type=schedule
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}

+      - name: Install uv
+        run: pipx install uv

+      # Builds the distribution image locally from the checked-out sources;
+      # the next step wraps that image via .github/workflows/Dockerfile.
+      - name: Build
+        env:
+          USE_COPY_NOT_MOUNT: true
+          LLAMA_STACK_DIR: .
+        run: uvx --from . llama stack build --template kvant --image-type container

+      - name: Build and push to gitea registry
+        uses: docker/build-push-action@v6
+        with:
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          context: .github/workflows
+          provenance: mode=max
+          sbom: true
+          build-args: |
+            BUILD_DATE=${{ steps.current_time.outputs.time }}
+          cache-from: |
+            type=registry,ref=${{ env.IMAGE }}:buildcache
+            type=registry,ref=${{ env.IMAGE }}:${{ github.ref_name }}
+            type=registry,ref=${{ env.IMAGE }}:main
+          cache-to: type=registry,ref=${{ env.IMAGE }}:buildcache,mode=max,image-manifest=true
--- a/.github/workflows_upstream/changelog.yml
+++ b/.github/workflows_upstream/changelog.yml
--- a/.github/workflows_upstream/gha_workflow_llama_stack_tests.yml
+++ b/.github/workflows_upstream/gha_workflow_llama_stack_tests.yml
--- a/.github/workflows_upstream/install-script-ci.yml
+++ b/.github/workflows_upstream/install-script-ci.yml
--- a/.github/workflows_upstream/integration-auth-tests.yml
+++ b/.github/workflows_upstream/integration-auth-tests.yml
--- a/.github/workflows_upstream/integration-tests.yml
+++ b/.github/workflows_upstream/integration-tests.yml
--- a/.github/workflows_upstream/pre-commit.yml
+++ b/.github/workflows_upstream/pre-commit.yml
--- a/.github/workflows_upstream/providers-build.yml
+++ b/.github/workflows_upstream/providers-build.yml
--- a/.github/workflows_upstream/semantic-pr.yml
+++ b/.github/workflows_upstream/semantic-pr.yml
--- a/.github/workflows_upstream/stale_bot.yml
+++ b/.github/workflows_upstream/stale_bot.yml
--- a/.github/workflows_upstream/test-external-providers.yml
+++ b/.github/workflows_upstream/test-external-providers.yml
--- a/.github/workflows_upstream/tests.yml
+++ b/.github/workflows_upstream/tests.yml
--- a/.github/workflows_upstream/unit-tests.yml
+++ b/.github/workflows_upstream/unit-tests.yml
--- a/.github/workflows_upstream/update-readthedocs.yml
+++ b/.github/workflows_upstream/update-readthedocs.yml
--- a/llama_stack/templates/dependencies.json
+++ b/llama_stack/templates/dependencies.json
@ -312,6 +312,45 @@
    "sentence-transformers --no-deps",
    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
  ],
+  "kvant": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "sqlalchemy[asyncio]",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
  "llama_api": [
    "aiosqlite",
    "autoevals",
--- a/llama_stack/templates/kvant/init.py
+++ b/llama_stack/templates/kvant/init.py
@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .kvant import get_distribution_template  # noqa: F401
--- a/llama_stack/templates/kvant/build.yaml
+++ b/llama_stack/templates/kvant/build.yaml
@ -0,0 +1,35 @@
+# Build spec for the kvant distribution: one list of provider types per API.
+version: '2'
+distribution_spec:
+  description: distribution for kvant cloud
+  providers:
+    inference:
+    - remote::vllm
+    - inline::sentence-transformers
+    vector_io:
+    - inline::faiss
+    - remote::chromadb
+    - remote::pgvector
+    safety:
+    - inline::llama-guard
+    agents:
+    - inline::meta-reference
+    telemetry:
+    - inline::meta-reference
+    eval:
+    - inline::meta-reference
+    datasetio:
+    - remote::huggingface
+    - inline::localfs
+    scoring:
+    - inline::basic
+    - inline::llm-as-judge
+    - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - remote::wolfram-alpha
+    - inline::rag-runtime
+    - remote::model-context-protocol
+# NOTE(review): the CI workflow invokes the build with `--image-type container`;
+# presumably that flag takes precedence over this value — confirm.
+image_type: conda
+additional_pip_packages:
+- sqlalchemy[asyncio]
--- a/llama_stack/templates/kvant/kvant.py
+++ b/llama_stack/templates/kvant/kvant.py
@ -0,0 +1,136 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pathlib import Path
+
+from llama_stack.apis.models.models import ModelType
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
+from llama_stack.providers.inline.inference.sentence_transformers import (
+    SentenceTransformersInferenceConfig,
+)
+from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack.providers.remote.inference.passthrough.config import (
+    PassthroughImplConfig,
+)
+from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+
+
+def get_distribution_template() -> DistributionTemplate:
+    """Describe the kvant distribution: providers, default models and env vars.

+    LLM inference is served by a ``remote::vllm`` provider registered under the
+    id ``kvant``. The provider types, provider ids, model entries and
+    environment variable names used here follow the build.yaml and run.yaml
+    files that sit next to this module, so the three files describe the same
+    distribution.

+    Returns:
+        A ``DistributionTemplate`` named ``kvant`` with a single ``run.yaml``
+        run configuration.
+    """
+    name = "kvant"
+    inference_provider_id = "kvant"

+    providers = {
+        "inference": ["remote::vllm", "inline::sentence-transformers"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "telemetry": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "remote::wolfram-alpha",
+            "inline::rag-runtime",
+            "remote::model-context-protocol",
+        ],
+    }

+    # NOTE(review): the file-level PassthroughImplConfig import is no longer
+    # used by this function. The config is written as a plain mapping that
+    # mirrors the vLLM provider entry in this template's run.yaml.
+    inference_provider = Provider(
+        provider_id=inference_provider_id,
+        provider_type="remote::vllm",
+        config={
+            "url": "${env.VLLM_URL:https://maas.kvant.cloud/v1}",
+            "max_tokens": "${env.VLLM_MAX_TOKENS:400000}",
+            "api_token": "${env.VLLM_API_TOKEN:fake}",
+            "tls_verify": "${env.VLLM_TLS_VERIFY:true}",
+        },
+    )
+    embedding_provider = Provider(
+        provider_id="sentence-transformers",
+        provider_type="inline::sentence-transformers",
+        config=SentenceTransformersInferenceConfig.sample_run_config(),
+    )
+    vector_io_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    )

+    # Models served through the kvant provider, in the order used by run.yaml.
+    default_models = [
+        ModelInput(
+            metadata={},
+            model_id="Llama-4-Maverick-17B-128E-Instruct-FP8",
+            provider_id=inference_provider_id,
+            provider_model_id="inference-llama4-maverick",
+            model_type=ModelType.llm,
+        ),
+        ModelInput(
+            metadata={
+                "embedding_dimension": 1024,
+                "context_length": 8192,
+            },
+            model_id="inference-bge-m3",
+            provider_id=inference_provider_id,
+            model_type=ModelType.embedding,
+        ),
+    ]

+    # Embedding model served by the sentence-transformers provider.
+    embedding_model = ModelInput(
+        model_id="all-MiniLM-L6-v2",
+        provider_id="sentence-transformers",
+        model_type=ModelType.embedding,
+        metadata={
+            "embedding_dimension": 384,
+        },
+    )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::wolfram_alpha",
+            provider_id="wolfram-alpha",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::rag",
+            provider_id="rag-runtime",
+        ),
+    ]

+    return DistributionTemplate(
+        name=name,
+        distro_type="self_hosted",
+        description="distribution for kvant cloud",
+        container_image=None,
+        providers=providers,
+        available_models_by_provider={
+            inference_provider_id: [
+                ProviderModelEntry(
+                    provider_model_id="inference-llama4-maverick",
+                    model_type=ModelType.llm,
+                ),
+            ],
+        },
+        run_configs={
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [inference_provider, embedding_provider],
+                    "vector_io": [vector_io_provider],
+                },
+                default_models=default_models + [embedding_model],
+                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
+                default_tool_groups=default_tool_groups,
+            ),
+        },
+        # Each entry maps a variable name to (default value, description).
+        run_config_env_vars={
+            "LLAMA_STACK_PORT": (
+                "8321",
+                "Port for the Llama Stack distribution server",
+            ),
+            "VLLM_URL": (
+                "https://maas.kvant.cloud/v1",
+                "kvant maas URL",
+            ),
+            "VLLM_API_TOKEN": (
+                "fake",
+                "kvant maas API Key",
+            ),
+            "VLLM_MAX_TOKENS": (
+                "400000",
+                "Value of the max_tokens setting of the kvant inference provider",
+            ),
+            "VLLM_TLS_VERIFY": (
+                "true",
+                "Value of the tls_verify setting of the kvant inference provider",
+            ),
+            "OPENAI_API_KEY": (
+                "",
+                "OpenAI API key read by the braintrust scoring provider",
+            ),
+        },
+    )
--- a/llama_stack/templates/kvant/run.yaml
+++ b/llama_stack/templates/kvant/run.yaml
@ -0,0 +1,153 @@
+# Run configuration for the kvant distribution.
+version: '2'
+image_name: kvant
+# APIs enabled for this stack; each one has a matching key under `providers`.
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+# Provider instances per API.
+# NOTE(review): values of the form ${env.NAME:default} presumably resolve to the
+# environment variable NAME, falling back to the text after the colon — confirm.
+providers:
+  inference:
+  # LLM inference goes to a remote vLLM-type provider registered as "kvant".
+  - provider_id: kvant
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:https://maas.kvant.cloud/v1}
+      max_tokens: ${env.VLLM_MAX_TOKENS:400000}
+      api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
+  # Inline sentence-transformers provider, configured with an empty mapping.
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  # The sqlite-backed providers below all keep their database files under
+  # SQLITE_STORE_DIR (default ~/.llama/distributions/kvant).
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/trace_store.db
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/meta_reference_eval.db
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  # braintrust reads OPENAI_API_KEY; the default is empty.
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
+  # Tool providers: every API key below defaults to empty when its variable is
+  # not set.
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: wolfram-alpha
+    provider_type: remote::wolfram-alpha
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+# Registry and inference stores are sqlite files under SQLITE_STORE_DIR
+# (default ~/.llama/distributions/kvant).
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/registry.db
+inference_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/kvant}/inference_store.db
+# Pre-registered models: one LLM and one embedding model on the kvant provider,
+# plus one embedding model on the sentence-transformers provider.
+models:
+- metadata: {}
+  model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
+  provider_id: kvant
+  provider_model_id: inference-llama4-maverick
+  model_type: llm
+- metadata:
+    embedding_dimension: 1024
+    context_length: 8192
+  model_id: inference-bge-m3
+  provider_id: kvant
+  model_type: embedding
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
+shields:
+- shield_id: meta-llama/Llama-Guard-3-8B
+# No vector DBs are registered; the commented entries are disabled examples,
+# one per embedding model listed above.
+vector_dbs: []
+# - vector_db_id: test-bge
+#   embedding_model: inference-bge-m3
+#   embedding_dimension: 1024
+#   provider_id: faiss
+# - vector_db_id: test-MiniLM-L6-v2
+#   embedding_model: all-MiniLM-L6-v2
+#   embedding_dimension: 384
+#   provider_id: faiss
+datasets: []
+scoring_fns: []
+benchmarks: []
+# Each provider_id names a tool_runtime provider from the providers section;
+# web search is bound to tavily-search, not brave-search.
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::wolfram_alpha
+  provider_id: wolfram-alpha
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+server:
+  port: 8321