fix: authorized sql store with postgres (#2641)
Some checks failed

# What does this PR do?
PostgreSQL uses different JSON extraction syntax (the `->` and `->>` operators) than SQLite (`JSON_EXTRACT`), so the access-control WHERE clauses generated by `AuthorizedSqlStore` failed against a Postgres backend. This change detects the database type and generates the appropriate syntax for each.
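For illustration, a minimal sketch of the syntax difference. The SQLite half is runnable with the stdlib `sqlite3` module (assuming the bundled SQLite includes the JSON1 functions, true for modern builds); the Postgres operators are shown in comments for comparison:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE t (access_attributes TEXT)")
conn.execute("""INSERT INTO t VALUES ('{"roles": ["admin"]}')""")

# SQLite: JSON_EXTRACT(column, '$.path')
row = conn.execute("SELECT JSON_EXTRACT(access_attributes, '$.roles') FROM t").fetchone()
print(row[0])  # '["admin"]' (arrays/objects come back as JSON text)

# PostgreSQL has no JSON_EXTRACT; it uses operators instead:
#   access_attributes->'roles'    -- extracts as JSON
#   access_attributes->>'roles'   -- extracts as text
```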

## Test Plan
Added an integration test exercising JSON comparisons against both SQLite and PostgreSQL, plus a CI workflow (SqlStore Integration Tests) that runs it against a Postgres service container.
commit e9926564bd (parent 5bb3817c49)
ehhuang, 2025-07-07 19:36:34 -07:00, committed by GitHub
7 changed files with 337 additions and 27 deletions

.github/workflows/integration-sql-store-tests.yml (new file)

@@ -0,0 +1,70 @@
name: SqlStore Integration Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/providers/utils/sqlstore/**'
      - 'tests/integration/sqlstore/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-sql-store-tests.yml' # This workflow

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-postgres:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.12", "3.13"]
      fail-fast: false

    services:
      postgres:
        image: postgres:15
        env:
          POSTGRES_USER: llamastack
          POSTGRES_PASSWORD: llamastack
          POSTGRES_DB: llamastack
        ports:
          - 5432:5432
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run SqlStore Integration Tests
        env:
          ENABLE_POSTGRES_TESTS: "true"
          POSTGRES_HOST: localhost
          POSTGRES_PORT: 5432
          POSTGRES_DB: llamastack
          POSTGRES_USER: llamastack
          POSTGRES_PASSWORD: llamastack
        run: |
          uv run pytest -sv tests/integration/providers/utils/sqlstore/

      - name: Upload test logs
        if: ${{ always() }}
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: postgres-test-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.python-version }}
          path: |
            *.log
          retention-days: 1

llama_stack/providers/utils/sqlstore/authorized_sqlstore.py

@@ -15,6 +15,7 @@ from llama_stack.distribution.request_headers import get_authenticated_user
 from llama_stack.log import get_logger

 from .api import ColumnDefinition, ColumnType, PaginatedResponse, SqlStore
+from .sqlstore import SqlStoreType

 logger = get_logger(name=__name__, category="authorized_sqlstore")

@@ -71,9 +72,18 @@ class AuthorizedSqlStore:
         :param sql_store: Base SqlStore implementation to wrap
         """
         self.sql_store = sql_store
+        self._detect_database_type()
         self._validate_sql_optimized_policy()

+    def _detect_database_type(self) -> None:
+        """Detect the database type from the underlying SQL store."""
+        if not hasattr(self.sql_store, "config"):
+            raise ValueError("SqlStore must have a config attribute to be used with AuthorizedSqlStore")
+
+        self.database_type = self.sql_store.config.type
+        if self.database_type not in [SqlStoreType.postgres, SqlStoreType.sqlite]:
+            raise ValueError(f"Unsupported database type: {self.database_type}")
+
     def _validate_sql_optimized_policy(self) -> None:
         """Validate that SQL_OPTIMIZED_POLICY matches the actual default_policy().

@@ -181,6 +191,50 @@ class AuthorizedSqlStore:
         else:
             return self._build_conservative_where_clause()

+    def _json_extract(self, column: str, path: str) -> str:
+        """Extract JSON value (keeping JSON type).
+
+        Args:
+            column: The JSON column name
+            path: The JSON path (e.g., 'roles', 'teams')
+
+        Returns:
+            SQL expression to extract JSON value
+        """
+        if self.database_type == SqlStoreType.postgres:
+            return f"{column}->'{path}'"
+        elif self.database_type == SqlStoreType.sqlite:
+            return f"JSON_EXTRACT({column}, '$.{path}')"
+        else:
+            raise ValueError(f"Unsupported database type: {self.database_type}")
+
+    def _json_extract_text(self, column: str, path: str) -> str:
+        """Extract JSON value as text.
+
+        Args:
+            column: The JSON column name
+            path: The JSON path (e.g., 'roles', 'teams')
+
+        Returns:
+            SQL expression to extract JSON value as text
+        """
+        if self.database_type == SqlStoreType.postgres:
+            return f"{column}->>'{path}'"
+        elif self.database_type == SqlStoreType.sqlite:
+            return f"JSON_EXTRACT({column}, '$.{path}')"
+        else:
+            raise ValueError(f"Unsupported database type: {self.database_type}")
+
+    def _get_public_access_conditions(self) -> list[str]:
+        """Get the SQL conditions for public access."""
+        if self.database_type == SqlStoreType.postgres:
+            # Postgres stores JSON null as 'null'
+            return ["access_attributes::text = 'null'"]
+        elif self.database_type == SqlStoreType.sqlite:
+            return ["access_attributes = 'null'"]
+        else:
+            raise ValueError(f"Unsupported database type: {self.database_type}")
+
     def _build_default_policy_where_clause(self) -> str:
         """Build SQL WHERE clause for the default policy.

@@ -189,29 +243,32 @@ class AuthorizedSqlStore:
         """
         current_user = get_authenticated_user()

+        base_conditions = self._get_public_access_conditions()
         if not current_user or not current_user.attributes:
-            return "(access_attributes IS NULL OR access_attributes = 'null' OR access_attributes = '{}')"
+            # Only allow public records
+            return f"({' OR '.join(base_conditions)})"
         else:
-            base_conditions = ["access_attributes IS NULL", "access_attributes = 'null'", "access_attributes = '{}'"]
             user_attr_conditions = []

             for attr_key, user_values in current_user.attributes.items():
                 if user_values:
                     value_conditions = []
                     for value in user_values:
-                        value_conditions.append(f"JSON_EXTRACT(access_attributes, '$.{attr_key}') LIKE '%\"{value}\"%'")
+                        # Check if JSON array contains the value
+                        escaped_value = value.replace("'", "''")
+                        json_text = self._json_extract_text("access_attributes", attr_key)
+                        value_conditions.append(f"({json_text} LIKE '%\"{escaped_value}\"%')")

                     if value_conditions:
-                        category_missing = f"JSON_EXTRACT(access_attributes, '$.{attr_key}') IS NULL"
+                        # Check if the category is missing (NULL)
+                        category_missing = f"{self._json_extract('access_attributes', attr_key)} IS NULL"
                         user_matches_category = f"({' OR '.join(value_conditions)})"
                         user_attr_conditions.append(f"({category_missing} OR {user_matches_category})")

             if user_attr_conditions:
                 all_requirements_met = f"({' AND '.join(user_attr_conditions)})"
                 base_conditions.append(all_requirements_met)
-                return f"({' OR '.join(base_conditions)})"
-            else:
-                return f"({' OR '.join(base_conditions)})"
+
+            return f"({' OR '.join(base_conditions)})"

     def _build_conservative_where_clause(self) -> str:

@@ -222,5 +279,8 @@ class AuthorizedSqlStore:
         current_user = get_authenticated_user()

         if not current_user:
-            return "(access_attributes IS NULL OR access_attributes = 'null' OR access_attributes = '{}')"
+            # Only allow public records
+            base_conditions = self._get_public_access_conditions()
+            return f"({' OR '.join(base_conditions)})"

         return "1=1"
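Outside the class, the dialect branching can be sanity-checked as plain functions. This is a sketch mirroring the `_json_extract` / `_json_extract_text` helpers, with the database type passed in explicitly as a string for illustration:

```python
def json_extract(database_type: str, column: str, path: str) -> str:
    # Mirrors AuthorizedSqlStore._json_extract: keep the JSON type
    if database_type == "postgres":
        return f"{column}->'{path}'"
    elif database_type == "sqlite":
        return f"JSON_EXTRACT({column}, '$.{path}')"
    raise ValueError(f"Unsupported database type: {database_type}")


def json_extract_text(database_type: str, column: str, path: str) -> str:
    # Mirrors AuthorizedSqlStore._json_extract_text: extract as text
    if database_type == "postgres":
        return f"{column}->>'{path}'"
    elif database_type == "sqlite":
        return f"JSON_EXTRACT({column}, '$.{path}')"
    raise ValueError(f"Unsupported database type: {database_type}")


print(json_extract("postgres", "access_attributes", "roles"))
# access_attributes->'roles'
print(json_extract_text("sqlite", "access_attributes", "roles"))
# JSON_EXTRACT(access_attributes, '$.roles')
```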

llama_stack/providers/utils/sqlstore/sqlstore.py

@@ -4,9 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

 from abc import abstractmethod
-from enum import Enum
+from enum import StrEnum
 from pathlib import Path
 from typing import Annotated, Literal

@@ -19,7 +18,7 @@ from .api import SqlStore
 sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"]

-class SqlStoreType(Enum):
+class SqlStoreType(StrEnum):
     sqlite = "sqlite"
     postgres = "postgres"

@@ -36,7 +35,7 @@ class SqlAlchemySqlStoreConfig(BaseModel):
 class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
-    type: Literal["sqlite"] = SqlStoreType.sqlite.value
+    type: Literal[SqlStoreType.sqlite] = SqlStoreType.sqlite
     db_path: str = Field(
         default=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(),
         description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db",

@@ -59,7 +58,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
 class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
-    type: Literal["postgres"] = SqlStoreType.postgres.value
+    type: Literal[SqlStoreType.postgres] = SqlStoreType.postgres
     host: str = "localhost"
     port: int = 5432
     db: str = "llamastack"

@@ -107,7 +106,7 @@ def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]:
 def sqlstore_impl(config: SqlStoreConfig) -> SqlStore:
-    if config.type in [SqlStoreType.sqlite.value, SqlStoreType.postgres.value]:
+    if config.type in [SqlStoreType.sqlite, SqlStoreType.postgres]:
         from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl

         impl = SqlAlchemySqlStoreImpl(config)


@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


@@ -0,0 +1,173 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os
import tempfile
from unittest.mock import patch

import pytest

from llama_stack.distribution.access_control.access_control import default_policy
from llama_stack.distribution.datatypes import User
from llama_stack.providers.utils.sqlstore.api import ColumnType
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig, SqliteSqlStoreConfig


def get_postgres_config():
    """Get PostgreSQL configuration if tests are enabled."""
    return PostgresSqlStoreConfig(
        host=os.environ.get("POSTGRES_HOST", "localhost"),
        port=int(os.environ.get("POSTGRES_PORT", "5432")),
        db=os.environ.get("POSTGRES_DB", "llamastack"),
        user=os.environ.get("POSTGRES_USER", "llamastack"),
        password=os.environ.get("POSTGRES_PASSWORD", "llamastack"),
    )


def get_sqlite_config():
    """Get SQLite configuration with temporary database."""
    tmp_file = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    tmp_file.close()
    return SqliteSqlStoreConfig(db_path=tmp_file.name), tmp_file.name


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "backend_config",
    [
        pytest.param(
            ("postgres", get_postgres_config),
            marks=pytest.mark.skipif(
                not os.environ.get("ENABLE_POSTGRES_TESTS"),
                reason="PostgreSQL tests require ENABLE_POSTGRES_TESTS environment variable",
            ),
            id="postgres",
        ),
        pytest.param(("sqlite", get_sqlite_config), id="sqlite"),
    ],
)
@patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
async def test_json_comparison(mock_get_authenticated_user, backend_config):
    """Test that JSON column comparisons work correctly for both PostgreSQL and SQLite"""
    backend_name, config_func = backend_config

    # Handle different config types
    if backend_name == "postgres":
        config = config_func()
        cleanup_path = None
    else:  # sqlite
        config, cleanup_path = config_func()

    try:
        base_sqlstore = SqlAlchemySqlStoreImpl(config)
        authorized_store = AuthorizedSqlStore(base_sqlstore)

        # Create test table
        table_name = f"test_json_comparison_{backend_name}"
        await authorized_store.create_table(
            table=table_name,
            schema={
                "id": ColumnType.STRING,
                "data": ColumnType.STRING,
            },
        )

        try:
            # Test with no authenticated user (should handle JSON null comparison)
            mock_get_authenticated_user.return_value = None

            # Insert some test data
            await authorized_store.insert(table_name, {"id": "1", "data": "public_data"})

            # Test fetching with no user - should not error on JSON comparison
            result = await authorized_store.fetch_all(table_name, policy=default_policy())
            assert len(result.data) == 1
            assert result.data[0]["id"] == "1"
            assert result.data[0]["access_attributes"] is None

            # Test with authenticated user
            test_user = User("test-user", {"roles": ["admin"]})
            mock_get_authenticated_user.return_value = test_user

            # Insert data with user attributes
            await authorized_store.insert(table_name, {"id": "2", "data": "admin_data"})

            # Fetch all - admin should see both
            result = await authorized_store.fetch_all(table_name, policy=default_policy())
            assert len(result.data) == 2

            # Test with non-admin user
            regular_user = User("regular-user", {"roles": ["user"]})
            mock_get_authenticated_user.return_value = regular_user

            # Should only see public record
            result = await authorized_store.fetch_all(table_name, policy=default_policy())
            assert len(result.data) == 1
            assert result.data[0]["id"] == "1"

            # Test the category missing branch: user with multiple attributes
            multi_user = User("multi-user", {"roles": ["admin"], "teams": ["dev"]})
            mock_get_authenticated_user.return_value = multi_user

            # Insert record with multi-user (has both roles and teams)
            await authorized_store.insert(table_name, {"id": "3", "data": "multi_user_data"})

            # Test different user types to create records with different attribute patterns
            # Record with only roles (teams category will be missing)
            roles_only_user = User("roles-user", {"roles": ["admin"]})
            mock_get_authenticated_user.return_value = roles_only_user
            await authorized_store.insert(table_name, {"id": "4", "data": "roles_only_data"})

            # Record with only teams (roles category will be missing)
            teams_only_user = User("teams-user", {"teams": ["dev"]})
            mock_get_authenticated_user.return_value = teams_only_user
            await authorized_store.insert(table_name, {"id": "5", "data": "teams_only_data"})

            # Record with different roles/teams (shouldn't match our test user)
            different_user = User("different-user", {"roles": ["user"], "teams": ["qa"]})
            mock_get_authenticated_user.return_value = different_user
            await authorized_store.insert(table_name, {"id": "6", "data": "different_user_data"})

            # Now test with the multi-user who has both roles=admin and teams=dev
            mock_get_authenticated_user.return_value = multi_user
            result = await authorized_store.fetch_all(table_name, policy=default_policy())

            # Should see:
            # - public record (1) - no access_attributes
            # - admin record (2) - user matches roles=admin, teams missing (allowed)
            # - multi_user record (3) - user matches both roles=admin and teams=dev
            # - roles_only record (4) - user matches roles=admin, teams missing (allowed)
            # - teams_only record (5) - user matches teams=dev, roles missing (allowed)
            # Should NOT see:
            # - different_user record (6) - user doesn't match roles=user or teams=qa
            expected_ids = {"1", "2", "3", "4", "5"}
            actual_ids = {record["id"] for record in result.data}
            assert actual_ids == expected_ids, f"Expected to see records {expected_ids} but got {actual_ids}"

            # Verify the category missing logic specifically:
            # records 4 and 5 test the branch where one attribute category is missing
            category_test_ids = {record["id"] for record in result.data if record["id"] in ["4", "5"]}
            assert category_test_ids == {"4", "5"}, (
                f"Category missing logic failed: expected 4,5 but got {category_test_ids}"
            )

        finally:
            # Clean up records
            for record_id in ["1", "2", "3", "4", "5", "6"]:
                try:
                    await base_sqlstore.delete(table_name, {"id": record_id})
                except Exception:
                    pass

    finally:
        # Clean up temporary SQLite database file if needed
        if cleanup_path:
            try:
                os.unlink(cleanup_path)
            except OSError:
                pass


@@ -104,19 +104,17 @@ async def test_sql_policy_consistency(mock_get_authenticated_user):
     # Test scenarios with different access control patterns
     test_scenarios = [
-        # Scenario 1: Public record (no access control)
+        # Scenario 1: Public record (no access control - represents None user insert)
         {"id": "1", "name": "public", "access_attributes": None},
-        # Scenario 2: Empty access control (should be treated as public)
-        {"id": "2", "name": "empty", "access_attributes": {}},
-        # Scenario 3: Record with roles requirement
-        {"id": "3", "name": "admin-only", "access_attributes": {"roles": ["admin"]}},
-        # Scenario 4: Record with multiple attribute categories
-        {"id": "4", "name": "admin-ml-team", "access_attributes": {"roles": ["admin"], "teams": ["ml-team"]}},
-        # Scenario 5: Record with teams only (missing roles category)
-        {"id": "5", "name": "ml-team-only", "access_attributes": {"teams": ["ml-team"]}},
-        # Scenario 6: Record with roles and projects
+        # Scenario 2: Record with roles requirement
+        {"id": "2", "name": "admin-only", "access_attributes": {"roles": ["admin"]}},
+        # Scenario 3: Record with multiple attribute categories
+        {"id": "3", "name": "admin-ml-team", "access_attributes": {"roles": ["admin"], "teams": ["ml-team"]}},
+        # Scenario 4: Record with teams only (missing roles category)
+        {"id": "4", "name": "ml-team-only", "access_attributes": {"teams": ["ml-team"]}},
+        # Scenario 5: Record with roles and projects
         {
-            "id": "6",
+            "id": "5",
             "name": "admin-project-x",
             "access_attributes": {"roles": ["admin"], "projects": ["project-x"]},
         },