Merge branch 'main' into auto_instrument_1

commit d8b883be41
Emilio Garcia, 2025-11-20 16:12:57 -05:00 (committed by GitHub)
4 changed files with 150 additions and 15 deletions

.github/CODEOWNERS
@@ -2,4 +2,4 @@
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
+* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo

scripts/integration-tests.sh

@@ -20,6 +20,7 @@ TEST_PATTERN=""
 INFERENCE_MODE="replay"
 EXTRA_PARAMS=""
 COLLECT_ONLY=false
+TYPESCRIPT_ONLY=false

 # Function to display usage
 usage() {
@@ -34,6 +35,7 @@ Options:
   --subdirs STRING      Comma-separated list of test subdirectories to run (overrides suite)
   --pattern STRING      Regex pattern to pass to pytest -k
   --collect-only        Collect tests only without running them (skips server startup)
+  --typescript-only     Skip Python tests and run only TypeScript client tests
   --help                Show this help message

 Suites are defined in tests/integration/suites.py and define which tests to run.
@@ -90,6 +92,10 @@ while [[ $# -gt 0 ]]; do
             COLLECT_ONLY=true
             shift
             ;;
+        --typescript-only)
+            TYPESCRIPT_ONLY=true
+            shift
+            ;;
         --help)
             usage
             exit 0
@@ -552,6 +558,8 @@ if [[ -n "$STACK_CONFIG" ]]; then
     STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG"
 fi

+# Run Python tests unless typescript-only mode
+if [[ "$TYPESCRIPT_ONLY" == "false" ]]; then
 pytest -s -v $PYTEST_TARGET \
     $STACK_CONFIG_ARG \
     --inference-mode="$INFERENCE_MODE" \
@@ -562,6 +570,11 @@ pytest -s -v $PYTEST_TARGET \
     --color=yes $EXTRA_PARAMS \
     --capture=tee-sys
 exit_code=$?
+else
+    echo "Skipping Python tests (--typescript-only mode)"
+    exit_code=0
+fi

 set +x
 set -e
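
A TypeScript-only run is then requested with the new flag, e.g. ./scripts/integration-tests.sh --typescript-only (the exact script path is assumed from the repo layout; this diff does not show the file name). In that mode the pytest invocation is skipped entirely and exit_code is forced to 0, so the run's outcome depends only on the TypeScript client tests.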

llama_stack/providers/utils/inference/openai_mixin.py

@@ -213,6 +213,19 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         return api_key

+    def _validate_model_allowed(self, provider_model_id: str) -> None:
+        """
+        Validate that the model is in the allowed_models list, if configured.
+
+        :param provider_model_id: The provider-specific model ID to validate
+        :raises ValueError: If the model is not in the allowed_models list
+        """
+        if self.config.allowed_models is not None and provider_model_id not in self.config.allowed_models:
+            raise ValueError(
+                f"Model '{provider_model_id}' is not in the allowed models list. "
+                f"Allowed models: {self.config.allowed_models}"
+            )
+
     async def _get_provider_model_id(self, model: str) -> str:
         """
         Get the provider-specific model ID from the model store.
@@ -259,8 +272,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         Direct OpenAI completion API call.
         """
         # TODO: fix openai_completion to return type compatible with OpenAI's API response
+        provider_model_id = await self._get_provider_model_id(params.model)
+        self._validate_model_allowed(provider_model_id)
+
         completion_kwargs = await prepare_openai_completion_params(
-            model=await self._get_provider_model_id(params.model),
+            model=provider_model_id,
             prompt=params.prompt,
             best_of=params.best_of,
             echo=params.echo,
@@ -292,6 +308,9 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         """
         Direct OpenAI chat completion API call.
         """
+        provider_model_id = await self._get_provider_model_id(params.model)
+        self._validate_model_allowed(provider_model_id)
+
         messages = params.messages

         if self.download_images:
@@ -313,7 +332,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
             messages = [await _localize_image_url(m) for m in messages]

         request_params = await prepare_openai_completion_params(
-            model=await self._get_provider_model_id(params.model),
+            model=provider_model_id,
             messages=messages,
             frequency_penalty=params.frequency_penalty,
             function_call=params.function_call,
@@ -351,10 +370,13 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         """
         Direct OpenAI embeddings API call.
         """
+        provider_model_id = await self._get_provider_model_id(params.model)
+        self._validate_model_allowed(provider_model_id)
+
         # Build request params conditionally to avoid NotGiven/Omit type mismatch
         # The OpenAI SDK uses Omit in signatures but NOT_GIVEN has type NotGiven
         request_params: dict[str, Any] = {
-            "model": await self._get_provider_model_id(params.model),
+            "model": provider_model_id,
             "input": params.input,
         }
         if params.encoding_format is not None:
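
Two edge cases of this guard are easy to miss: allowed_models=None disables the restriction entirely, while an empty list rejects every model, because the check only short-circuits on None. A minimal standalone sketch of the same logic (illustrative only; the real check is the mixin method above, reading self.config.allowed_models):

# Illustrative sketch, not the mixin method itself.
def validate_model_allowed(allowed_models: list[str] | None, provider_model_id: str) -> None:
    # None means "no restriction"; an empty list blocks every model,
    # because the guard only short-circuits on None.
    if allowed_models is not None and provider_model_id not in allowed_models:
        raise ValueError(
            f"Model '{provider_model_id}' is not in the allowed models list. "
            f"Allowed models: {allowed_models}"
        )

validate_model_allowed(None, "any-model")   # ok: no restriction configured
validate_model_allowed(["gpt-4"], "gpt-4")  # ok: explicitly allowed
# validate_model_allowed([], "gpt-4")       # would raise ValueError: an empty list allows nothing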

tests/unit/providers/utils/inference/test_openai_mixin.py

@@ -15,7 +15,14 @@ from pydantic import BaseModel, Field
 from llama_stack.core.request_headers import request_provider_data_context
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
+from llama_stack_api import (
+    Model,
+    ModelType,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAICompletionRequestWithExtraBody,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIUserMessageParam,
+)


 class OpenAIMixinImpl(OpenAIMixin):
@@ -834,3 +841,96 @@ class TestOpenAIMixinProviderDataApiKey:
         error_message = str(exc_info.value)
         assert "test_api_key" in error_message
         assert "x-llamastack-provider-data" in error_message
+
+
+class TestOpenAIMixinAllowedModelsInference:
+    """Test cases for allowed_models enforcement during inference requests"""
+
+    async def test_inference_with_allowed_models(self, mixin, mock_client_context):
+        """Test that all inference methods succeed with allowed models"""
+        mixin.config.allowed_models = ["gpt-4", "text-davinci-003", "text-embedding-ada-002"]
+
+        mock_client = MagicMock()
+        mock_client.chat.completions.create = AsyncMock(return_value=MagicMock())
+        mock_client.completions.create = AsyncMock(return_value=MagicMock())
+        mock_embedding_response = MagicMock()
+        mock_embedding_response.data = [MagicMock(embedding=[0.1, 0.2, 0.3])]
+        mock_embedding_response.usage = MagicMock(prompt_tokens=5, total_tokens=5)
+        mock_client.embeddings.create = AsyncMock(return_value=mock_embedding_response)
+
+        with mock_client_context(mixin, mock_client):
+            # Test chat completion
+            await mixin.openai_chat_completion(
+                OpenAIChatCompletionRequestWithExtraBody(
+                    model="gpt-4", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
+                )
+            )
+            mock_client.chat.completions.create.assert_called_once()
+
+            # Test completion
+            await mixin.openai_completion(
+                OpenAICompletionRequestWithExtraBody(model="text-davinci-003", prompt="Hello")
+            )
+            mock_client.completions.create.assert_called_once()
+
+            # Test embeddings
+            await mixin.openai_embeddings(
+                OpenAIEmbeddingsRequestWithExtraBody(model="text-embedding-ada-002", input="test text")
+            )
+            mock_client.embeddings.create.assert_called_once()
+
+    async def test_inference_with_disallowed_models(self, mixin, mock_client_context):
+        """Test that all inference methods fail with disallowed models"""
+        mixin.config.allowed_models = ["gpt-4"]
+        mock_client = MagicMock()
+
+        with mock_client_context(mixin, mock_client):
+            # Test chat completion with disallowed model
+            with pytest.raises(ValueError, match="Model 'gpt-4-turbo' is not in the allowed models list"):
+                await mixin.openai_chat_completion(
+                    OpenAIChatCompletionRequestWithExtraBody(
+                        model="gpt-4-turbo", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
+                    )
+                )
+
+            # Test completion with disallowed model
+            with pytest.raises(ValueError, match="Model 'text-davinci-002' is not in the allowed models list"):
+                await mixin.openai_completion(
+                    OpenAICompletionRequestWithExtraBody(model="text-davinci-002", prompt="Hello")
+                )
+
+            # Test embeddings with disallowed model
+            with pytest.raises(ValueError, match="Model 'text-embedding-3-large' is not in the allowed models list"):
+                await mixin.openai_embeddings(
+                    OpenAIEmbeddingsRequestWithExtraBody(model="text-embedding-3-large", input="test text")
+                )
+
+        mock_client.chat.completions.create.assert_not_called()
+        mock_client.completions.create.assert_not_called()
+        mock_client.embeddings.create.assert_not_called()
+
+    async def test_inference_with_no_restrictions(self, mixin, mock_client_context):
+        """Test that inference succeeds when allowed_models is None, and that an empty list blocks all models"""
+        # Test with None (no restrictions)
+        assert mixin.config.allowed_models is None
+        mock_client = MagicMock()
+        mock_client.chat.completions.create = AsyncMock(return_value=MagicMock())
+
+        with mock_client_context(mixin, mock_client):
+            await mixin.openai_chat_completion(
+                OpenAIChatCompletionRequestWithExtraBody(
+                    model="any-model", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
+                )
+            )
+            mock_client.chat.completions.create.assert_called_once()
+
+        # Test with empty list (blocks all models)
+        mixin.config.allowed_models = []
+        with mock_client_context(mixin, mock_client):
+            with pytest.raises(ValueError, match="Model 'gpt-4' is not in the allowed models list"):
+                await mixin.openai_chat_completion(
+                    OpenAIChatCompletionRequestWithExtraBody(
+                        model="gpt-4", messages=[OpenAIUserMessageParam(role="user", content="Hello")]
+                    )
+                )
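
For context, the restriction these tests exercise is driven by the allowed_models field on RemoteInferenceProviderConfig (imported at the top of this file). A hedged sketch of opting in, assuming the field takes a plain list of provider model IDs as the assignments above suggest:

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig

# Assumed usage: restrict inference to an explicit allow-list.
# allowed_models=None (the default) leaves all models available;
# allowed_models=[] would block every model, as the tests above verify.
config = RemoteInferenceProviderConfig(allowed_models=["gpt-4", "text-embedding-ada-002"])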