Resolved merge conflicts

2026-01-02 01:44:31 +00:00 · 2025-03-13 01:58:09 -07:00 · 2025-03-13 01:58:09 -07:00 · 967dd0aa08
commit 967dd0aa08
parent 3298e50105
82 changed files with 66055 additions and 0 deletions
--- a/tests/unit/cli/test_stack_config.py
+++ b/tests/unit/cli/test_stack_config.py
@ -0,0 +1,127 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from datetime import datetime
+
+import pytest
+import yaml
+
+from llama_stack.distribution.configure import (
+    LLAMA_STACK_RUN_CONFIG_VERSION,
+    parse_and_maybe_upgrade_config,
+)
+
+
+@pytest.fixture
+def up_to_date_config():
+    return yaml.safe_load(
+        """
+        version: {version}
+        image_name: foo
+        apis_to_serve: []
+        built_at: {built_at}
+        providers:
+          inference:
+            - provider_id: provider1
+              provider_type: inline::meta-reference
+              config: {{}}
+          safety:
+            - provider_id: provider1
+              provider_type: inline::meta-reference
+              config:
+                llama_guard_shield:
+                  model: Llama-Guard-3-1B
+                  excluded_categories: []
+                  disable_input_check: false
+                  disable_output_check: false
+                enable_prompt_guard: false
+          memory:
+            - provider_id: provider1
+              provider_type: inline::meta-reference
+              config: {{}}
+    """.format(version=LLAMA_STACK_RUN_CONFIG_VERSION, built_at=datetime.now().isoformat())
+    )
+
+
+@pytest.fixture
+def old_config():
+    return yaml.safe_load(
+        """
+        image_name: foo
+        built_at: {built_at}
+        apis_to_serve: []
+        routing_table:
+          inference:
+            - provider_type: remote::ollama
+              config:
+                host: localhost
+                port: 11434
+              routing_key: Llama3.2-1B-Instruct
+            - provider_type: inline::meta-reference
+              config:
+                model: Llama3.1-8B-Instruct
+              routing_key: Llama3.1-8B-Instruct
+          safety:
+            - routing_key: ["shield1", "shield2"]
+              provider_type: inline::meta-reference
+              config:
+                llama_guard_shield:
+                  model: Llama-Guard-3-1B
+                  excluded_categories: []
+                  disable_input_check: false
+                  disable_output_check: false
+                enable_prompt_guard: false
+          memory:
+            - routing_key: vector
+              provider_type: inline::meta-reference
+              config: {{}}
+        api_providers:
+          telemetry:
+            provider_type: noop
+            config: {{}}
+    """.format(built_at=datetime.now().isoformat())
+    )
+
+
+@pytest.fixture
+def invalid_config():
+    return yaml.safe_load(
+        """
+        routing_table: {}
+        api_providers: {}
+    """
+    )
+
+
+def test_parse_and_maybe_upgrade_config_up_to_date(up_to_date_config):
+    result = parse_and_maybe_upgrade_config(up_to_date_config)
+    assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION
+    assert "inference" in result.providers
+
+
+def test_parse_and_maybe_upgrade_config_old_format(old_config):
+    result = parse_and_maybe_upgrade_config(old_config)
+    assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION
+    assert all(api in result.providers for api in ["inference", "safety", "memory", "telemetry"])
+    safety_provider = result.providers["safety"][0]
+    assert safety_provider.provider_type == "inline::meta-reference"
+    assert "llama_guard_shield" in safety_provider.config
+
+    inference_providers = result.providers["inference"]
+    assert len(inference_providers) == 2
+    assert {x.provider_id for x in inference_providers} == {
+        "remote::ollama-00",
+        "inline::meta-reference-01",
+    }
+
+    ollama = inference_providers[0]
+    assert ollama.provider_type == "remote::ollama"
+    assert ollama.config["port"] == 11434
+
+
+def test_parse_and_maybe_upgrade_config_invalid(invalid_config):
+    with pytest.raises(KeyError):
+        parse_and_maybe_upgrade_config(invalid_config)
--- a/tests/unit/models/test_prompt_adapter.py
+++ b/tests/unit/models/test_prompt_adapter.py
@ -0,0 +1,285 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import asyncio
+import unittest
+
+from llama_stack.apis.inference import (
+    ChatCompletionRequest,
+    CompletionMessage,
+    StopReason,
+    SystemMessage,
+    ToolCall,
+    ToolConfig,
+    UserMessage,
+)
+from llama_stack.models.llama.datatypes import (
+    BuiltinTool,
+    ToolDefinition,
+    ToolParamDefinition,
+    ToolPromptFormat,
+)
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    chat_completion_request_to_messages,
+    chat_completion_request_to_prompt,
+)
+
+MODEL = "Llama3.1-8B-Instruct"
+MODEL3_2 = "Llama3.2-3B-Instruct"
+
+
+class PrepareMessagesTests(unittest.IsolatedAsyncioTestCase):
+    async def asyncSetUp(self):
+        asyncio.get_running_loop().set_debug(False)
+
+    async def test_system_default(self):
+        content = "Hello !"
+        request = ChatCompletionRequest(
+            model=MODEL,
+            messages=[
+                UserMessage(content=content),
+            ],
+        )
+        messages = chat_completion_request_to_messages(request, MODEL)
+        self.assertEqual(len(messages), 2)
+        self.assertEqual(messages[-1].content, content)
+        self.assertTrue("Cutting Knowledge Date: December 2023" in messages[0].content)
+
+    async def test_system_builtin_only(self):
+        content = "Hello !"
+        request = ChatCompletionRequest(
+            model=MODEL,
+            messages=[
+                UserMessage(content=content),
+            ],
+            tools=[
+                ToolDefinition(tool_name=BuiltinTool.code_interpreter),
+                ToolDefinition(tool_name=BuiltinTool.brave_search),
+            ],
+        )
+        messages = chat_completion_request_to_messages(request, MODEL)
+        self.assertEqual(len(messages), 2)
+        self.assertEqual(messages[-1].content, content)
+        self.assertTrue("Cutting Knowledge Date: December 2023" in messages[0].content)
+        self.assertTrue("Tools: brave_search" in messages[0].content)
+
+    async def test_system_custom_only(self):
+        content = "Hello !"
+        request = ChatCompletionRequest(
+            model=MODEL,
+            messages=[
+                UserMessage(content=content),
+            ],
+            tools=[
+                ToolDefinition(
+                    tool_name="custom1",
+                    description="custom1 tool",
+                    parameters={
+                        "param1": ToolParamDefinition(
+                            param_type="str",
+                            description="param1 description",
+                            required=True,
+                        ),
+                    },
+                )
+            ],
+            tool_config=ToolConfig(tool_prompt_format=ToolPromptFormat.json),
+        )
+        messages = chat_completion_request_to_messages(request, MODEL)
+        self.assertEqual(len(messages), 3)
+        self.assertTrue("Environment: ipython" in messages[0].content)
+
+        self.assertTrue("Return function calls in JSON format" in messages[1].content)
+        self.assertEqual(messages[-1].content, content)
+
+    async def test_system_custom_and_builtin(self):
+        content = "Hello !"
+        request = ChatCompletionRequest(
+            model=MODEL,
+            messages=[
+                UserMessage(content=content),
+            ],
+            tools=[
+                ToolDefinition(tool_name=BuiltinTool.code_interpreter),
+                ToolDefinition(tool_name=BuiltinTool.brave_search),
+                ToolDefinition(
+                    tool_name="custom1",
+                    description="custom1 tool",
+                    parameters={
+                        "param1": ToolParamDefinition(
+                            param_type="str",
+                            description="param1 description",
+                            required=True,
+                        ),
+                    },
+                ),
+            ],
+        )
+        messages = chat_completion_request_to_messages(request, MODEL)
+        self.assertEqual(len(messages), 3)
+
+        self.assertTrue("Environment: ipython" in messages[0].content)
+        self.assertTrue("Tools: brave_search" in messages[0].content)
+
+        self.assertTrue("Return function calls in JSON format" in messages[1].content)
+        self.assertEqual(messages[-1].content, content)
+
+    async def test_completion_message_encoding(self):
+        request = ChatCompletionRequest(
+            model=MODEL3_2,
+            messages=[
+                UserMessage(content="hello"),
+                CompletionMessage(
+                    content="",
+                    stop_reason=StopReason.end_of_turn,
+                    tool_calls=[
+                        ToolCall(
+                            tool_name="custom1",
+                            arguments={"param1": "value1"},
+                            call_id="123",
+                        )
+                    ],
+                ),
+            ],
+            tools=[
+                ToolDefinition(
+                    tool_name="custom1",
+                    description="custom1 tool",
+                    parameters={
+                        "param1": ToolParamDefinition(
+                            param_type="str",
+                            description="param1 description",
+                            required=True,
+                        ),
+                    },
+                ),
+            ],
+            tool_config=ToolConfig(tool_prompt_format=ToolPromptFormat.python_list),
+        )
+        prompt = await chat_completion_request_to_prompt(request, request.model)
+        self.assertIn('[custom1(param1="value1")]', prompt)
+
+        request.model = MODEL
+        request.tool_config.tool_prompt_format = ToolPromptFormat.json
+        prompt = await chat_completion_request_to_prompt(request, request.model)
+        self.assertIn('{"type": "function", "name": "custom1", "parameters": {"param1": "value1"}}', prompt)
+
+    async def test_user_provided_system_message(self):
+        content = "Hello !"
+        system_prompt = "You are a pirate"
+        request = ChatCompletionRequest(
+            model=MODEL,
+            messages=[
+                SystemMessage(content=system_prompt),
+                UserMessage(content=content),
+            ],
+            tools=[
+                ToolDefinition(tool_name=BuiltinTool.code_interpreter),
+            ],
+        )
+        messages = chat_completion_request_to_messages(request, MODEL)
+        self.assertEqual(len(messages), 2, messages)
+        self.assertTrue(messages[0].content.endswith(system_prompt))
+
+        self.assertEqual(messages[-1].content, content)
+
+    async def test_repalce_system_message_behavior_builtin_tools(self):
+        content = "Hello !"
+        system_prompt = "You are a pirate"
+        request = ChatCompletionRequest(
+            model=MODEL,
+            messages=[
+                SystemMessage(content=system_prompt),
+                UserMessage(content=content),
+            ],
+            tools=[
+                ToolDefinition(tool_name=BuiltinTool.code_interpreter),
+            ],
+            tool_config=ToolConfig(
+                tool_choice="auto",
+                tool_prompt_format="python_list",
+                system_message_behavior="replace",
+            ),
+        )
+        messages = chat_completion_request_to_messages(request, MODEL3_2)
+        self.assertEqual(len(messages), 2, messages)
+        self.assertTrue(messages[0].content.endswith(system_prompt))
+        self.assertIn("Environment: ipython", messages[0].content)
+        self.assertEqual(messages[-1].content, content)
+
+    async def test_repalce_system_message_behavior_custom_tools(self):
+        content = "Hello !"
+        system_prompt = "You are a pirate"
+        request = ChatCompletionRequest(
+            model=MODEL,
+            messages=[
+                SystemMessage(content=system_prompt),
+                UserMessage(content=content),
+            ],
+            tools=[
+                ToolDefinition(tool_name=BuiltinTool.code_interpreter),
+                ToolDefinition(
+                    tool_name="custom1",
+                    description="custom1 tool",
+                    parameters={
+                        "param1": ToolParamDefinition(
+                            param_type="str",
+                            description="param1 description",
+                            required=True,
+                        ),
+                    },
+                ),
+            ],
+            tool_config=ToolConfig(
+                tool_choice="auto",
+                tool_prompt_format="python_list",
+                system_message_behavior="replace",
+            ),
+        )
+        messages = chat_completion_request_to_messages(request, MODEL3_2)
+
+        self.assertEqual(len(messages), 2, messages)
+        self.assertTrue(messages[0].content.endswith(system_prompt))
+        self.assertIn("Environment: ipython", messages[0].content)
+        self.assertEqual(messages[-1].content, content)
+
+    async def test_replace_system_message_behavior_custom_tools_with_template(self):
+        content = "Hello !"
+        system_prompt = "You are a pirate {{ function_description }}"
+        request = ChatCompletionRequest(
+            model=MODEL,
+            messages=[
+                SystemMessage(content=system_prompt),
+                UserMessage(content=content),
+            ],
+            tools=[
+                ToolDefinition(tool_name=BuiltinTool.code_interpreter),
+                ToolDefinition(
+                    tool_name="custom1",
+                    description="custom1 tool",
+                    parameters={
+                        "param1": ToolParamDefinition(
+                            param_type="str",
+                            description="param1 description",
+                            required=True,
+                        ),
+                    },
+                ),
+            ],
+            tool_config=ToolConfig(
+                tool_choice="auto",
+                tool_prompt_format="python_list",
+                system_message_behavior="replace",
+            ),
+        )
+        messages = chat_completion_request_to_messages(request, MODEL3_2)
+
+        self.assertEqual(len(messages), 2, messages)
+        self.assertIn("Environment: ipython", messages[0].content)
+        self.assertIn("You are a pirate", messages[0].content)
+        # function description is present in the system prompt
+        self.assertIn('"name": "custom1"', messages[0].content)
+        self.assertEqual(messages[-1].content, content)
--- a/tests/unit/models/test_system_prompts.py
+++ b/tests/unit/models/test_system_prompts.py
@ -0,0 +1,198 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# top-level folder for each specific model found within the models/ directory at
+# the top-level of this source tree.
+
+import textwrap
+import unittest
+from datetime import datetime
+
+from llama_stack.models.llama.llama3.prompt_templates import (
+    BuiltinToolGenerator,
+    FunctionTagCustomToolGenerator,
+    JsonCustomToolGenerator,
+    PythonListCustomToolGenerator,
+    SystemDefaultGenerator,
+)
+
+
+class PromptTemplateTests(unittest.TestCase):
+    def check_generator_output(self, generator, expected_text):
+        example = generator.data_examples()[0]
+
+        pt = generator.gen(example)
+        text = pt.render()
+        # print(text)  # debugging
+        assert text == expected_text, f"Expected:\n{expected_text}\nActual:\n{text}"
+
+    def test_system_default(self):
+        generator = SystemDefaultGenerator()
+        today = datetime.now().strftime("%d %B %Y")
+        expected_text = f"Cutting Knowledge Date: December 2023\nToday Date: {today}"
+        self.check_generator_output(generator, expected_text)
+
+    def test_system_builtin_only(self):
+        generator = BuiltinToolGenerator()
+        expected_text = textwrap.dedent(
+            """
+            Environment: ipython
+            Tools: brave_search, wolfram_alpha
+            """
+        )
+        self.check_generator_output(generator, expected_text.strip("\n"))
+
+    def test_system_custom_only(self):
+        self.maxDiff = None
+        generator = JsonCustomToolGenerator()
+        expected_text = textwrap.dedent(
+            """
+            Answer the user's question by making use of the following functions if needed.
+            If none of the function can be used, please say so.
+            Here is a list of functions in JSON format:
+            {
+                "type": "function",
+                "function": {
+                    "name": "trending_songs",
+                    "description": "Returns the trending songs on a Music site",
+                    "parameters": {
+                        "type": "object",
+                        "properties": [
+                            {
+                                "n": {
+                                    "type": "object",
+                                    "description": "The number of songs to return"
+                                }
+                            },
+                            {
+                                "genre": {
+                                    "type": "object",
+                                    "description": "The genre of the songs to return"
+                                }
+                            }
+                        ],
+                        "required": ["n"]
+                    }
+                }
+            }
+
+            Return function calls in JSON format.
+            """
+        )
+        self.check_generator_output(generator, expected_text.strip("\n"))
+
+    def test_system_custom_function_tag(self):
+        self.maxDiff = None
+        generator = FunctionTagCustomToolGenerator()
+        expected_text = textwrap.dedent(
+            """
+            You have access to the following functions:
+
+            Use the function 'trending_songs' to 'Returns the trending songs on a Music site':
+            {"name": "trending_songs", "description": "Returns the trending songs on a Music site", "parameters": {"genre": {"description": "The genre of the songs to return", "param_type": "str", "required": false}, "n": {"description": "The number of songs to return", "param_type": "int", "required": true}}}
+
+            Think very carefully before calling functions.
+            If you choose to call a function ONLY reply in the following format with no prefix or suffix:
+
+            <function=example_function_name>{"example_name": "example_value"}</function>
+
+            Reminder:
+            - If looking for real time information use relevant functions before falling back to brave_search
+            - Function calls MUST follow the specified format, start with <function= and end with </function>
+            - Required parameters MUST be specified
+            - Only call one function at a time
+            - Put the entire function call reply on one line
+            """
+        )
+        self.check_generator_output(generator, expected_text.strip("\n"))
+
+    def test_llama_3_2_system_zero_shot(self):
+        generator = PythonListCustomToolGenerator()
+        expected_text = textwrap.dedent(
+            """
+            You are a helpful assistant. You have access to functions, but you should only use them if they are required.
+            You are an expert in composing functions. You are given a question and a set of possible functions.
+            Based on the question, you may or may not need to make one function/tool call to achieve the purpose.
+
+            If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
+            You SHOULD NOT include any other text in the response.
+
+            Here is a list of functions in JSON format that you can invoke.
+
+            [
+                {
+                    "name": "get_weather",
+                    "description": "Get weather info for places",
+                    "parameters": {
+                        "type": "dict",
+                        "required": ["city"],
+                        "properties": {
+                            "city": {
+                                "type": "string",
+                                "description": "The name of the city to get the weather for"
+                            },
+                            "metric": {
+                                "type": "string",
+                                "description": "The metric for weather. Options are: celsius, fahrenheit",
+                                "default": "celsius"
+                            }
+                        }
+                    }
+                }
+            ]
+            """
+        )
+        self.check_generator_output(generator, expected_text.strip("\n"))
+
+    def test_llama_3_2_provided_system_prompt(self):
+        generator = PythonListCustomToolGenerator()
+        expected_text = textwrap.dedent(
+            """
+            Overriding message.
+
+            If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
+            You SHOULD NOT include any other text in the response.
+
+            Here is a list of functions in JSON format that you can invoke.
+
+            [
+                {
+                    "name": "get_weather",
+                    "description": "Get weather info for places",
+                    "parameters": {
+                        "type": "dict",
+                        "required": ["city"],
+                        "properties": {
+                            "city": {
+                                "type": "string",
+                                "description": "The name of the city to get the weather for"
+                            },
+                            "metric": {
+                                "type": "string",
+                                "description": "The metric for weather. Options are: celsius, fahrenheit",
+                                "default": "celsius"
+                            }
+                        }
+                    }
+                }
+            ]"""
+        )
+        user_system_prompt = textwrap.dedent(
+            """
+            Overriding message.
+
+            {{ function_description }}
+            """
+        )
+        example = generator.data_examples()[0]
+
+        pt = generator.gen(example, user_system_prompt)
+        text = pt.render()
+        assert text == expected_text, f"Expected:\n{expected_text}\nActual:\n{text}"
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@ -0,0 +1,234 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import asyncio
+import json
+import logging
+import threading
+import time
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from typing import Any, Dict
+from unittest.mock import AsyncMock, patch
+
+import pytest
+import pytest_asyncio
+from openai.types.chat.chat_completion_chunk import (
+    ChatCompletionChunk as OpenAIChatCompletionChunk,
+)
+from openai.types.chat.chat_completion_chunk import (
+    Choice as OpenAIChoice,
+)
+from openai.types.chat.chat_completion_chunk import (
+    ChoiceDelta as OpenAIChoiceDelta,
+)
+from openai.types.model import Model as OpenAIModel
+
+from llama_stack.apis.inference import ToolChoice, ToolConfig
+from llama_stack.apis.models import Model
+from llama_stack.models.llama.datatypes import StopReason
+from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
+from llama_stack.providers.remote.inference.vllm.vllm import (
+    VLLMInferenceAdapter,
+    _process_vllm_chat_completion_stream_response,
+)
+
+# These are unit tests for the remote vllm provider
+# implementation. This should only contain tests which are specific to
+# the implementation details of those classes. More general
+# (API-level) tests should be placed in tests/integration/inference/
+#
+# How to run this test:
+#
+# pytest tests/unit/providers/inference/test_remote_vllm.py \
+# -v -s --tb=short --disable-warnings
+
+
+class MockInferenceAdapterWithSleep:
+    def __init__(self, sleep_time: int, response: Dict[str, Any]):
+        self.httpd = None
+
+        class DelayedRequestHandler(BaseHTTPRequestHandler):
+            # ruff: noqa: N802
+            def do_POST(self):
+                time.sleep(sleep_time)
+                self.send_response(code=200)
+                self.end_headers()
+                self.wfile.write(json.dumps(response).encode("utf-8"))
+
+        self.request_handler = DelayedRequestHandler
+
+    def __enter__(self):
+        httpd = HTTPServer(("", 0), self.request_handler)
+        self.httpd = httpd
+        host, port = httpd.server_address
+        httpd_thread = threading.Thread(target=httpd.serve_forever)
+        httpd_thread.daemon = True  # stop server if this thread terminates
+        httpd_thread.start()
+
+        config = VLLMInferenceAdapterConfig(url=f"http://{host}:{port}")
+        inference_adapter = VLLMInferenceAdapter(config)
+        return inference_adapter
+
+    def __exit__(self, _exc_type, _exc_value, _traceback):
+        if self.httpd:
+            self.httpd.shutdown()
+            self.httpd.server_close()
+
+
+@pytest.fixture(scope="module")
+def mock_openai_models_list():
+    with patch("openai.resources.models.AsyncModels.list", new_callable=AsyncMock) as mock_list:
+        yield mock_list
+
+
+@pytest_asyncio.fixture(scope="module")
+async def vllm_inference_adapter():
+    config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
+    inference_adapter = VLLMInferenceAdapter(config)
+    inference_adapter.model_store = AsyncMock()
+    await inference_adapter.initialize()
+    return inference_adapter
+
+
+@pytest.mark.asyncio
+async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inference_adapter):
+    async def mock_openai_models():
+        yield OpenAIModel(id="foo", created=1, object="model", owned_by="test")
+
+    mock_openai_models_list.return_value = mock_openai_models()
+
+    foo_model = Model(identifier="foo", provider_resource_id="foo", provider_id="vllm-inference")
+
+    await vllm_inference_adapter.register_model(foo_model)
+    mock_openai_models_list.assert_called()
+
+
+@pytest.mark.asyncio
+async def test_old_vllm_tool_choice(vllm_inference_adapter):
+    """
+    Test that we set tool_choice to none when no tools are in use
+    to support older versions of vLLM
+    """
+    mock_model = Model(identifier="mock-model", provider_resource_id="mock-model", provider_id="vllm-inference")
+    vllm_inference_adapter.model_store.get_model.return_value = mock_model
+
+    with patch.object(vllm_inference_adapter, "_nonstream_chat_completion") as mock_nonstream_completion:
+        # No tools but auto tool choice
+        await vllm_inference_adapter.chat_completion(
+            "mock-model",
+            [],
+            stream=False,
+            tools=None,
+            tool_config=ToolConfig(tool_choice=ToolChoice.auto),
+        )
+        mock_nonstream_completion.assert_called()
+        request = mock_nonstream_completion.call_args.args[0]
+        # Ensure tool_choice gets converted to none for older vLLM versions
+        assert request.tool_config.tool_choice == ToolChoice.none
+
+
+@pytest.mark.asyncio
+async def test_tool_call_delta_empty_tool_call_buf():
+    """
+    Test that we don't generate extra chunks when processing a
+    tool call response that didn't call any tools. Previously we would
+    emit chunks with spurious ToolCallParseStatus.succeeded or
+    ToolCallParseStatus.failed when processing chunks that didn't
+    actually make any tool calls.
+    """
+
+    async def mock_stream():
+        delta = OpenAIChoiceDelta(content="", tool_calls=None)
+        choices = [OpenAIChoice(delta=delta, finish_reason="stop", index=0)]
+        mock_chunk = OpenAIChatCompletionChunk(
+            id="chunk-1",
+            created=1,
+            model="foo",
+            object="chat.completion.chunk",
+            choices=choices,
+        )
+        for chunk in [mock_chunk]:
+            yield chunk
+
+    chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())]
+    assert len(chunks) == 1
+    assert chunks[0].event.stop_reason == StopReason.end_of_turn
+
+
+@pytest.mark.asyncio
+async def test_process_vllm_chat_completion_stream_response_no_choices():
+    """
+    Test that we don't error out when vLLM returns no choices for a
+    completion request. This can happen when there's an error thrown
+    in vLLM for example.
+    """
+
+    async def mock_stream():
+        choices = []
+        mock_chunk = OpenAIChatCompletionChunk(
+            id="chunk-1",
+            created=1,
+            model="foo",
+            object="chat.completion.chunk",
+            choices=choices,
+        )
+        for chunk in [mock_chunk]:
+            yield chunk
+
+    chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())]
+    assert len(chunks) == 0
+
+
+def test_chat_completion_doesnt_block_event_loop(caplog):
+    loop = asyncio.new_event_loop()
+    loop.set_debug(True)
+    caplog.set_level(logging.WARNING)
+
+    # Log when event loop is blocked for more than 100ms
+    loop.slow_callback_duration = 0.1
+    # Sleep for 500ms in our delayed http response
+    sleep_time = 0.5
+
+    mock_model = Model(identifier="mock-model", provider_resource_id="mock-model", provider_id="vllm-inference")
+    mock_response = {
+        "id": "chatcmpl-abc123",
+        "object": "chat.completion",
+        "created": 1,
+        "modle": "mock-model",
+        "choices": [
+            {
+                "message": {"content": ""},
+                "logprobs": None,
+                "finish_reason": "stop",
+                "index": 0,
+            }
+        ],
+    }
+
+    async def do_chat_completion():
+        await inference_adapter.chat_completion(
+            "mock-model",
+            [],
+            stream=False,
+            tools=None,
+            tool_config=ToolConfig(tool_choice=ToolChoice.auto),
+        )
+
+    with MockInferenceAdapterWithSleep(sleep_time, mock_response) as inference_adapter:
+        inference_adapter.model_store = AsyncMock()
+        inference_adapter.model_store.get_model.return_value = mock_model
+        loop.run_until_complete(inference_adapter.initialize())
+
+        # Clear the logs so far and run the actual chat completion we care about
+        caplog.clear()
+        loop.run_until_complete(do_chat_completion())
+
+    # Ensure we don't have any asyncio warnings in the captured log
+    # records from our chat completion call. A message gets logged
+    # here any time we exceed the slow_callback_duration configured
+    # above.
+    asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
+    assert not asyncio_warnings
--- a/tests/unit/providers/test_configs.py
+++ b/tests/unit/providers/test_configs.py
@ -0,0 +1,50 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import pytest
+from pydantic import BaseModel
+
+from llama_stack.distribution.distribution import get_provider_registry, providable_apis
+from llama_stack.distribution.utils.dynamic import instantiate_class_type
+
+
+class TestProviderConfigurations:
+    """Checks that each providable API has providers whose config classes are usable."""
+
+    def test_all_api_providers_exist(self):
+        """Every providable API must map to a non-empty provider collection."""
+        registry = get_provider_registry()
+        for api in providable_apis():
+            registered = registry.get(api, {})
+            assert registered, f"No providers found for API type: {api}"
+
+    @pytest.mark.parametrize("api", providable_apis())
+    def test_api_providers(self, api):
+        """Verify every provider of ``api`` and report all failures together."""
+        registered = get_provider_registry().get(api, {})
+        assert registered, f"No providers found for API type: {api}"
+
+        # Collect every problem instead of stopping at the first one so a
+        # single run reports all broken providers for this API.
+        problems = []
+        for provider_type, provider_spec in registered.items():
+            try:
+                self._verify_provider_config(provider_type, provider_spec)
+            except Exception as e:
+                problems.append(f"Failed to verify {provider_type} config: {str(e)}")
+
+        if not problems:
+            return
+        pytest.fail("\n".join(problems))
+
+    def _verify_provider_config(self, provider_type, provider_spec):
+        """Assert that one provider's config class is a BaseModel with a dict-returning sample_run_config."""
+        class_path = provider_spec.config_class
+        resolved = instantiate_class_type(class_path)
+
+        assert issubclass(resolved, BaseModel), f"{class_path} is not a subclass of BaseModel"
+        assert hasattr(resolved, "sample_run_config"), f"{class_path} does not have sample_run_config method"
+
+        generated = resolved.sample_run_config(__distro_dir__="foobarbaz")
+        assert isinstance(generated, dict), f"{class_path}.sample_run_config() did not return a dict"
--- a/tests/unit/providers/vector_io/test_sqlite_vec.py
+++ b/tests/unit/providers/vector_io/test_sqlite_vec.py
@ -0,0 +1,135 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import asyncio
+import sqlite3
+
+import numpy as np
+import pytest
+import pytest_asyncio
+import sqlite_vec
+
+from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
+from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
+    SQLiteVecIndex,
+    SQLiteVecVectorIOAdapter,
+    generate_chunk_id,
+)
+
+# This test is a unit test for the SQLiteVecVectorIOAdapter class. This should only contain
+# tests which are specific to this class. More general (API-level) tests should be placed in
+# tests/integration/vector_io/
+#
+# How to run this test:
+#
+# pytest tests/unit/providers/vector_io/test_sqlite_vec.py \
+# -v -s --tb=short --disable-warnings --asyncio-mode=auto
+
+SQLITE_VEC_PROVIDER = "sqlite_vec"  # not referenced by the tests in this module
+EMBEDDING_DIMENSION = 384  # vector width used for the test index and for every generated embedding
+EMBEDDING_MODEL = "all-MiniLM-L6-v2"  # not referenced by the tests in this module
+
+
+@pytest.fixture(scope="session")
+def loop():
+    """Provide one freshly created asyncio event loop for the whole session."""
+    session_loop = asyncio.new_event_loop()
+    return session_loop
+
+
+@pytest.fixture(scope="session", autouse=True)
+def sqlite_connection(loop):
+    """Yield an in-memory SQLite connection with the sqlite_vec extension loaded.
+
+    The connection is closed when the session ends, even if loading the
+    extension fails.
+    """
+    connection = sqlite3.connect(":memory:")
+    try:
+        connection.enable_load_extension(True)
+        sqlite_vec.load(connection)
+        yield connection
+    finally:
+        connection.close()
+
+
+@pytest_asyncio.fixture(scope="session", autouse=True)
+async def sqlite_vec_index(sqlite_connection):
+    """Create the session-wide SQLiteVecIndex ("test_bank") on the shared connection."""
+    index = await SQLiteVecIndex.create(
+        dimension=EMBEDDING_DIMENSION,
+        connection=sqlite_connection,
+        bank_id="test_bank",
+    )
+    return index
+
+
+@pytest.fixture(scope="session")
+def sample_chunks():
+    """Build 10 chunks for each of 3 documents so one document spans several insert batches."""
+    sentences_per_document, document_count = 10, 3
+    chunks = []
+    for doc_index in range(document_count):
+        for sentence_index in range(sentences_per_document):
+            chunks.append(
+                Chunk(
+                    content=f"Sentence {sentence_index} from document {doc_index}",
+                    metadata={"document_id": f"document-{doc_index}"},
+                )
+            )
+    return chunks
+
+
+@pytest.fixture(scope="session")
+def sample_embeddings(sample_chunks):
+    """Return one seeded random float32 vector per sample chunk, stacked into an array."""
+    # Fixed seed so the generated embeddings are reproducible.
+    np.random.seed(42)
+    vectors = []
+    for _ in sample_chunks:
+        vectors.append(np.random.rand(EMBEDDING_DIMENSION).astype(np.float32))
+    return np.array(vectors)
+
+
+@pytest.mark.asyncio
+async def test_add_chunks(sqlite_vec_index, sample_chunks, sample_embeddings):
+    """Adding the sample chunks in batches of two leaves one metadata row per chunk."""
+    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings, batch_size=2)
+
+    cursor = sqlite_vec_index.connection.cursor()
+    cursor.execute(f"SELECT COUNT(*) FROM {sqlite_vec_index.metadata_table}")
+    row = cursor.fetchone()
+    stored_rows = row[0]
+
+    assert stored_rows == len(sample_chunks)
+
+
+@pytest.mark.asyncio
+async def test_query_chunks(sqlite_vec_index, sample_chunks, sample_embeddings):
+    """Querying with k=2 returns a QueryChunksResponse holding exactly two chunks."""
+    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
+
+    probe = np.random.rand(EMBEDDING_DIMENSION).astype(np.float32)
+    result = await sqlite_vec_index.query(probe, k=2, score_threshold=0.0)
+
+    assert isinstance(result, QueryChunksResponse)
+    assert len(result.chunks) == 2
+
+
+@pytest.mark.asyncio
+async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks):
+    """Test that chunk IDs do not conflict across batches when inserting chunks."""
+    # Each document has 10 chunks, so a batch size of 2 forces several
+    # batches per document.
+    embeddings = np.random.rand(len(sample_chunks), EMBEDDING_DIMENSION).astype(np.float32)
+    await sqlite_vec_index.add_chunks(sample_chunks, embeddings, batch_size=2)
+
+    # Read back every stored chunk ID.
+    cursor = sqlite_vec_index.connection.cursor()
+    cursor.execute(f"SELECT id FROM {sqlite_vec_index.metadata_table}")
+    stored_ids = []
+    for row in cursor.fetchall():
+        stored_ids.append(row[0])
+    cursor.close()
+
+    # A set drops repeats, so equal sizes mean no ID occurs twice.
+    distinct_ids = set(stored_ids)
+    assert len(stored_ids) == len(distinct_ids), "Duplicate chunk IDs detected across batches!"
+
+
+@pytest_asyncio.fixture(scope="session")
+async def sqlite_vec_adapter(sqlite_connection):
+    """Yield an initialized SQLiteVecVectorIOAdapter backed by an in-memory database.
+
+    This is an async generator, so it is registered through
+    ``pytest_asyncio.fixture`` (like ``sqlite_vec_index``) rather than the
+    plain ``pytest.fixture`` decorator, which only handles coroutine
+    fixtures when pytest-asyncio runs in auto mode. The adapter is shut
+    down once the session ends.
+    """
+    config = type("Config", (object,), {"db_path": ":memory:"})  # Mock config with in-memory database
+    adapter = SQLiteVecVectorIOAdapter(config=config, inference_api=None)
+    await adapter.initialize()
+    yield adapter
+    await adapter.shutdown()
+
+
+def test_generate_chunk_id():
+    """generate_chunk_id yields the pinned IDs for three contents of document "doc-1"."""
+    contents = ["test", "test ", "test 3"]
+    chunks = [Chunk(content=text, metadata={"document_id": "doc-1"}) for text in contents]
+
+    observed = [generate_chunk_id(chunk.metadata["document_id"], chunk.content) for chunk in chunks]
+    observed.sort()
+
+    expected = [
+        "177a1368-f6a8-0c50-6e92-18677f2c3de3",
+        "bc744db3-1b25-0a9c-cdff-b6ba3df73c36",
+        "f68df25d-d9aa-ab4d-5684-64a233add20d",
+    ]
+    assert observed == expected
--- a/tests/unit/rag/fixtures/dummy.pdf
+++ b/tests/unit/rag/fixtures/dummy.pdf
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@ -0,0 +1,78 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import base64
+import mimetypes
+import os
+from pathlib import Path
+
+import pytest
+
+from llama_stack.apis.tools import RAGDocument
+from llama_stack.providers.utils.memory.vector_store import URL, content_from_doc
+
+DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf"  # fixtures/dummy.pdf beside this test module
+# Depending on the machine, this can get parsed a couple of ways
+DUMMY_PDF_TEXT_CHOICES = ["Dummy PDF file", "Dumm y PDF file"]  # extraction results the tests accept
+
+
+def read_file(file_path: str) -> bytes:
+    """Return the complete contents of ``file_path`` as raw bytes."""
+    with open(file_path, mode="rb") as handle:
+        contents = handle.read()
+    return contents
+
+
+def data_url_from_file(file_path: str) -> str:
+    """Encode the file at ``file_path`` as a base64 ``data:`` URL.
+
+    The MIME type is guessed from the file name via ``mimetypes``.
+    """
+    with open(file_path, "rb") as handle:
+        raw_bytes = handle.read()
+
+    encoded = base64.b64encode(raw_bytes).decode("utf-8")
+    guessed_type, _ = mimetypes.guess_type(file_path)
+
+    return f"data:{guessed_type};base64,{encoded}"
+
+
+class TestVectorStore:
+    """Checks content_from_doc against the dummy PDF supplied as a data URI, URL string and URL object."""
+
+    @pytest.mark.asyncio
+    async def test_returns_content_from_pdf_data_uri(self):
+        """A PDF embedded as a data URI is converted to one of the accepted texts."""
+        document = RAGDocument(
+            document_id="dummy",
+            content=data_url_from_file(DUMMY_PDF_PATH),
+            mime_type="application/pdf",
+            metadata={},
+        )
+        extracted = await content_from_doc(document)
+        assert extracted in DUMMY_PDF_TEXT_CHOICES
+
+    @pytest.mark.asyncio
+    async def test_downloads_pdf_and_returns_content(self):
+        """A PDF referenced by a plain URL string is converted to one of the accepted texts."""
+        # Using GitHub to host the PDF file
+        pdf_url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
+        document = RAGDocument(
+            document_id="dummy",
+            content=pdf_url,
+            mime_type="application/pdf",
+            metadata={},
+        )
+        extracted = await content_from_doc(document)
+        assert extracted in DUMMY_PDF_TEXT_CHOICES
+
+    @pytest.mark.asyncio
+    async def test_downloads_pdf_and_returns_content_with_url_object(self):
+        """A PDF referenced through a URL object is converted to one of the accepted texts."""
+        # Using GitHub to host the PDF file
+        pdf_url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
+        wrapped_url = URL(
+            uri=pdf_url,
+        )
+        document = RAGDocument(
+            document_id="dummy",
+            content=wrapped_url,
+            mime_type="application/pdf",
+            metadata={},
+        )
+        extracted = await content_from_doc(document)
+        assert extracted in DUMMY_PDF_TEXT_CHOICES
--- a/tests/unit/registry/test_registry.py
+++ b/tests/unit/registry/test_registry.py
@ -0,0 +1,199 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+
+import pytest
+import pytest_asyncio
+
+from llama_stack.apis.inference import Model
+from llama_stack.apis.vector_dbs import VectorDB
+from llama_stack.distribution.store.registry import (
+    CachedDiskDistributionRegistry,
+    DiskDistributionRegistry,
+)
+from llama_stack.providers.utils.kvstore import kvstore_impl
+from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
+
+
+@pytest.fixture
+def config(tmp_path):
+    """Return a SqliteKVStoreConfig pointing at a fresh database file for each test.
+
+    The database lives under pytest's per-test ``tmp_path`` directory rather
+    than a fixed ``/tmp`` path, so every test starts without an existing
+    file, concurrent runs cannot share a database, and the location works on
+    platforms that have no ``/tmp``.
+    """
+    return SqliteKVStoreConfig(db_path=str(tmp_path / "test_registry.db"))
+
+
+@pytest_asyncio.fixture(scope="function")
+async def registry(config):
+    """Provide an initialized DiskDistributionRegistry backed by the test kvstore."""
+    kvstore = await kvstore_impl(config)
+    disk_registry = DiskDistributionRegistry(kvstore)
+    await disk_registry.initialize()
+    return disk_registry
+
+
+@pytest_asyncio.fixture(scope="function")
+async def cached_registry(config):
+    """Provide an initialized CachedDiskDistributionRegistry backed by the test kvstore."""
+    kvstore = await kvstore_impl(config)
+    caching_registry = CachedDiskDistributionRegistry(kvstore)
+    await caching_registry.initialize()
+    return caching_registry
+
+
+@pytest.fixture
+def sample_vector_db():
+    """Return the VectorDB ("test_vector_db") used by the registration tests."""
+    fields = {
+        "identifier": "test_vector_db",
+        "embedding_model": "all-MiniLM-L6-v2",
+        "embedding_dimension": 384,
+        "provider_resource_id": "test_vector_db",
+        "provider_id": "test-provider",
+    }
+    return VectorDB(**fields)
+
+
+@pytest.fixture
+def sample_model():
+    """Return the Model ("test_model") used by the registration tests."""
+    fields = {
+        "identifier": "test_model",
+        "provider_resource_id": "test_model",
+        "provider_id": "test-provider",
+    }
+    return Model(**fields)
+
+
+@pytest.mark.asyncio
+async def test_registry_initialization(registry):
+    """Looking up an unknown entry in an empty registry yields None."""
+    missing = await registry.get("nonexistent", "nonexistent")
+    assert missing is None
+
+
+@pytest.mark.asyncio
+async def test_basic_registration(registry, sample_vector_db, sample_model):
+    """Registered vector DB and model objects can be fetched back with matching fields."""
+    print(f"Registering {sample_vector_db}")
+    await registry.register(sample_vector_db)
+    print(f"Registering {sample_model}")
+    await registry.register(sample_model)
+
+    print("Getting vector_db")
+    fetched_db = await registry.get("vector_db", "test_vector_db")
+    assert fetched_db is not None
+    assert fetched_db.identifier == sample_vector_db.identifier
+    assert fetched_db.embedding_model == sample_vector_db.embedding_model
+    assert fetched_db.provider_id == sample_vector_db.provider_id
+
+    fetched_model = await registry.get("model", "test_model")
+    assert fetched_model is not None
+    assert fetched_model.identifier == sample_model.identifier
+    assert fetched_model.provider_id == sample_model.provider_id
+
+
+@pytest.mark.asyncio
+async def test_cached_registry_initialization(config, sample_vector_db, sample_model):
+    """A cached registry opened on the same config sees objects written by a disk registry."""
+    # Write the sample objects through a plain disk registry first.
+    writer = DiskDistributionRegistry(await kvstore_impl(config))
+    await writer.initialize()
+    await writer.register(sample_vector_db)
+    await writer.register(sample_model)
+
+    # A cached registry created afterwards should load them during initialize().
+    reader = CachedDiskDistributionRegistry(await kvstore_impl(config))
+    await reader.initialize()
+
+    loaded = await reader.get("vector_db", "test_vector_db")
+    assert loaded is not None
+    assert loaded.identifier == sample_vector_db.identifier
+    assert loaded.embedding_model == sample_vector_db.embedding_model
+    assert loaded.embedding_dimension == sample_vector_db.embedding_dimension
+    assert loaded.provider_id == sample_vector_db.provider_id
+
+
+@pytest.mark.asyncio
+async def test_cached_registry_updates(config):
+    """An object registered through the cached registry is readable from it and from a new disk registry."""
+    caching = CachedDiskDistributionRegistry(await kvstore_impl(config))
+    await caching.initialize()
+
+    added = VectorDB(
+        identifier="test_vector_db_2",
+        embedding_model="all-MiniLM-L6-v2",
+        embedding_dimension=384,
+        provider_resource_id="test_vector_db_2",
+        provider_id="baz",
+    )
+    await caching.register(added)
+
+    # Verify in cache
+    from_cache = await caching.get("vector_db", "test_vector_db_2")
+    assert from_cache is not None
+    assert from_cache.identifier == added.identifier
+    assert from_cache.provider_id == added.provider_id
+
+    # Verify persisted to disk
+    on_disk = DiskDistributionRegistry(await kvstore_impl(config))
+    await on_disk.initialize()
+    from_disk = await on_disk.get("vector_db", "test_vector_db_2")
+    assert from_disk is not None
+    assert from_disk.identifier == added.identifier
+    assert from_disk.provider_id == added.provider_id
+
+
+@pytest.mark.asyncio
+async def test_duplicate_provider_registration(config):
+    """Registering the same identifier and provider twice keeps the first object's values."""
+    caching = CachedDiskDistributionRegistry(await kvstore_impl(config))
+    await caching.initialize()
+
+    first = VectorDB(
+        identifier="test_vector_db_2",
+        embedding_model="all-MiniLM-L6-v2",
+        embedding_dimension=384,
+        provider_resource_id="test_vector_db_2",
+        provider_id="baz",
+    )
+    await caching.register(first)
+
+    # Same identifier and provider_id as `first`, but a different embedding model.
+    second = VectorDB(
+        identifier="test_vector_db_2",
+        embedding_model="different-model",
+        embedding_dimension=384,
+        provider_resource_id="test_vector_db_2",
+        provider_id="baz",
+    )
+    await caching.register(second)
+
+    stored = await caching.get("vector_db", "test_vector_db_2")
+    assert stored is not None
+    # The original values must be preserved.
+    assert stored.embedding_model == first.embedding_model
+
+
+@pytest.mark.asyncio
+async def test_get_all_objects(config):
+    """get_all() returns each of three registered vector DBs exactly once with its fields intact."""
+    caching = CachedDiskDistributionRegistry(await kvstore_impl(config))
+    await caching.initialize()
+
+    # Build and register three vector DBs with distinct identifiers and providers.
+    registered = []
+    for index in range(3):
+        registered.append(
+            VectorDB(
+                identifier=f"test_vector_db_{index}",
+                embedding_model="all-MiniLM-L6-v2",
+                embedding_dimension=384,
+                provider_resource_id=f"test_vector_db_{index}",
+                provider_id=f"provider_{index}",
+            )
+        )
+    for entry in registered:
+        await caching.register(entry)
+
+    # Test get_all retrieval
+    everything = await caching.get_all()
+    assert len(everything) == 3
+
+    # Each registered object must appear once with the same field values.
+    for expected in registered:
+        matches = [found for found in everything if found.identifier == expected.identifier]
+        assert len(matches) == 1
+        stored = matches[0]
+        assert stored.embedding_model == expected.embedding_model
+        assert stored.provider_id == expected.provider_id
+        assert stored.embedding_dimension == expected.embedding_dimension
--- a/tests/unit/server/test_replace_env_vars.py
+++ b/tests/unit/server/test_replace_env_vars.py
@ -0,0 +1,66 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+import unittest
+
+from llama_stack.distribution.stack import replace_env_vars
+
+
+class TestReplaceEnvVars(unittest.TestCase):
+    """Exercises replace_env_vars with plain, default (``:``) and conditional (``+``) references."""
+
+    def setUp(self):
+        # Every variable the tests read, including NOT_SET, which must be
+        # absent for the "not set" cases to be meaningful.
+        managed = ["TEST_VAR", "EMPTY_VAR", "ZERO_VAR", "NOT_SET"]
+
+        # Snapshot the current values and restore them after each test so
+        # this suite does not leak changes into os.environ.
+        saved = {name: os.environ.get(name) for name in managed}
+        self.addCleanup(self._restore_env, saved)
+
+        # Clear any existing environment variables we'll use in tests
+        for name in managed:
+            os.environ.pop(name, None)
+
+        # Set up test environment variables
+        os.environ["TEST_VAR"] = "test_value"
+        os.environ["EMPTY_VAR"] = ""
+        os.environ["ZERO_VAR"] = "0"
+
+    @staticmethod
+    def _restore_env(saved):
+        """Put each variable back to its saved value, removing those that were unset."""
+        for name, value in saved.items():
+            if value is None:
+                os.environ.pop(name, None)
+            else:
+                os.environ[name] = value
+
+    def test_simple_replacement(self):
+        self.assertEqual(replace_env_vars("${env.TEST_VAR}"), "test_value")
+
+    def test_default_value_when_not_set(self):
+        self.assertEqual(replace_env_vars("${env.NOT_SET:default}"), "default")
+
+    def test_default_value_when_set(self):
+        self.assertEqual(replace_env_vars("${env.TEST_VAR:default}"), "test_value")
+
+    def test_default_value_when_empty(self):
+        self.assertEqual(replace_env_vars("${env.EMPTY_VAR:default}"), "default")
+
+    def test_conditional_value_when_set(self):
+        self.assertEqual(replace_env_vars("${env.TEST_VAR+conditional}"), "conditional")
+
+    def test_conditional_value_when_not_set(self):
+        self.assertEqual(replace_env_vars("${env.NOT_SET+conditional}"), "")
+
+    def test_conditional_value_when_empty(self):
+        self.assertEqual(replace_env_vars("${env.EMPTY_VAR+conditional}"), "")
+
+    def test_conditional_value_with_zero(self):
+        # "0" is a non-empty string, so the conditional value is still expected.
+        self.assertEqual(replace_env_vars("${env.ZERO_VAR+conditional}"), "conditional")
+
+    def test_mixed_syntax(self):
+        self.assertEqual(replace_env_vars("${env.TEST_VAR:default} and ${env.NOT_SET+conditional}"), "test_value and ")
+        self.assertEqual(
+            replace_env_vars("${env.NOT_SET:default} and ${env.TEST_VAR+conditional}"), "default and conditional"
+        )
+
+    def test_nested_structures(self):
+        # References inside dicts and lists are expected to be replaced too.
+        data = {
+            "key1": "${env.TEST_VAR:default}",
+            "key2": ["${env.NOT_SET:default}", "${env.TEST_VAR+conditional}"],
+            "key3": {"nested": "${env.NOT_SET+conditional}"},
+        }
+        expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": ""}}
+        self.assertEqual(replace_env_vars(data), expected)
+
+
+if __name__ == "__main__":  # run the TestCase classes when this file is executed as a script
+    unittest.main()
--- a/tests/unit/server/test_resolver.py
+++ b/tests/unit/server/test_resolver.py
@ -0,0 +1,117 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import inspect
+import sys
+from typing import Any, Dict, Protocol
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from pydantic import BaseModel, Field
+
+from llama_stack.apis.inference import Inference
+from llama_stack.distribution.datatypes import (
+    Api,
+    Provider,
+    StackRunConfig,
+)
+from llama_stack.distribution.resolver import resolve_impls
+from llama_stack.distribution.routers.routers import InferenceRouter
+from llama_stack.distribution.routers.routing_tables import ModelsRoutingTable
+from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec
+
+
+def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None:
+    """Attach an async stub to ``cls`` for every ``__webmethod__`` function of ``protocol``.
+
+    Each stub accepts any arguments, returns a new MagicMock, and advertises
+    the signature of the protocol function it stands in for.
+    """
+    for name, value in inspect.getmembers(protocol):
+        # Only plain functions tagged with __webmethod__ are mirrored.
+        if not inspect.isfunction(value):
+            continue
+        if not hasattr(value, "__webmethod__"):
+            continue
+
+        protocol_signature = inspect.signature(value)
+
+        async def mock_impl(*args, **kwargs):
+            return MagicMock()
+
+        # Make signature introspection of the stub report the protocol's signature.
+        mock_impl.__signature__ = protocol_signature
+        setattr(cls, name, mock_impl)
+
+
+class SampleConfig(BaseModel):
+    """Minimal provider config with a single string field, used by the resolver test."""
+
+    foo: str = Field(default="bar", description="foo")
+
+    @classmethod
+    def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]:
+        """Return a fixed sample run config; keyword arguments are accepted but ignored."""
+        sample = {"foo": "baz"}
+        return sample
+
+
+class SampleImpl:
+    """Bare provider implementation that records what it was constructed with."""
+
+    def __init__(self, config: SampleConfig, deps: Dict[Api, Any], provider_spec: ProviderSpec = None):
+        # Keep the constructor inputs on the instance under the dunder-style
+        # attribute names that the resolver test inspects afterwards.
+        self.__provider_spec__ = provider_spec
+        self.__provider_config__ = config
+        self.__deps__ = deps
+        self.__provider_id__ = "test_provider"
+        self.foo = config.foo
+
+    async def initialize(self):
+        """No-op initialization hook."""
+        return None
+
+
+@pytest.mark.asyncio
+async def test_resolve_impls_basic():
+    """Resolve a single inline inference provider and inspect the resulting router.
+
+    A mock module is registered in ``sys.modules`` under the provider spec's
+    module name so the resolver can import it. The entry is removed again
+    once resolution finishes so it does not leak into other tests.
+    """
+    # Create a real provider spec
+    provider_spec = InlineProviderSpec(
+        api=Api.inference,
+        provider_type="sample",
+        module="test_module",
+        config_class="test_resolver.SampleConfig",
+        api_dependencies=[],
+    )
+
+    # Create provider registry with our provider
+    provider_registry = {Api.inference: {provider_spec.provider_type: provider_spec}}
+
+    run_config = StackRunConfig(
+        image_name="test_image",
+        providers={
+            "inference": [
+                Provider(
+                    provider_id="sample_provider",
+                    provider_type="sample",
+                    config=SampleConfig.sample_run_config(),
+                )
+            ]
+        },
+    )
+
+    dist_registry = MagicMock()
+
+    mock_module = MagicMock()
+    impl = SampleImpl(SampleConfig(foo="baz"), {}, provider_spec)
+    # Give SampleImpl a stub for each webmethod of the Inference protocol.
+    add_protocol_methods(SampleImpl, Inference)
+
+    mock_module.get_provider_impl = AsyncMock(return_value=impl)
+    sys.modules["test_module"] = mock_module
+    try:
+        impls = await resolve_impls(run_config, provider_registry, dist_registry)
+    finally:
+        # Drop the fake module so later imports of "test_module" are unaffected.
+        sys.modules.pop("test_module", None)
+
+    assert Api.inference in impls
+    assert isinstance(impls[Api.inference], InferenceRouter)
+
+    table = impls[Api.inference].routing_table
+    assert isinstance(table, ModelsRoutingTable)
+
+    impl = table.impls_by_provider_id["sample_provider"]
+    assert isinstance(impl, SampleImpl)
+    assert impl.foo == "baz"
+    assert impl.__provider_id__ == "sample_provider"
+    assert impl.__provider_spec__ == provider_spec