Kill "remote" providers and fix testing with a remote stack properly (#435)

# What does this PR do?

This PR kills the notion of "pure passthrough" remote providers. You can no longer specify a single provider as remote; you must specify a whole distribution (stack) as remote.
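
For context, "remote" now shows up in the tests as just another fixture choice that stands in for an entire stack. A minimal sketch of that pattern, assuming the `remote_stack_fixture` and `ProviderFixture` helpers imported in the diffs below (the exact wiring here is an assumption, not the PR's literal code):

```python
import pytest

# Hedged sketch (not the PR's literal code): instead of a passthrough provider
# per API, one fixture stands in for the entire remote distribution (stack).
# remote_stack_fixture and ProviderFixture appear in the diffs below; how they
# are combined here is an assumption.
from ..conftest import ProviderFixture, remote_stack_fixture


@pytest.fixture(scope="session")
def inference_remote() -> ProviderFixture:
    # The whole stack is remote; the fixture only needs the server's base URL,
    # typically taken from REMOTE_STACK_URL (see the pytest invocation below).
    return remote_stack_fixture()
```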

This PR also significantly fixes / upgrades the testing infrastructure so you can now test against a remotely hosted stack server by just running:

```bash
pytest -s -v -m remote  test_agents.py \
  --inference-model=Llama3.1-8B-Instruct --safety-shield=Llama-Guard-3-1B \
  --env REMOTE_STACK_URL=http://localhost:5001
```
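
Here `-m remote` selects the remote-stack parametrization of the test fixtures, and `--env REMOTE_STACK_URL=...` points them at the already-running stack server; the server itself has to be launched separately (e.g. with `llama stack run`) before invoking pytest.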

Also fixed `test_agents_persistence.py` (which was broken) and killed
some deprecated testing functions.

## Test Plan

All the tests.
Commit 12947ac19e (parent 59a65e34d3), authored by Ashwin Bharambe on 2024-11-12 21:51:29 -08:00 and committed by GitHub.
28 changed files with 406 additions and 519 deletions

In the inference test fixtures, the deprecated `resolve_impls_for_test_v2` helper gives way to `construct_stack_for_test`, which builds the whole test stack (including a fully remote one):

```diff
@@ -21,7 +21,7 @@ from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
-from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2
+from llama_stack.providers.tests.resolver import construct_stack_for_test
 from ..conftest import ProviderFixture, remote_stack_fixture
 from ..env import get_env_or_fail
@@ -182,15 +182,11 @@ INFERENCE_FIXTURES = [
 async def inference_stack(request, inference_model):
     fixture_name = request.param
     inference_fixture = request.getfixturevalue(f"inference_{fixture_name}")
-    impls = await resolve_impls_for_test_v2(
+    test_stack = await construct_stack_for_test(
         [Api.inference],
         {"inference": inference_fixture.providers},
         inference_fixture.provider_data,
-        models=[
-            ModelInput(
-                model_id=inference_model,
-            )
-        ],
+        models=[ModelInput(model_id=inference_model)],
     )
-    return (impls[Api.inference], impls[Api.models])
+    return test_stack.impls[Api.inference], test_stack.impls[Api.models]
```
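
To make the new helper's shape concrete, here is a hedged reading of the fixture above: `construct_stack_for_test` resolves the requested APIs and hands back a test stack whose `impls` dictionary is keyed by `Api`. The sketch only restates what the diff shows; the comments describing each argument are inferences, not documented parameter names.

```python
# Hedged sketch of how the fixture consumes construct_stack_for_test; the
# argument descriptions are inferences from how the fixtures use them.
test_stack = await construct_stack_for_test(
    [Api.inference],                                # APIs the test needs resolved
    {"inference": inference_fixture.providers},     # provider config per API
    inference_fixture.provider_data,                # extra per-provider data (keys, URLs)
    models=[ModelInput(model_id=inference_model)],  # models to register before the test
)

inference_impl = test_stack.impls[Api.inference]    # inference implementation under test
models_impl = test_stack.impls[Api.models]          # model registry implementation
```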

In the text inference tests, `completion` now takes the registered model via `model_id`:

```diff
@@ -147,9 +147,9 @@ class TestInference:
         user_input = "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003."
         response = await inference_impl.completion(
+            model_id=inference_model,
             content=user_input,
             stream=False,
-            model=inference_model,
             sampling_params=SamplingParams(
                 max_tokens=50,
             ),
```

The same `model` → `model_id` rename in the vision inference tests:

```diff
@@ -55,7 +55,7 @@ class TestVisionModelInference:
         )
         response = await inference_impl.chat_completion(
-            model=inference_model,
+            model_id=inference_model,
             messages=[
                 UserMessage(content="You are a helpful assistant."),
                 UserMessage(content=[image, "Describe this image in two sentences."]),
@@ -102,7 +102,7 @@ class TestVisionModelInference:
         response = [
             r
             async for r in await inference_impl.chat_completion(
-                model=inference_model,
+                model_id=inference_model,
                 messages=[
                     UserMessage(
```
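
Across the test suite, inference calls now pass the registered model identifier as `model_id` rather than `model`. A short hedged sketch of the new call shape; only the keyword rename is taken from the diffs above, the message content and `stream=False` are illustrative:

```python
# Hedged sketch of the renamed keyword argument; only model_id (vs. the old
# model) is taken from the diffs above, the rest is illustrative.
response = await inference_impl.chat_completion(
    model_id=inference_model,  # registered model identifier, e.g. "Llama3.1-8B-Instruct"
    messages=[
        UserMessage(content="You are a helpful assistant."),
        UserMessage(content="In which year was Michael Jordan born?"),
    ],
    stream=False,
)
```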