feat(providers): support non-llama models for inference providers (#1200)

This PR begins the process of supporting non-llama models within Llama Stack. We start simple by adding support for this functionality within a few existing providers: fireworks, together, and ollama.

## Test Plan

```bash
LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/client-sdk/inference/test_text_inference.py \
  --inference-model accounts/fireworks/models/phi-3-vision-128k-instruct
```

This passes most of the tests but, as expected, fails the tool-calling tests, since they are very specific to Llama models:

```
inference/test_text_inference.py::test_text_completion_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_completion_log_probs_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_completion_log_probs_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_text_completion_structured_output[accounts/fireworks/models/phi-3-vision-128k-instruct-completion-01] PASSED
inference/test_text_inference.py::test_text_chat_completion_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-Which planet do humans live on?-Earth] PASSED
inference/test_text_inference.py::test_text_chat_completion_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-Which planet has rings around it with a name starting with letter S?-Saturn] PASSED
inference/test_text_inference.py::test_text_chat_completion_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-What's the name of the Sun in latin?-Sol] PASSED
inference/test_text_inference.py::test_text_chat_completion_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-What is the name of the US captial?-Washington] PASSED
inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] FAILED
inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] FAILED
inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_required[accounts/fireworks/models/phi-3-vision-128k-instruct] FAILED
inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_none[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_text_chat_completion_structured_output[accounts/fireworks/models/phi-3-vision-128k-instruct] ERROR
inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[accounts/fireworks/models/phi-3-vision-128k-instruct-True] PASSED
inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[accounts/fireworks/models/phi-3-vision-128k-instruct-False] PASSED
```
2025-12-16 13:59:27 +00:00 · 2025-02-21 13:21:28 -08:00 · 2025-02-21 13:21:28 -08:00 · ab54b8cd58
commit ab54b8cd58
parent 9bbe34694d
7 changed files with 103 additions and 74 deletions
--- a/tests/client-sdk/conftest.py
+++ b/tests/client-sdk/conftest.py
@@ -42,28 +42,30 @@ def pytest_addoption(parser):
    )
    parser.addoption(
        "--inference-model",
-        action="store",
        default=TEXT_MODEL,
        help="Specify the inference model to use for testing",
    )
    parser.addoption(
        "--vision-inference-model",
-        action="store",
        default=VISION_MODEL,
        help="Specify the vision inference model to use for testing",
    )
    parser.addoption(
        "--safety-shield",
-        action="store",
        default="meta-llama/Llama-Guard-3-1B",
        help="Specify the safety shield model to use for testing",
    )
    parser.addoption(
        "--embedding-model",
-        action="store",
-        default=TEXT_MODEL,
+        default=None,
        help="Specify the embedding model to use for testing",
    )
+    parser.addoption(
+        "--embedding-dimension",
+        type=int,
+        default=384,
+        help="Output dimensionality of the embedding model to use for testing",
+    )


@pytest.fixture(scope="session")
@@ -78,7 +80,7 @@ def provider_data():


@pytest.fixture(scope="session")
-def llama_stack_client(provider_data):
+def llama_stack_client(provider_data, text_model_id):
    if os.environ.get("LLAMA_STACK_CONFIG"):
        client = LlamaStackAsLibraryClient(
            get_env_or_fail("LLAMA_STACK_CONFIG"),
@@ -95,6 +97,45 @@ def llama_stack_client(provider_data):
        )
    else:
        raise ValueError("LLAMA_STACK_CONFIG or LLAMA_STACK_BASE_URL must be set")
+
+    return client
+
+
+@pytest.fixture(scope="session")
+def inference_provider_type(llama_stack_client):
+    providers = llama_stack_client.providers.list()
+    inference_providers = [p for p in providers if p.api == "inference"]
+    assert len(inference_providers) > 0, "No inference providers found"
+    return inference_providers[0].provider_type
+
+
+@pytest.fixture(scope="session")
+def client_with_models(llama_stack_client, text_model_id, vision_model_id, embedding_model_id, embedding_dimension):
+    client = llama_stack_client
+
+    providers = [p for p in client.providers.list() if p.api == "inference"]
+    assert len(providers) > 0, "No inference providers found"
+    inference_providers = [p.provider_id for p in providers if p.provider_type != "inline::sentence-transformers"]
+    if text_model_id:
+        client.models.register(model_id=text_model_id, provider_id=inference_providers[0])
+    if vision_model_id:
+        client.models.register(model_id=vision_model_id, provider_id=inference_providers[0])
+
+    if embedding_model_id and embedding_dimension:
+        # try to find a provider that supports embeddings, if sentence-transformers is not available
+        selected_provider = None
+        for p in providers:
+            if p.provider_type == "inline::sentence-transformers":
+                selected_provider = p
+                break
+
+        selected_provider = selected_provider or providers[0]
+        client.models.register(
+            model_id=embedding_model_id,
+            provider_id=selected_provider.provider_id,
+            model_type="embedding",
+            metadata={"embedding_dimension": embedding_dimension},
+        )
    return client


@@ -117,3 +158,9 @@ def pytest_generate_tests(metafunc):
            [metafunc.config.getoption("--embedding-model")],
            scope="session",
        )
+    if "embedding_dimension" in metafunc.fixturenames:
+        metafunc.parametrize(
+            "embedding_dimension",
+            [metafunc.config.getoption("--embedding-dimension")],
+            scope="session",
+        )