rebase and fix some small breakage due to model -> model_id fix

Ashwin Bharambe 2024-11-12 21:47:39 -08:00
parent 22aedd0277
commit 1cb42d3060
6 changed files with 20 additions and 11 deletions
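The breakage comes from the rename of the `model` keyword argument to `model_id` in the inference API surface: call sites that still passed `model=` had to be updated. A minimal before/after sketch of the call shape, with the `inference_api` object and argument values purely illustrative (the shapes are taken from the diffs below):

    # before the rename (now broken)
    response = await inference_api.chat_completion(model=model, messages=messages, stream=True)
    # after the rename
    response = await inference_api.chat_completion(model_id=model, messages=messages, stream=True)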

@@ -38,15 +38,15 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable
     if api == Api.inference:
         return await p.register_model(obj)
     elif api == Api.safety:
-        await p.register_shield(obj)
+        return await p.register_shield(obj)
     elif api == Api.memory:
-        await p.register_memory_bank(obj)
+        return await p.register_memory_bank(obj)
     elif api == Api.datasetio:
-        await p.register_dataset(obj)
+        return await p.register_dataset(obj)
     elif api == Api.scoring:
-        await p.register_scoring_function(obj)
+        return await p.register_scoring_function(obj)
     elif api == Api.eval:
-        await p.register_eval_task(obj)
+        return await p.register_eval_task(obj)
     else:
         raise ValueError(f"Unknown API {api} for registering object with provider")
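This hunk makes each non-inference branch return the provider's result instead of implicitly returning None. A minimal sketch of the effect on a hypothetical caller (variable names here are illustrative, not from this commit):

    # Before the fix, registering e.g. a shield fell through and yielded None,
    # so the caller lost the registered object.
    registered = await register_object_with_provider(shield, provider)
    assert registered is not None  # holds only when the branch returns the result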

@@ -234,7 +234,7 @@ class LlamaGuardShield:
         # TODO: llama-stack inference protocol has issues with non-streaming inference code
         content = ""
         async for chunk in await self.inference_api.chat_completion(
-            model=self.model,
+            model_id=self.model,
             messages=[shield_input_message],
             stream=True,
         ):

@@ -164,7 +164,6 @@ class OllamaInferenceAdapter(Inference, ModelRegistryHelper, ModelsProtocolPriva
         logprobs: Optional[LogProbConfig] = None,
     ) -> AsyncGenerator:
         model = await self.model_store.get_model(model_id)
-        print(f"model={model}")
         request = ChatCompletionRequest(
             model=model.provider_resource_id,
             messages=messages,

@@ -46,6 +46,16 @@ DEFAULT_PROVIDER_COMBINATIONS = [
         id="together",
         marks=pytest.mark.together,
     ),
+    pytest.param(
+        {
+            "inference": "fireworks",
+            "safety": "llama_guard",
+            "memory": "faiss",
+            "agents": "meta_reference",
+        },
+        id="fireworks",
+        marks=pytest.mark.fireworks,
+    ),
     pytest.param(
         {
             "inference": "remote",
@@ -60,7 +70,7 @@ DEFAULT_PROVIDER_COMBINATIONS = [
 def pytest_configure(config):
-    for mark in ["meta_reference", "ollama", "together", "remote"]:
+    for mark in ["meta_reference", "ollama", "together", "fireworks", "remote"]:
         config.addinivalue_line(
             "markers",
             f"{mark}: marks tests as {mark} specific",

@@ -147,9 +147,9 @@ class TestInference:
         user_input = "Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003."
         response = await inference_impl.completion(
+            model_id=inference_model,
             content=user_input,
             stream=False,
-            model=inference_model,
             sampling_params=SamplingParams(
                 max_tokens=50,
             ),

@@ -55,7 +55,7 @@ class TestVisionModelInference:
         )
         response = await inference_impl.chat_completion(
-            model=inference_model,
+            model_id=inference_model,
             messages=[
                 UserMessage(content="You are a helpful assistant."),
                 UserMessage(content=[image, "Describe this image in two sentences."]),
@@ -102,7 +102,7 @@ class TestVisionModelInference:
         response = [
             r
             async for r in await inference_impl.chat_completion(
-                model=inference_model,
+                model_id=inference_model,
                 messages=[
                     UserMessage(content="You are a helpful assistant."),
                     UserMessage(