Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 12:07:34 +00:00)
commit d2ec822b12 (parent 47be4c7222)

    routers for inference chat_completion with models dependency

5 changed files with 113 additions and 15 deletions
```diff
@@ -297,6 +297,13 @@ async def resolve_impls(
                 f"Unknown provider `{provider_id}` is not available for API `{api}`"
             )
             specs[api] = providers[item.provider_id]
+        elif isinstance(item, str) and item == "models-router":
+            specs[api] = RouterProviderSpec(
+                api=api,
+                module=f"llama_stack.providers.routers.{api.value.lower()}",
+                api_dependencies=[Api.models],
+                inner_specs=[],
+            )
         else:
             assert isinstance(item, list)
             inner_specs = []
```
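This branch makes a provider-map entry that is the literal string `models-router` resolve to a `RouterProviderSpec` whose implementation module is derived from the API name (e.g. `llama_stack.providers.routers.inference` for `Api.inference`) and which declares a dependency on the `Api.models` implementation. As a rough sketch of the pattern being wired up here, a router-style inference provider could pick a backend per model via the models API; the class, its `resolve` method, and the constructor below are hypothetical illustrations, not code from this repo:

```python
# Hypothetical sketch of a router-style inference provider: not the actual
# llama-stack implementation, just the shape this commit's RouterProviderSpec
# (router module + Api.models dependency) suggests.
from typing import Any, Dict


class InferenceRouter:
    """Routes chat_completion calls to a backend chosen per model."""

    def __init__(self, models_api: Any, backends: Dict[str, Any]) -> None:
        # models_api stands in for the Api.models implementation this spec
        # depends on; backends maps a provider id to a concrete inference impl.
        self.models_api = models_api
        self.backends = backends

    async def chat_completion(self, model: str, messages: list, **kwargs: Any):
        # Ask the models API which provider serves this model (hypothetical
        # `resolve` method), then forward the call unchanged.
        provider_id = await self.models_api.resolve(model)
        backend = self.backends[provider_id]
        return await backend.chat_completion(model=model, messages=messages, **kwargs)
```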
```diff
@@ -314,6 +321,10 @@ async def resolve_impls(
                 inner_specs=inner_specs,
             )
 
+    for k, v in specs.items():
+        cprint(k, "blue")
+        cprint(v, "blue")
+
     sorted_specs = topological_sort(specs.values())
 
     impls = {}
```
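The added `cprint` calls dump each resolved spec before `topological_sort` orders them so that every provider is instantiated after its `api_dependencies` — which is what lets the router spec above rely on `Api.models` being ready first. A minimal dependency-ordering sketch, assuming only the `.api` and `.api_dependencies` attributes visible in the diff (the repo's actual `topological_sort` may differ):

```python
# Minimal DFS-based topological sort over provider specs, assuming each spec
# exposes `.api` and `.api_dependencies` as in the diff above. Illustrative
# only; not the function from the repo.
from typing import Any, Dict, List


def topological_sort(specs: List[Any]) -> List[Any]:
    by_api: Dict[Any, Any] = {spec.api: spec for spec in specs}
    visited: set = set()
    ordered: List[Any] = []

    def visit(spec: Any) -> None:
        if spec.api in visited:
            return
        visited.add(spec.api)
        # Visit dependencies (e.g. Api.models) before the spec itself, so
        # each spec lands in `ordered` after everything it depends on.
        for dep in spec.api_dependencies:
            if dep in by_api:
                visit(by_api[dep])
        ordered.append(spec)

    for spec in specs:
        visit(spec)
    return ordered
```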
```diff
@@ -333,9 +344,7 @@ def main(yaml_config: str, port: int = 5000, disable_ipv6: bool = False):
 
     app = FastAPI()
 
-    print(config)
     impls, specs = asyncio.run(resolve_impls(config.provider_map))
-    print(impls)
     if Api.telemetry in impls:
         setup_logger(impls[Api.telemetry])
 
```
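With the debug prints gone, `main` simply resolves the provider map into live implementations and gates telemetry setup on whether one was configured. A hypothetical `provider_map` that would exercise the new `models-router` branch might look like this (the key names and the shape of the `models` entry are illustrative, not documented llama-stack configuration):

```python
# Hypothetical provider map: a plain string value ("models-router") is what
# sends resolve_impls down the new RouterProviderSpec branch, while a dict
# entry stands in for a concrete per-provider config.
provider_map = {
    "models": {"provider_id": "builtin", "config": {}},
    "inference": "models-router",
}
```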