routers for inference chat_completion with models dependency

This commit is contained in:
Xi Yan 2024-09-19 20:59:32 -07:00
parent 47be4c7222
commit d2ec822b12
5 changed files with 113 additions and 15 deletions

View file

@@ -297,6 +297,13 @@ async def resolve_impls(
f"Unknown provider `{provider_id}` is not available for API `{api}`"
)
specs[api] = providers[item.provider_id]
elif isinstance(item, str) and item == "models-router":
specs[api] = RouterProviderSpec(
api=api,
module=f"llama_stack.providers.routers.{api.value.lower()}",
api_dependencies=[Api.models],
inner_specs=[],
)
else:
assert isinstance(item, list)
inner_specs = []
@@ -314,6 +321,10 @@ async def resolve_impls(
inner_specs=inner_specs,
)
for k, v in specs.items():
cprint(k, "blue")
cprint(v, "blue")
sorted_specs = topological_sort(specs.values())
impls = {}
@@ -333,9 +344,7 @@ def main(yaml_config: str, port: int = 5000, disable_ipv6: bool = False):
app = FastAPI()
print(config)
impls, specs = asyncio.run(resolve_impls(config.provider_map))
print(impls)
if Api.telemetry in impls:
setup_logger(impls[Api.telemetry])