diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 84c369364..185c89e7e 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -288,6 +288,9 @@ def main( apis_to_serve = set(impls.keys()) for inf in builtin_automatically_routed_apis(): + # if we do not serve the corresponding router API, we should not serve the routing table API + if inf.router_api.value not in apis_to_serve: + continue apis_to_serve.add(inf.routing_table_api.value) apis_to_serve.add("inspect") diff --git a/tests/examples/inference-run.yaml b/tests/examples/inference-run.yaml new file mode 100644 index 000000000..87ab5146b --- /dev/null +++ b/tests/examples/inference-run.yaml @@ -0,0 +1,14 @@ +version: '2' +built_at: '2024-10-08T17:40:45.325529' +image_name: local +docker_image: null +conda_env: local +apis: +- models +- inference +providers: + inference: + - provider_id: tgi0 + provider_type: remote::tgi + config: + url: http://127.0.0.1:5009