From 07d45f2af3bfead460fcb8860908ffc71f51a81d Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Wed, 23 Oct 2024 15:15:52 -0700 Subject: [PATCH] fix issue w/ enforcing api --- llama_stack/distribution/distribution.py | 1 + llama_stack/distribution/server/server.py | 2 ++ tests/examples/local-run.yaml | 18 +++++++++++------- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py index 53d544471..87d77b5ed 100644 --- a/llama_stack/distribution/distribution.py +++ b/llama_stack/distribution/distribution.py @@ -35,6 +35,7 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]: routing_table_api=Api.memory_banks, router_api=Api.memory, ), + # TODO: re-enable once we have proper checking on builtin routing: Issue #297 AutoRoutedApiInfo( routing_table_api=Api.datasets, router_api=Api.datasetio, diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 84c369364..178fac070 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -288,6 +288,8 @@ def main( apis_to_serve = set(impls.keys()) for inf in builtin_automatically_routed_apis(): + if inf.router_api.value not in apis_to_serve: + continue apis_to_serve.add(inf.routing_table_api.value) apis_to_serve.add("inspect") diff --git a/tests/examples/local-run.yaml b/tests/examples/local-run.yaml index e12f6e852..128008117 100644 --- a/tests/examples/local-run.yaml +++ b/tests/examples/local-run.yaml @@ -13,14 +13,18 @@ apis: - safety providers: inference: - - provider_id: meta-reference - provider_type: meta-reference + - provider_id: tgi0 + provider_type: remote::tgi config: - model: Llama3.1-8B-Instruct - quantization: null - torch_seed: null - max_seq_len: 4096 - max_batch_size: 1 + url: http://127.0.0.1:5009 + # - provider_id: meta-reference + # provider_type: meta-reference + # config: + # model: Llama3.1-8B-Instruct + # quantization: null + # torch_seed: null + # max_seq_len: 4096 + # max_batch_size: 1 safety: - provider_id: meta-reference provider_type: meta-reference