Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-06 20:44:58 +00:00)
Further generalize Xi's changes
- introduce a slightly more general notion of an AutoRouted provider: the AutoRouted provider is associated with a RoutingTable provider, e.g. inference -> models
- introduce safety -> shields and memory -> memory_banks correspondences (sketched below, after the commit metadata)
parent b8914bb56f
commit e1966b90d9
19 changed files with 559 additions and 388 deletions
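The commit message describes a correspondence between auto-routed APIs and the routing-table APIs that register their routing keys. The following is a minimal sketch of that idea only; the class and field names (RoutingTableEntry, provider_for, and so on) are illustrative assumptions, not the actual datatypes introduced by this commit.

    # Illustrative sketch only: these names are assumptions for explanation,
    # not the datatypes added by this commit.
    from enum import Enum
    from typing import Any, Dict, List

    from pydantic import BaseModel, Field


    class Api(Enum):
        inference = "inference"
        safety = "safety"
        memory = "memory"
        models = "models"
        shields = "shields"
        memory_banks = "memory_banks"


    # Each auto-routed API is paired with the routing-table API that registers
    # the objects whose identifiers act as routing keys.
    AUTO_ROUTED_TO_ROUTING_TABLE: Dict[Api, Api] = {
        Api.inference: Api.models,
        Api.safety: Api.shields,
        Api.memory: Api.memory_banks,
    }


    class RoutingTableEntry(BaseModel):
        routing_key: str  # e.g. a model id or shield type
        provider_id: str  # concrete provider serving this key
        config: Dict[str, Any] = Field(default_factory=dict)


    class RoutingTable(BaseModel):
        api: Api
        entries: List[RoutingTableEntry]

        def provider_for(self, routing_key: str) -> str:
            # An auto-routed provider consults this table to pick the backend.
            for entry in self.entries:
                if entry.routing_key == routing_key:
                    return entry.provider_id
            raise ValueError(f"no provider registered for routing key {routing_key!r}")

With this shape, an inference request carrying a given model id is dispatched to whichever provider registered that id in the models table; the same pattern applies to shields for safety and to memory_banks for memory.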
@@ -1,21 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any, Dict

from llama_stack.distribution.datatypes import Api, ProviderSpec, StackRunConfig

from .config import BuiltinImplConfig  # noqa


async def get_builtin_impl(config: StackRunConfig):
    from .models import BuiltinModelsImpl

    assert isinstance(config, StackRunConfig), f"Unexpected config type: {type(config)}"

    impl = BuiltinModelsImpl(config)
    await impl.initialize()
    return impl
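This first deleted file is the provider entry point: a module-level get_builtin_impl coroutine that receives the run config, constructs the implementation, and initializes it. A hypothetical usage sketch follows; the import path and the way run_config is obtained are assumptions made purely for illustration.

    # Hypothetical sketch; the import path below is illustrative, and the
    # provider package it refers to is the one removed by this commit.
    from llama_stack.distribution.datatypes import StackRunConfig


    async def build_models_impl(run_config: StackRunConfig):
        # The distribution resolver would call the module's entry point to
        # construct and initialize the implementation.
        from llama_stack.providers.impls.builtin.models import get_builtin_impl  # illustrative path

        impl = await get_builtin_impl(run_config)
        return impl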
@@ -1,11 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel


@json_schema_type
class BuiltinImplConfig(BaseModel): ...
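The deleted config module (imported above as .config) defines an empty pydantic model, which is sufficient for a provider that needs no settings; @json_schema_type makes it visible to schema generation. For comparison, here is a hedged sketch of what a non-empty provider config typically looks like; the fields are illustrative and were never part of BuiltinImplConfig.

    # Illustrative only: these fields are assumptions, not part of the
    # removed BuiltinImplConfig.
    from llama_models.schema_utils import json_schema_type
    from pydantic import BaseModel


    @json_schema_type
    class ExampleInferenceConfig(BaseModel):
        model: str = "Llama3.1-8B-Instruct"
        max_seq_len: int = 4096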
@@ -1,113 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import asyncio

from typing import AsyncIterator, Union

from llama_models.llama3.api.datatypes import StopReason
from llama_models.sku_list import resolve_model

from llama_stack.distribution.distribution import Api, api_providers

from llama_stack.apis.models import *  # noqa: F403
from llama_models.llama3.api.datatypes import *  # noqa: F403
from llama_models.datatypes import CoreModelId, Model
from llama_models.sku_list import resolve_model

from llama_stack.distribution.datatypes import *  # noqa: F403
from termcolor import cprint


class BuiltinModelsImpl(Models):
    def __init__(
        self,
        config: StackRunConfig,
    ) -> None:
        self.run_config = config
        self.models = {}
        # check against inference & safety api
        apis_with_models = [Api.inference, Api.safety]

        all_providers = api_providers()

        for api in apis_with_models:

            # check against provider_map (simple case single model)
            if api.value in config.provider_map:
                providers_for_api = all_providers[api]
                provider_spec = config.provider_map[api.value]
                core_model_id = provider_spec.config
                # get supported model ids from the provider
                supported_model_ids = self.get_supported_model_ids(
                    api.value, provider_spec, providers_for_api
                )
                for model_id in supported_model_ids:
                    self.models[model_id] = ModelServingSpec(
                        llama_model=resolve_model(model_id),
                        provider_config=provider_spec,
                        api=api.value,
                    )

            # check against provider_routing_table (router with multiple models)
            # with routing table, we use the routing_key as the supported models
            if api.value in config.provider_routing_table:
                routing_table = config.provider_routing_table[api.value]
                for rt_entry in routing_table:
                    model_id = rt_entry.routing_key
                    self.models[model_id] = ModelServingSpec(
                        llama_model=resolve_model(model_id),
                        provider_config=GenericProviderConfig(
                            provider_id=rt_entry.provider_id,
                            config=rt_entry.config,
                        ),
                        api=api.value,
                    )

        print("BuiltinModelsImpl models", self.models)

    def get_supported_model_ids(
        self,
        api: str,
        provider_spec: GenericProviderConfig,
        providers_for_api: Dict[str, ProviderSpec],
    ) -> List[str]:
        serving_models_list = []
        if api == Api.inference.value:
            provider_id = provider_spec.provider_id
            if provider_id == "meta-reference":
                serving_models_list.append(provider_spec.config["model"])
            if provider_id in {
                remote_provider_id("ollama"),
                remote_provider_id("fireworks"),
                remote_provider_id("together"),
            }:
                adapter_supported_models = providers_for_api[
                    provider_id
                ].adapter.supported_model_ids
                serving_models_list.extend(adapter_supported_models)
        elif api == Api.safety.value:
            if provider_spec.config and "llama_guard_shield" in provider_spec.config:
                llama_guard_shield = provider_spec.config["llama_guard_shield"]
                serving_models_list.append(llama_guard_shield["model"])
            if provider_spec.config and "prompt_guard_shield" in provider_spec.config:
                prompt_guard_shield = provider_spec.config["prompt_guard_shield"]
                serving_models_list.append(prompt_guard_shield["model"])
        else:
            raise NotImplementedError(f"Unsupported api {api} for builtin models")

        return serving_models_list

    async def initialize(self) -> None:
        pass

    async def list_models(self) -> ModelsListResponse:
        return ModelsListResponse(models_list=list(self.models.values()))

    async def get_model(self, core_model_id: str) -> ModelsGetResponse:
        if core_model_id in self.models:
            return ModelsGetResponse(core_model_spec=self.models[core_model_id])
        print(f"Cannot find {core_model_id} in model registry")
        return ModelsGetResponse()
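The removed BuiltinModelsImpl built its registry by scanning provider_map (single-provider case) and provider_routing_table (routed case) for the inference and safety APIs, with per-provider special cases in get_supported_model_ids. Under the generalization in this commit, that scan becomes unnecessary for routed setups: the routing keys registered for an API already enumerate what it can serve. A minimal sketch of that observation, using illustrative types rather than the stack's actual datatypes:

    # Sketch only: RoutingEntry and the dict shape are assumptions that mirror
    # the provider_routing_table usage in the removed code above.
    from typing import Any, Dict, List

    from pydantic import BaseModel, Field


    class RoutingEntry(BaseModel):
        routing_key: str
        provider_id: str
        config: Dict[str, Any] = Field(default_factory=dict)


    def served_routing_keys(
        provider_routing_table: Dict[str, List[RoutingEntry]], api: str
    ) -> List[str]:
        # For "inference" the routing keys are model ids; for "safety" they are
        # shield identifiers. Either way, the table itself is the registry.
        return [entry.routing_key for entry in provider_routing_table.get(api, [])]

For example, served_routing_keys(table, "inference") yields the model ids the stack can route to, which is what list_models() assembled by hand in the deleted code.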