From 75e50e87c84dc309fea2142d1fdec24b55b465ed Mon Sep 17 00:00:00 2001
From: Wojciech-Rebisz
Date: Tue, 4 Nov 2025 15:38:18 +0100
Subject: [PATCH] Refactor list_models method

---
 .../remote/inference/watsonx/watsonx.py | 35 +++++--------------
 1 file changed, 8 insertions(+), 27 deletions(-)

diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
index b4dfc1f06..0db6d7017 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -274,42 +274,23 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         models = []
         for model_spec in self._get_model_specs():
             functions = [f["id"] for f in model_spec.get("functions", [])]
-            # Format: {"embedding_dimension": 1536, "context_length": 8192}
-            # Example of an embedding model:
-            # {'model_id': 'ibm/granite-embedding-278m-multilingual',
-            #  'label': 'granite-embedding-278m-multilingual',
-            #  'model_limits': {'max_sequence_length': 512, 'embedding_dimension': 768},
-            # ...
             provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
 
             if "embedding" in functions:
-                embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
-                context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)
-                embedding_metadata = {
-                    "embedding_dimension": embedding_dimension,
-                    "context_length": context_length,
-                }
-                model = Model(
-                    identifier=model_spec["model_id"],
-                    provider_resource_id=provider_resource_id,
-                    provider_id=self.__provider_id__,
-                    metadata=embedding_metadata,
-                    model_type=ModelType.embedding,
-                )
-                self._model_cache[provider_resource_id] = model
-                models.append(model)
-            if "text_chat" in functions:
+                model_type = ModelType.embedding
+            elif "text_chat" in functions:
+                model_type = ModelType.llm
+            else:
+                model_type = None
+
+            if model_type is not None:
                 model = Model(
                     identifier=model_spec["model_id"],
                     provider_resource_id=provider_resource_id,
                     provider_id=self.__provider_id__,
                     metadata={},
-                    model_type=ModelType.llm,
+                    model_type=model_type,
                 )
-                # In theory, I guess it is possible that a model could be both an embedding model and a text chat model.
-                # In that case, the cache will record the generator Model object, and the list which we return will have
-                #  both the generator Model object and the text chat Model object. That's fine because the cache is
-                #  only used for check_model_availability() anyway.
                 self._model_cache[provider_resource_id] = model
                 models.append(model)
         return models
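
For reference, below is a minimal sketch of how the refactored loop reads once the patch above is applied. The method name comes from the [PATCH] subject line and the async signature is an assumption; Model, ModelType, self._get_model_specs(), self._model_cache, and self.__provider_id__ are used exactly as they appear in the hunk and are assumed to be imported or defined elsewhere in watsonx.py.

    # Sketch only: signature assumed, body reconstructed from the post-patch hunk.
    async def list_models(self) -> list[Model]:
        models = []
        for model_spec in self._get_model_specs():
            # Each entry in "functions" carries an "id" such as "embedding" or "text_chat".
            functions = [f["id"] for f in model_spec.get("functions", [])]
            provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"

            # Map the reported function id onto a model type.
            if "embedding" in functions:
                model_type = ModelType.embedding
            elif "text_chat" in functions:
                model_type = ModelType.llm
            else:
                model_type = None

            # Only register specs that map to a known model type.
            if model_type is not None:
                model = Model(
                    identifier=model_spec["model_id"],
                    provider_resource_id=provider_resource_id,
                    provider_id=self.__provider_id__,
                    metadata={},
                    model_type=model_type,
                )
                self._model_cache[provider_resource_id] = model
                models.append(model)
        return models

The patch collapses the two per-type Model constructions into a single one keyed by model_type. Note that the removed embedding branch also populated embedding_dimension and context_length from model_limits, whereas the consolidated construction passes an empty metadata dict for all model types.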