diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py
index 2c051719b..8689ead8e 100644
--- a/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -283,8 +283,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
             # ...
             provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
             if "embedding" in functions:
-                embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
-                context_length = model_spec["model_limits"]["max_sequence_length"]
+                embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
+                context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)
                 embedding_metadata = {
                     "embedding_dimension": embedding_dimension,
                     "context_length": context_length,
@@ -306,10 +306,6 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
                     metadata={},
                     model_type=ModelType.llm,
                 )
-                # In theory, I guess it is possible that a model could be both an embedding model and a text chat model.
-                # In that case, the cache will record the generator Model object, and the list which we return will have
-                # both the generator Model object and the text chat Model object. That's fine because the cache is
-                # only used for check_model_availability() anyway.
                 self._model_cache[provider_resource_id] = model
                 models.append(model)
         return models
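
Note: the change above swaps direct indexing for chained dict.get() calls, so a model spec that omits "model_limits" (or either limit field) falls back to 0 instead of raising KeyError. A minimal sketch of that behavior, using a hypothetical spec dict rather than a real watsonx API response:

# Minimal sketch (not part of the PR) of the fallback behavior introduced above.
# "model_spec" here is a hypothetical dict, not a real watsonx API response.
model_spec = {"model_id": "example-embedding-model"}  # no "model_limits" key present

embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)

print(embedding_dimension, context_length)  # prints "0 0"; the old indexing raised KeyError: 'model_limits'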