Merge branch 'main' into add-mcp-authentication-param

2025-12-04 18:13:44 +00:00 · 2025-11-05 14:12:32 -08:00 · 2025-11-05 14:12:32 -08:00 · 411b18a90f
commit 411b18a90f
parent 7db4ed7bbb 84a84ee85c
515 changed files with 1213 additions and 370490 deletions
--- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
@ -283,8 +283,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
            # ...
            provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
            if "embedding" in functions:
-                embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
-                context_length = model_spec["model_limits"]["max_sequence_length"]
+                embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
+                context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)
                embedding_metadata = {
                    "embedding_dimension": embedding_dimension,
                    "context_length": context_length,
@ -306,10 +306,6 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
                    metadata={},
                    model_type=ModelType.llm,
                )
-                # In theory, I guess it is possible that a model could be both an embedding model and a text chat model.
-                # In that case, the cache will record the generator Model object, and the list which we return will have
-                # both the generator Model object and the text chat Model object.  That's fine because the cache is
-                # only used for check_model_availability() anyway.
                self._model_cache[provider_resource_id] = model
                models.append(model)
        return models