From ecafe40a84ac17a4a6c36577bf8d2589d4499735 Mon Sep 17 00:00:00 2001 From: Bill Murdock Date: Fri, 3 Oct 2025 16:16:55 -0400 Subject: [PATCH] fix: Fix embedding model listing and usage for watsonx Signed-off-by: Bill Murdock --- .../remote/inference/watsonx/watsonx.py | 56 +++++++++++-------- .../utils/inference/openai_compat.py | 6 +- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py index 9584789e3..1a7e2b6c3 100644 --- a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -56,15 +56,40 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): async def list_models(self) -> list[Model] | None: models = [] for model_spec in self._get_model_specs(): - models.append( - Model( - identifier=model_spec["model_id"], - provider_resource_id=f"{self.__provider_id__}/{model_spec['model_id']}", - provider_id=self.__provider_id__, - metadata={}, - model_type=ModelType.llm, + functions = [f['id'] for f in model_spec.get("functions", [])] + # Metadata format for embedding models: {"embedding_dimension": 1536, "context_length": 8192} + + # Example of an embedding model: + # {'model_id': 'ibm/granite-embedding-278m-multilingual', + # 'label': 'granite-embedding-278m-multilingual', + # 'model_limits': {'max_sequence_length': 512, 'embedding_dimension': 768}, + # ... 
+ if "embedding" in functions: + embedding_dimension = model_spec["model_limits"]["embedding_dimension"] + context_length = model_spec["model_limits"]["max_sequence_length"] + embedding_metadata = { + "embedding_dimension": embedding_dimension, + "context_length": context_length, + } + models.append( + Model( + identifier=model_spec["model_id"], + provider_resource_id=f"{self.__provider_id__}/{model_spec['model_id']}", + provider_id=self.__provider_id__, + metadata=embedding_metadata, + model_type=ModelType.embedding, + ) + ) + if "text_chat" in functions: + models.append( + Model( + identifier=model_spec["model_id"], + provider_resource_id=f"{self.__provider_id__}/{model_spec['model_id']}", + provider_id=self.__provider_id__, + metadata={}, + model_type=ModelType.llm, + ) ) - ) return models # LiteLLM provides methods to list models for many providers, but not for watsonx.ai. @@ -91,18 +116,3 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin): if "resources" not in response_data: raise ValueError("Resources not found in response") return response_data["resources"] - - -# TO DO: Delete the test main method. 
-if __name__ == "__main__": - config = WatsonXConfig(url="https://us-south.ml.cloud.ibm.com", api_key="xxx", project_id="xxx", timeout=60) - adapter = WatsonXInferenceAdapter(config) - model_specs = adapter._get_model_specs() - models = asyncio.run(adapter.list_models()) - for model in models: - print(model.identifier) - print(model.provider_resource_id) - print(model.provider_id) - print(model.metadata) - print(model.model_type) - print("--------------------------------") diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index d863eb53a..4070f7a5a 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -1405,7 +1405,7 @@ def prepare_openai_embeddings_params( def b64_encode_openai_embeddings_response( - response_data: dict, encoding_format: str | None = "float" + response_data: list[dict], encoding_format: str | None = "float" ) -> list[OpenAIEmbeddingData]: """ Process the OpenAI embeddings response to encode the embeddings in base64 format if specified. @@ -1414,12 +1414,12 @@ def b64_encode_openai_embeddings_response( for i, embedding_data in enumerate(response_data): if encoding_format == "base64": byte_array = bytearray() - for embedding_value in embedding_data.embedding: + for embedding_value in embedding_data["embedding"]: byte_array.extend(struct.pack("f", float(embedding_value))) response_embedding = base64.b64encode(byte_array).decode("utf-8") else: - response_embedding = embedding_data.embedding + response_embedding = embedding_data["embedding"] data.append( OpenAIEmbeddingData( embedding=response_embedding,