diff --git a/docs/docs/providers/inference/remote_watsonx.mdx b/docs/docs/providers/inference/remote_watsonx.mdx
index 8cd3b2869..be82afcba 100644
--- a/docs/docs/providers/inference/remote_watsonx.mdx
+++ b/docs/docs/providers/inference/remote_watsonx.mdx
@@ -16,8 +16,8 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `url` | `` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
-| `project_id` | `str \| None` | No | | The Project ID key |
+| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx.ai API key |
+| `project_id` | `str \| None` | No | | The watsonx.ai project ID |
 | `timeout` | `` | No | 60 | Timeout for the HTTP requests |
 
 ## Sample Configuration
diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py
index 1a7e2b6c3..d7337d085 100644
--- a/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import asyncio
 from typing import Any
 
 import requests
@@ -56,14 +55,14 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
     async def list_models(self) -> list[Model] | None:
         models = []
         for model_spec in self._get_model_specs():
-            functions = [f['id'] for f in model_spec.get("functions", [])]
+            functions = [f["id"] for f in model_spec.get("functions", [])]
             # Format: {"embedding_dimension": 1536, "context_length": 8192}
             # Example of an embedding model:
             # {'model_id': 'ibm/granite-embedding-278m-multilingual',
-            #  'label': 'granite-embedding-278m-multilingual', 
+            #  'label': 'granite-embedding-278m-multilingual',
             #  'model_limits': {'max_sequence_length': 512, 'embedding_dimension': 768},
-            #  ... 
+            #  ...
             if "embedding" in functions:
                 embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
                 context_length = model_spec["model_limits"]["max_sequence_length"]
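
For reviewers, a minimal sketch of the classification logic the second Python hunk touches. This is not the adapter's actual code: `classify_model_spec` and the `spec` literal below are hypothetical, shaped after the example comment in the diff, and the real `list_models` builds `Model` objects rather than plain dicts.

```python
from typing import Any


def classify_model_spec(model_spec: dict[str, Any]) -> dict[str, Any]:
    # "functions" lists capabilities such as "embedding" or "text_generation";
    # an embedding model additionally carries "model_limits" with its
    # embedding dimension and maximum sequence length.
    functions = [f["id"] for f in model_spec.get("functions", [])]
    if "embedding" in functions:
        limits = model_spec["model_limits"]
        return {
            "model_id": model_spec["model_id"],
            "model_type": "embedding",
            "metadata": {
                "embedding_dimension": limits["embedding_dimension"],
                "context_length": limits["max_sequence_length"],
            },
        }
    # Anything without the "embedding" function is treated as an LLM here.
    return {"model_id": model_spec["model_id"], "model_type": "llm", "metadata": {}}


# Hypothetical spec, shaped like the example comment in the diff above.
spec = {
    "model_id": "ibm/granite-embedding-278m-multilingual",
    "label": "granite-embedding-278m-multilingual",
    "functions": [{"id": "embedding"}],
    "model_limits": {"max_sequence_length": 512, "embedding_dimension": 768},
}
print(classify_model_spec(spec))
```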