From efe5b124f3dbff2dcff84440d5ef44d62e9329f6 Mon Sep 17 00:00:00 2001
From: Sajikumar JS
Date: Thu, 17 Apr 2025 23:45:27 +0530
Subject: [PATCH] pre-commit issues fix

---
 .../remote_hosted_distro/watsonx.md           | 88 +++++++++++++++++++
 .../remote/inference/watsonx/config.py        | 10 +--
 .../remote/inference/watsonx/models.py        |  4 +-
 .../remote/inference/watsonx/watsonx.py       | 39 ++++----
 llama_stack/templates/dependencies.json       |  8 +-
 llama_stack/templates/watsonx/doc_template.md |  2 +-
 llama_stack/templates/watsonx/run.yaml        | 87 +++++++++++++++++-
 7 files changed, 207 insertions(+), 31 deletions(-)
 create mode 100644 docs/source/distributions/remote_hosted_distro/watsonx.md

diff --git a/docs/source/distributions/remote_hosted_distro/watsonx.md b/docs/source/distributions/remote_hosted_distro/watsonx.md
new file mode 100644
index 000000000..248903d73
--- /dev/null
+++ b/docs/source/distributions/remote_hosted_distro/watsonx.md
@@ -0,0 +1,88 @@
---
orphan: true
---

# WatsonX Distribution

```{toctree}
:maxdepth: 2
:hidden:

self
```

The `llamastack/distribution-watsonx` distribution consists of the following provider configurations.

| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::watsonx` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss` |

### Environment Variables

The following environment variables can be configured:

- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `WATSONX_API_KEY`: Watsonx API Key (default: ``)
- `WATSONX_PROJECT_ID`: Watsonx Project ID (default: ``)

### Models

The following models are available by default:

- `meta-llama/llama-3-3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
- `meta-llama/llama-2-13b-chat (aliases: meta-llama/Llama-2-13b)`
- `meta-llama/llama-3-1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
- `meta-llama/llama-3-1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta-llama/llama-3-2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
- `meta-llama/llama-3-2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
- `meta-llama/llama-3-2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta-llama/llama-3-2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
- `meta-llama/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)`

### Prerequisite: API Keys

Make sure you have access to a WatsonX API key. You can get one by referring to [watsonx.ai](https://www.ibm.com/docs/en/masv-and-l/maximo-manage/continuous-delivery?topic=setup-create-watsonx-api-key).

## Running Llama Stack with WatsonX

You can do this via Conda (build code), venv, or Docker (which has a pre-built image).

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ./run.yaml:/root/my-run.yaml \
  llamastack/distribution-watsonx \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
  --env WATSONX_API_KEY=$WATSONX_API_KEY \
  --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
  --env WATSONX_BASE_URL=$WATSONX_BASE_URL
```

### Via Conda

```bash
llama stack build --template watsonx --image-type conda
llama stack run ./run.yaml \
  --port $LLAMA_STACK_PORT \
  --env WATSONX_API_KEY=$WATSONX_API_KEY \
  --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID
```
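The new document above explains how to start the server but stops short of exercising it. As a quick smoke test (not part of this patch), the sketch below assumes the `llama-stack-client` Python package is installed and the server is listening on the documented default port 5001; the model id comes from the alias list above.

```python
from llama_stack_client import LlamaStackClient

# Point the client at the distribution started via Docker or Conda above.
client = LlamaStackClient(base_url="http://localhost:5001")

# List every model the watsonx distribution registered at startup.
for model in client.models.list():
    print(model.identifier)

# Run a chat completion against one of the aliases documented above.
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.3-70B-Instruct",
    messages=[{"role": "user", "content": "Say hello from watsonx."}],
)
print(response.completion_message.content)
```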
diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py
index 2f25b54af..3cb14f4e7 100644
--- a/llama_stack/providers/remote/inference/watsonx/config.py
+++ b/llama_stack/providers/remote/inference/watsonx/config.py
@@ -5,10 +5,11 @@
 # the root directory of this source tree.
 
 import os
-from typing import Optional, Dict, Any
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.schema_utils import json_schema_type
-from pydantic import BaseModel, Field, SecretStr
 
 
 class WatsonXProviderDataValidator(BaseModel):
@@ -19,7 +20,6 @@ class WatsonXProviderDataValidator(BaseModel):
 
 @json_schema_type
 class WatsonXConfig(BaseModel):
-
     url: str = Field(
         default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
         description="A base url for accessing the Watsonx.ai",
@@ -42,5 +42,5 @@ class WatsonXConfig(BaseModel):
         return {
             "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}",
             "api_key": "${env.WATSONX_API_KEY:}",
-            "project_id": "${env.WATSONX_PROJECT_ID:}"
-        }
\ No newline at end of file
+            "project_id": "${env.WATSONX_PROJECT_ID:}",
+        }
diff --git a/llama_stack/providers/remote/inference/watsonx/models.py b/llama_stack/providers/remote/inference/watsonx/models.py
index 1eaafc4ce..d98f0510a 100644
--- a/llama_stack/providers/remote/inference/watsonx/models.py
+++ b/llama_stack/providers/remote/inference/watsonx/models.py
@@ -43,7 +43,5 @@ MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "meta-llama/llama-guard-3-11b-vision",
         CoreModelId.llama_guard_3_11b_vision.value,
-    )
-
+    ),
 ]
-
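For reviewers who want to see the configuration behavior in isolation: a minimal sketch (not part of the patch) of how `WatsonXConfig` picks up its values, assuming the `api_key` and `project_id` fields follow the same env-var `default_factory` pattern that the `url` field shows above.

```python
import os

from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig

# Placeholder values; the field default_factories read these at construction time.
os.environ["WATSONX_API_KEY"] = "dummy-key"
os.environ["WATSONX_PROJECT_ID"] = "dummy-project"

config = WatsonXConfig()
print(config.url)                         # https://us-south.ml.cloud.ibm.com (default)
print(config.api_key.get_secret_value())  # dummy-key, unwrapped from SecretStr
print(config.project_id)                  # dummy-project
```

Storing the key as a pydantic `SecretStr` (imported in the diff above) keeps it redacted in logs and reprs until `get_secret_value()` is called explicitly.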
diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py
index 10ba1e484..1bb8a6bbf 100644
--- a/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -6,8 +6,10 @@
 
 from typing import AsyncGenerator, List, Optional, Union
 
+from ibm_watson_machine_learning.foundation_models import Model
+from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
+
 from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack.apis.inference import (
     ChatCompletionRequest,
     ChatCompletionResponse,
@@ -18,7 +20,6 @@ from llama_stack.apis.inference import (
     LogProbConfig,
     Message,
     ResponseFormat,
-    ResponseFormatType,
     SamplingParams,
     TextTruncation,
     ToolChoice,
@@ -26,6 +27,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
     OpenAICompatCompletionResponse,
@@ -41,14 +43,9 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from . import WatsonXConfig
-
-from ibm_watson_machine_learning.foundation_models import Model
-from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
-
 from .models import MODEL_ENTRIES
 
-
 class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: WatsonXConfig) -> None:
         ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
@@ -94,12 +91,9 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
         config_api_key = self._config.api_key.get_secret_value() if self._config.api_key else None
         config_url = self._config.url
         project_id = self._config.project_id
-        credentials = {
-            "url": config_url,
-            "apikey": config_api_key
-        }
+        credentials = {"url": config_url, "apikey": config_api_key}
 
-        return Model(model_id=model_id,credentials=credentials, project_id=project_id)
+        return Model(model_id=model_id, credentials=credentials, project_id=project_id)
 
     async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse:
         params = await self._get_params(request)
@@ -186,6 +180,7 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
     async def _stream_chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator:
         params = await self._get_params(request)
         model_id = request.model
+        # the watsonx SDK streams synchronously, so wrap it in an async generator
         async def _to_async_generator():
             s = self._get_client(model_id).generate_text_stream(**params)
@@ -225,19 +220,29 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
         if request.sampling_params.additional_params.get("temperature"):
             input_dict["params"][GenParams.TEMPERATURE] = request.sampling_params.additional_params["temperature"]
         if request.sampling_params.additional_params.get("length_penalty"):
-            input_dict["params"][GenParams.LENGTH_PENALTY] = request.sampling_params.additional_params["length_penalty"]
+            input_dict["params"][GenParams.LENGTH_PENALTY] = request.sampling_params.additional_params[
+                "length_penalty"
+            ]
         if request.sampling_params.additional_params.get("random_seed"):
             input_dict["params"][GenParams.RANDOM_SEED] = request.sampling_params.additional_params["random_seed"]
         if request.sampling_params.additional_params.get("min_new_tokens"):
-            input_dict["params"][GenParams.MIN_NEW_TOKENS] = request.sampling_params.additional_params["min_new_tokens"]
+            input_dict["params"][GenParams.MIN_NEW_TOKENS] = request.sampling_params.additional_params[
+                "min_new_tokens"
+            ]
         if request.sampling_params.additional_params.get("stop_sequences"):
-            input_dict["params"][GenParams.STOP_SEQUENCES] = request.sampling_params.additional_params["stop_sequences"]
+            input_dict["params"][GenParams.STOP_SEQUENCES] = request.sampling_params.additional_params[
+                "stop_sequences"
+            ]
         if request.sampling_params.additional_params.get("time_limit"):
             input_dict["params"][GenParams.TIME_LIMIT] = request.sampling_params.additional_params["time_limit"]
         if request.sampling_params.additional_params.get("truncate_input_tokens"):
-            input_dict["params"][GenParams.TRUNCATE_INPUT_TOKENS] = request.sampling_params.additional_params["truncate_input_tokens"]
+            input_dict["params"][GenParams.TRUNCATE_INPUT_TOKENS] = request.sampling_params.additional_params[
+                "truncate_input_tokens"
+            ]
         if request.sampling_params.additional_params.get("return_options"):
-            input_dict["params"][GenParams.RETURN_OPTIONS] = request.sampling_params.additional_params["return_options"]
+            input_dict["params"][GenParams.RETURN_OPTIONS] = request.sampling_params.additional_params[
+                "return_options"
+            ]
 
         params = {
             **input_dict,
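The streaming path above wraps the SDK's synchronous `generate_text_stream()` in an async generator so callers can consume it with `async for`. A standalone sketch of the same pattern (not part of the patch; the hypothetical `fake_stream` stands in for the SDK call):

```python
import asyncio
from typing import AsyncGenerator, Iterable


async def to_async_generator(chunks: Iterable[str]) -> AsyncGenerator[str, None]:
    # Mirrors _to_async_generator: re-yield each synchronously produced chunk.
    # Note that the underlying iteration still blocks the event loop between
    # chunks; the wrapper only adapts the interface, it does not add concurrency.
    for chunk in chunks:
        yield chunk


async def main() -> None:
    fake_stream = iter(["Hello", ", ", "watsonx!"])  # stand-in for generate_text_stream(**params)
    async for piece in to_async_generator(fake_stream):
        print(piece, end="", flush=True)
    print()


asyncio.run(main())
```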
input_dict["params"][GenParams.RETURN_OPTIONS] = request.sampling_params.additional_params[ + "return_options" + ] params = { **input_dict, diff --git a/llama_stack/templates/dependencies.json b/llama_stack/templates/dependencies.json index 41424304d..90215b537 100644 --- a/llama_stack/templates/dependencies.json +++ b/llama_stack/templates/dependencies.json @@ -766,10 +766,13 @@ "blobfile", "chardet", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "ibm_watson_machine_learning", + "langdetect", "matplotlib", "mcp", "nltk", @@ -782,6 +785,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -789,7 +793,7 @@ "sentencepiece", "tqdm", "transformers", - "uvicorn", - "ibm_watson_machine_learning" + "tree_sitter", + "uvicorn" ] } diff --git a/llama_stack/templates/watsonx/doc_template.md b/llama_stack/templates/watsonx/doc_template.md index 11c0cb7b5..e56a32bbf 100644 --- a/llama_stack/templates/watsonx/doc_template.md +++ b/llama_stack/templates/watsonx/doc_template.md @@ -60,7 +60,7 @@ docker run \ --port $LLAMA_STACK_PORT \ --env WATSONX_API_KEY=$WATSONX_API_KEY \ --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ - --env WATSONX_BASE_URL=$WATSONX_BASE_URL + --env WATSONX_BASE_URL=$WATSONX_BASE_URL ``` ### Via Conda diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index 568837d6a..1048f7192 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -17,6 +17,7 @@ providers: config: url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:} + project_id: ${env.WATSONX_PROJECT_ID:} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -25,6 +26,11 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -37,7 +43,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:llama-stack} + service_name: "${env.OTEL_SERVICE_NAME:\u200B}" sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/watsonx/trace_store.db} eval: @@ -49,6 +55,13 @@ providers: namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: @@ -60,10 +73,33 @@ providers: - provider_id: basic provider_type: inline::basic config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} - provider_id: rag-runtime provider_type: 
diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml
index 568837d6a..1048f7192 100644
--- a/llama_stack/templates/watsonx/run.yaml
+++ b/llama_stack/templates/watsonx/run.yaml
@@ -17,6 +17,7 @@ providers:
     config:
       url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:}
+      project_id: ${env.WATSONX_PROJECT_ID:}
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -25,6 +26,11 @@ providers:
     config:
       kvstore:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
   agents:
@@ -37,7 +43,7 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
       sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/watsonx/trace_store.db}
   eval:
@@ -49,6 +55,13 @@ providers:
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db
   datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
@@ -60,10 +73,33 @@ providers:
   - provider_id: basic
     provider_type: inline::basic
     config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
   tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db
@@ -73,57 +109,102 @@ models:
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-2-13b-chat
   provider_id: watsonx
   provider_model_id: meta-llama/llama-2-13b-chat
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-2-13b
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-2-13b-chat
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-1-70b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-70B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-1-8b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-8B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-11b-vision-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-1b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-1B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-3b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-90b-vision-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/llama-3-405b-instruct
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-405b-instruct
+  provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-guard-3-11b-vision
   provider_id: watsonx
   provider_model_id: meta-llama/llama-guard-3-11b-vision
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-guard-3-11b-vision
+  model_type: llm
 shields: []
 vector_dbs: []
 datasets: []
 scoring_fns: []
 benchmarks: []
 tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
 - toolgroup_id: builtin::rag
   provider_id: rag-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
 server:
   port: 8321
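One consequence of the expanded `models` list worth calling out: each watsonx model is now registered twice, once under its watsonx-native id and once under its Hugging Face-style alias, with both pointing at the same `provider_model_id`. A sketch (again not part of the patch, assuming `llama-stack-client` and a local server on port 5001) showing both spellings reaching the same deployment:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")

# Both identifiers map to provider_model_id meta-llama/llama-3-3-70b-instruct
# in run.yaml, so either spelling reaches the same watsonx deployment.
for model_id in (
    "meta-llama/llama-3-3-70b-instruct",  # watsonx-native id
    "meta-llama/Llama-3.3-70B-Instruct",  # Hugging Face-style alias
):
    response = client.inference.chat_completion(
        model_id=model_id,
        messages=[{"role": "user", "content": "Reply with the single word: ok"}],
    )
    print(f"{model_id} -> {response.completion_message.content!r}")
```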