From efe5b124f3dbff2dcff84440d5ef44d62e9329f6 Mon Sep 17 00:00:00 2001
From: Sajikumar JS
Date: Thu, 17 Apr 2025 23:45:27 +0530
Subject: [PATCH] pre-commit issues fix

---
 .../remote_hosted_distro/watsonx.md           | 88 +++++++++++++++++++
 .../remote/inference/watsonx/config.py        | 10 +--
 .../remote/inference/watsonx/models.py        |  4 +-
 .../remote/inference/watsonx/watsonx.py       | 39 ++++----
 llama_stack/templates/dependencies.json       |  8 +-
 llama_stack/templates/watsonx/doc_template.md |  2 +-
 llama_stack/templates/watsonx/run.yaml        | 87 +++++++++++++++++-
 7 files changed, 207 insertions(+), 31 deletions(-)
 create mode 100644 docs/source/distributions/remote_hosted_distro/watsonx.md

diff --git a/docs/source/distributions/remote_hosted_distro/watsonx.md b/docs/source/distributions/remote_hosted_distro/watsonx.md
new file mode 100644
index 000000000..248903d73
--- /dev/null
+++ b/docs/source/distributions/remote_hosted_distro/watsonx.md
@@ -0,0 +1,88 @@
---
orphan: true
---

# WatsonX Distribution

```{toctree}
:maxdepth: 2
:hidden:

self
```

The `llamastack/distribution-watsonx` distribution consists of the following provider configurations.

| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::watsonx` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss` |

### Environment Variables

The following environment variables can be configured:

- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `WATSONX_API_KEY`: Watsonx API Key (default: ``)
- `WATSONX_PROJECT_ID`: Watsonx Project ID (default: ``)

### Models

The following models are available by default:

- `meta-llama/llama-3-3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
- `meta-llama/llama-2-13b-chat (aliases: meta-llama/Llama-2-13b)`
- `meta-llama/llama-3-1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
- `meta-llama/llama-3-1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta-llama/llama-3-2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
- `meta-llama/llama-3-2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
- `meta-llama/llama-3-2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta-llama/llama-3-2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
- `meta-llama/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)`

### Prerequisite: API Keys

Make sure you have access to a WatsonX API key. You can get one by referring to [watsonx.ai](https://www.ibm.com/docs/en/masv-and-l/maximo-manage/continuous-delivery?topic=setup-create-watsonx-api-key).

## Running Llama Stack with WatsonX

You can do this via Conda (build code), venv, or Docker (which has a pre-built image).

### Via Docker

This method allows you to get started quickly without having to build the distribution code.

```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ./run.yaml:/root/my-run.yaml \
  llamastack/distribution-watsonx \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
  --env WATSONX_API_KEY=$WATSONX_API_KEY \
  --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
  --env WATSONX_BASE_URL=$WATSONX_BASE_URL
```

### Via Conda

```bash
llama stack build --template watsonx --image-type conda
llama stack run ./run.yaml \
  --port $LLAMA_STACK_PORT \
  --env WATSONX_API_KEY=$WATSONX_API_KEY \
  --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID
```
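The new document above explains how to start the server but stops short of exercising it. As a quick smoke test (not part of this patch), the sketch below assumes the `llama-stack-client` Python package is installed and the server is listening on the documented default port 5001; the model id comes from the alias list above.

```python
from llama_stack_client import LlamaStackClient

# Point the client at the distribution started via Docker or Conda above.
client = LlamaStackClient(base_url="http://localhost:5001")

# List every model the watsonx distribution registered at startup.
for model in client.models.list():
    print(model.identifier)

# Run a chat completion against one of the aliases documented above.
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.3-70B-Instruct",
    messages=[{"role": "user", "content": "Say hello from watsonx."}],
)
print(response.completion_message.content)
```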
diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py
index 2f25b54af..3cb14f4e7 100644
--- a/llama_stack/providers/remote/inference/watsonx/config.py
+++ b/llama_stack/providers/remote/inference/watsonx/config.py
@@ -5,10 +5,11 @@
 # the root directory of this source tree.
 
 import os
-from typing import Optional, Dict, Any
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.schema_utils import json_schema_type
-from pydantic import BaseModel, Field, SecretStr
 
 
 class WatsonXProviderDataValidator(BaseModel):
@@ -19,7 +20,6 @@ class WatsonXProviderDataValidator(BaseModel):
 
 @json_schema_type
 class WatsonXConfig(BaseModel):
-
     url: str = Field(
         default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
         description="A base url for accessing the Watsonx.ai",
@@ -42,5 +42,5 @@ class WatsonXConfig(BaseModel):
         return {
             "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}",
             "api_key": "${env.WATSONX_API_KEY:}",
-            "project_id": "${env.WATSONX_PROJECT_ID:}"
-        }
\ No newline at end of file
+            "project_id": "${env.WATSONX_PROJECT_ID:}",
+        }
diff --git a/llama_stack/providers/remote/inference/watsonx/models.py b/llama_stack/providers/remote/inference/watsonx/models.py
index 1eaafc4ce..d98f0510a 100644
--- a/llama_stack/providers/remote/inference/watsonx/models.py
+++ b/llama_stack/providers/remote/inference/watsonx/models.py
@@ -43,7 +43,5 @@ MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "meta-llama/llama-guard-3-11b-vision",
         CoreModelId.llama_guard_3_11b_vision.value,
-    )
-
+    ),
 ]
-
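For reviewers who want to see the configuration behavior in isolation: a minimal sketch (not part of the patch) of how `WatsonXConfig` picks up its values, assuming the `api_key` and `project_id` fields follow the same env-var `default_factory` pattern that the `url` field shows above.

```python
import os

from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig

# Placeholder values; the field default_factories read these at construction time.
os.environ["WATSONX_API_KEY"] = "dummy-key"
os.environ["WATSONX_PROJECT_ID"] = "dummy-project"

config = WatsonXConfig()
print(config.url)                         # https://us-south.ml.cloud.ibm.com (default)
print(config.api_key.get_secret_value())  # dummy-key, unwrapped from SecretStr
print(config.project_id)                  # dummy-project
```

Storing the key as a pydantic `SecretStr` (imported in the diff above) keeps it redacted in logs and reprs until `get_secret_value()` is called explicitly.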
diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py
index 10ba1e484..1bb8a6bbf 100644
--- a/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -6,8 +6,10 @@
 
 from typing import AsyncGenerator, List, Optional, Union
 
+from ibm_watson_machine_learning.foundation_models import Model
+from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
+
 from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack.apis.inference import (
     ChatCompletionRequest,
     ChatCompletionResponse,
@@ -18,7 +20,6 @@ from llama_stack.apis.inference import (
     LogProbConfig,
     Message,
     ResponseFormat,
-    ResponseFormatType,
     SamplingParams,
     TextTruncation,
     ToolChoice,
@@ -26,6 +27,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
     OpenAICompatCompletionResponse,
@@ -41,14 +43,9 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from . import WatsonXConfig
-
-from ibm_watson_machine_learning.foundation_models import Model
-from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
-
 from .models import MODEL_ENTRIES
 
-
 class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: WatsonXConfig) -> None:
         ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
@@ -94,12 +91,9 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
         config_api_key = self._config.api_key.get_secret_value() if self._config.api_key else None
         config_url = self._config.url
         project_id = self._config.project_id
-        credentials = {
-            "url": config_url,
-            "apikey": config_api_key
-        }
+        credentials = {"url": config_url, "apikey": config_api_key}
 
-        return Model(model_id=model_id,credentials=credentials, project_id=project_id)
+        return Model(model_id=model_id, credentials=credentials, project_id=project_id)
 
     async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse:
         params = await self._get_params(request)
@@ -186,6 +180,7 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
     async def _stream_chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator:
         params = await self._get_params(request)
         model_id = request.model
+        # the watsonx SDK streams synchronously, so wrap it in an async generator
         async def _to_async_generator():
             s = self._get_client(model_id).generate_text_stream(**params)
@@ -225,19 +220,29 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
         if request.sampling_params.additional_params.get("temperature"):
             input_dict["params"][GenParams.TEMPERATURE] = request.sampling_params.additional_params["temperature"]
         if request.sampling_params.additional_params.get("length_penalty"):
-            input_dict["params"][GenParams.LENGTH_PENALTY] = request.sampling_params.additional_params["length_penalty"]
+            input_dict["params"][GenParams.LENGTH_PENALTY] = request.sampling_params.additional_params[
+                "length_penalty"
+            ]
         if request.sampling_params.additional_params.get("random_seed"):
             input_dict["params"][GenParams.RANDOM_SEED] = request.sampling_params.additional_params["random_seed"]
         if request.sampling_params.additional_params.get("min_new_tokens"):
-            input_dict["params"][GenParams.MIN_NEW_TOKENS] = request.sampling_params.additional_params["min_new_tokens"]
+            input_dict["params"][GenParams.MIN_NEW_TOKENS] = request.sampling_params.additional_params[
+                "min_new_tokens"
+            ]
         if request.sampling_params.additional_params.get("stop_sequences"):
-            input_dict["params"][GenParams.STOP_SEQUENCES] = request.sampling_params.additional_params["stop_sequences"]
+            input_dict["params"][GenParams.STOP_SEQUENCES] = request.sampling_params.additional_params[
+                "stop_sequences"
+            ]
         if request.sampling_params.additional_params.get("time_limit"):
             input_dict["params"][GenParams.TIME_LIMIT] = request.sampling_params.additional_params["time_limit"]
         if request.sampling_params.additional_params.get("truncate_input_tokens"):
-            input_dict["params"][GenParams.TRUNCATE_INPUT_TOKENS] = request.sampling_params.additional_params["truncate_input_tokens"]
+            input_dict["params"][GenParams.TRUNCATE_INPUT_TOKENS] = request.sampling_params.additional_params[
+                "truncate_input_tokens"
+            ]
         if request.sampling_params.additional_params.get("return_options"):
-            input_dict["params"][GenParams.RETURN_OPTIONS] = request.sampling_params.additional_params["return_options"]
+            input_dict["params"][GenParams.RETURN_OPTIONS] = request.sampling_params.additional_params[
+                "return_options"
+            ]
 
         params = {
             **input_dict,
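The streaming path above wraps the SDK's synchronous `generate_text_stream()` in an async generator so callers can consume it with `async for`. A standalone sketch of the same pattern (not part of the patch; the hypothetical `fake_stream` stands in for the SDK call):

```python
import asyncio
from typing import AsyncGenerator, Iterable


async def to_async_generator(chunks: Iterable[str]) -> AsyncGenerator[str, None]:
    # Mirrors _to_async_generator: re-yield each synchronously produced chunk.
    # Note that the underlying iteration still blocks the event loop between
    # chunks; the wrapper only adapts the interface, it does not add concurrency.
    for chunk in chunks:
        yield chunk


async def main() -> None:
    fake_stream = iter(["Hello", ", ", "watsonx!"])  # stand-in for generate_text_stream(**params)
    async for piece in to_async_generator(fake_stream):
        print(piece, end="", flush=True)
    print()


asyncio.run(main())
```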
input_dict["params"][GenParams.RETURN_OPTIONS] = request.sampling_params.additional_params[ + "return_options" + ] params = { **input_dict, diff --git a/llama_stack/templates/dependencies.json b/llama_stack/templates/dependencies.json index 41424304d..90215b537 100644 --- a/llama_stack/templates/dependencies.json +++ b/llama_stack/templates/dependencies.json @@ -766,10 +766,13 @@ "blobfile", "chardet", "datasets", + "emoji", "faiss-cpu", "fastapi", "fire", "httpx", + "ibm_watson_machine_learning", + "langdetect", "matplotlib", "mcp", "nltk", @@ -782,6 +785,7 @@ "psycopg2-binary", "pymongo", "pypdf", + "pythainlp", "redis", "requests", "scikit-learn", @@ -789,7 +793,7 @@ "sentencepiece", "tqdm", "transformers", - "uvicorn", - "ibm_watson_machine_learning" + "tree_sitter", + "uvicorn" ] } diff --git a/llama_stack/templates/watsonx/doc_template.md b/llama_stack/templates/watsonx/doc_template.md index 11c0cb7b5..e56a32bbf 100644 --- a/llama_stack/templates/watsonx/doc_template.md +++ b/llama_stack/templates/watsonx/doc_template.md @@ -60,7 +60,7 @@ docker run \ --port $LLAMA_STACK_PORT \ --env WATSONX_API_KEY=$WATSONX_API_KEY \ --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ - --env WATSONX_BASE_URL=$WATSONX_BASE_URL + --env WATSONX_BASE_URL=$WATSONX_BASE_URL ``` ### Via Conda diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index 568837d6a..1048f7192 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -17,6 +17,7 @@ providers: config: url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:} + project_id: ${env.WATSONX_PROJECT_ID:} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -25,6 +26,11 @@ providers: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -37,7 +43,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:llama-stack} + service_name: "${env.OTEL_SERVICE_NAME:\u200B}" sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/watsonx/trace_store.db} eval: @@ -49,6 +55,13 @@ providers: namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: @@ -60,10 +73,33 @@ providers: - provider_id: basic provider_type: inline::basic config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} - provider_id: rag-runtime provider_type: 
diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml
index 568837d6a..1048f7192 100644
--- a/llama_stack/templates/watsonx/run.yaml
+++ b/llama_stack/templates/watsonx/run.yaml
@@ -17,6 +17,7 @@ providers:
     config:
       url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:}
+      project_id: ${env.WATSONX_PROJECT_ID:}
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -25,6 +26,11 @@ providers:
     config:
       kvstore:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
   agents:
@@ -37,7 +43,7 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
       sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/watsonx/trace_store.db}
   eval:
@@ -49,6 +55,13 @@ providers:
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db
   datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
@@ -60,10 +73,33 @@ providers:
   - provider_id: basic
     provider_type: inline::basic
     config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
   tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db
@@ -73,57 +109,102 @@ models:
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-2-13b-chat
   provider_id: watsonx
   provider_model_id: meta-llama/llama-2-13b-chat
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-2-13b
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-2-13b-chat
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-1-70b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-70B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-1-8b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-8B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-11b-vision-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-1b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-1B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-3b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-90b-vision-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/llama-3-405b-instruct
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-405b-instruct
+  provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-guard-3-11b-vision
   provider_id: watsonx
   provider_model_id: meta-llama/llama-guard-3-11b-vision
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-guard-3-11b-vision
+  model_type: llm
 shields: []
 vector_dbs: []
 datasets: []
 scoring_fns: []
 benchmarks: []
 tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
 - toolgroup_id: builtin::rag
   provider_id: rag-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
 server:
   port: 8321
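One consequence of the expanded `models` list worth calling out: each watsonx model is now registered twice, once under its watsonx-native id and once under its Hugging Face-style alias, with both pointing at the same `provider_model_id`. A sketch (again not part of the patch, assuming `llama-stack-client` and a local server on port 5001) showing both spellings reaching the same deployment:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")

# Both identifiers map to provider_model_id meta-llama/llama-3-3-70b-instruct
# in run.yaml, so either spelling reaches the same watsonx deployment.
for model_id in (
    "meta-llama/llama-3-3-70b-instruct",  # watsonx-native id
    "meta-llama/Llama-3.3-70B-Instruct",  # Hugging Face-style alias
):
    response = client.inference.chat_completion(
        model_id=model_id,
        messages=[{"role": "user", "content": "Reply with the single word: ok"}],
    )
    print(f"{model_id} -> {response.completion_message.content!r}")
```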