Fix pre-commit issues

Sajikumar JS 2025-04-17 23:45:27 +05:30
parent 34a3f1a749
commit efe5b124f3
7 changed files with 207 additions and 31 deletions

View file

@@ -0,0 +1,88 @@
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# WatsonX Distribution
```{toctree}
:maxdepth: 2
:hidden:
self
```
The `llamastack/distribution-watsonx` distribution consists of the following provider configurations.
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::watsonx` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss` |
### Environment Variables
The following environment variables can be configured:
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `WATSONX_API_KEY`: Watsonx API Key (default: ``)
- `WATSONX_PROJECT_ID`: Watsonx Project ID (default: ``)
### Models
The following models are available by default:
- `meta-llama/llama-3-3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
- `meta-llama/llama-2-13b-chat (aliases: meta-llama/Llama-2-13b)`
- `meta-llama/llama-3-1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
- `meta-llama/llama-3-1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
- `meta-llama/llama-3-2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
- `meta-llama/llama-3-2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
- `meta-llama/llama-3-2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
- `meta-llama/llama-3-2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
- `meta-llama/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
### Prerequisite: API Keys
Make sure you have access to a WatsonX API key. You can get one by referring to the [watsonx.ai](https://www.ibm.com/docs/en/masv-and-l/maximo-manage/continuous-delivery?topic=setup-create-watsonx-api-key) documentation.
## Running Llama Stack with WatsonX
You can do this via Conda (build code), venv, or Docker (which has a pre-built image).
### Via Docker
This method allows you to get started quickly without having to build the distribution code.
```bash
LLAMA_STACK_PORT=5001
docker run \
-it \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \
llamastack/distribution-watsonx \
--yaml-config /root/my-run.yaml \
--port $LLAMA_STACK_PORT \
--env WATSONX_API_KEY=$WATSONX_API_KEY \
--env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
--env WATSONX_BASE_URL=$WATSONX_BASE_URL
```
### Via Conda
```bash
llama stack build --template watsonx --image-type conda
llama stack run ./run.yaml \
--port $LLAMA_STACK_PORT \
--env WATSONX_API_KEY=$WATSONX_API_KEY \
--env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID
```
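
Once the server is up, you can sanity-check inference from Python. This is a minimal sketch using the `llama-stack-client` package (installed separately); the model ID and port come from the defaults above:

```python
from llama_stack_client import LlamaStackClient

# Assumes the server started above is listening on localhost:5001.
client = LlamaStackClient(base_url="http://localhost:5001")

response = client.inference.chat_completion(
    model_id="meta-llama/llama-3-3-70b-instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.completion_message.content)
```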

View file

@@ -5,10 +5,11 @@
 # the root directory of this source tree.
 import os
-from typing import Optional, Dict, Any
-from pydantic import BaseModel, Field, SecretStr
+from typing import Any, Dict, Optional
 from llama_stack.schema_utils import json_schema_type
+from pydantic import BaseModel, Field, SecretStr
 
 class WatsonXProviderDataValidator(BaseModel):
@@ -19,7 +20,6 @@ class WatsonXProviderDataValidator(BaseModel):
 @json_schema_type
 class WatsonXConfig(BaseModel):
     url: str = Field(
         default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
         description="A base url for accessing the Watsonx.ai",
@@ -42,5 +42,5 @@ class WatsonXConfig(BaseModel):
         return {
             "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}",
             "api_key": "${env.WATSONX_API_KEY:}",
-            "project_id": "${env.WATSONX_PROJECT_ID:}"
+            "project_id": "${env.WATSONX_PROJECT_ID:}",
         }
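
Taken together, the hunks above mean the adapter reads its endpoint and credentials from the environment and hands them to the watsonx SDK. A minimal sketch of that flow, reusing the exact call shape from `_get_client` in the diff below (the model ID is just an example):

```python
import os

from ibm_watson_machine_learning.foundation_models import Model

# Defaults mirror WatsonXConfig above; WATSONX_API_KEY and
# WATSONX_PROJECT_ID have empty defaults and must be set.
url = os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com")
api_key = os.getenv("WATSONX_API_KEY")
project_id = os.getenv("WATSONX_PROJECT_ID")

client = Model(
    model_id="meta-llama/llama-3-3-70b-instruct",
    credentials={"url": url, "apikey": api_key},
    project_id=project_id,
)
```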

View file

@@ -43,7 +43,5 @@ MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "meta-llama/llama-guard-3-11b-vision",
         CoreModelId.llama_guard_3_11b_vision.value,
-    )
+    ),
 ]

View file

@@ -6,8 +6,10 @@
 from typing import AsyncGenerator, List, Optional, Union
 
+from ibm_watson_machine_learning.foundation_models import Model
+from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
+
 from llama_stack.apis.common.content_types import InterleavedContent, InterleavedContentItem
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack.apis.inference import (
     ChatCompletionRequest,
     ChatCompletionResponse,
@@ -18,7 +20,6 @@ from llama_stack.apis.inference import (
     LogProbConfig,
     Message,
     ResponseFormat,
-    ResponseFormatType,
     SamplingParams,
     TextTruncation,
     ToolChoice,
@@ -26,6 +27,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
     OpenAICompatCompletionResponse,
@@ -41,14 +43,9 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from . import WatsonXConfig
-from ibm_watson_machine_learning.foundation_models import Model
-from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
 from .models import MODEL_ENTRIES
 
 
 class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: WatsonXConfig) -> None:
         ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
@@ -94,12 +91,9 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
         config_api_key = self._config.api_key.get_secret_value() if self._config.api_key else None
         config_url = self._config.url
         project_id = self._config.project_id
-        credentials = {
-            "url": config_url,
-            "apikey": config_api_key
-        }
+        credentials = {"url": config_url, "apikey": config_api_key}
 
-        return Model(model_id=model_id,credentials=credentials, project_id=project_id)
+        return Model(model_id=model_id, credentials=credentials, project_id=project_id)
 
     async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse:
         params = await self._get_params(request)
@@ -186,6 +180,7 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
     async def _stream_chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator:
         params = await self._get_params(request)
         model_id = request.model
+
         # if we shift to TogetherAsyncClient, we won't need this wrapper
         async def _to_async_generator():
             s = self._get_client(model_id).generate_text_stream(**params)
@@ -225,19 +220,29 @@ class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
         if request.sampling_params.additional_params.get("temperature"):
             input_dict["params"][GenParams.TEMPERATURE] = request.sampling_params.additional_params["temperature"]
         if request.sampling_params.additional_params.get("length_penalty"):
-            input_dict["params"][GenParams.LENGTH_PENALTY] = request.sampling_params.additional_params["length_penalty"]
+            input_dict["params"][GenParams.LENGTH_PENALTY] = request.sampling_params.additional_params[
+                "length_penalty"
+            ]
         if request.sampling_params.additional_params.get("random_seed"):
             input_dict["params"][GenParams.RANDOM_SEED] = request.sampling_params.additional_params["random_seed"]
         if request.sampling_params.additional_params.get("min_new_tokens"):
-            input_dict["params"][GenParams.MIN_NEW_TOKENS] = request.sampling_params.additional_params["min_new_tokens"]
+            input_dict["params"][GenParams.MIN_NEW_TOKENS] = request.sampling_params.additional_params[
+                "min_new_tokens"
+            ]
         if request.sampling_params.additional_params.get("stop_sequences"):
-            input_dict["params"][GenParams.STOP_SEQUENCES] = request.sampling_params.additional_params["stop_sequences"]
+            input_dict["params"][GenParams.STOP_SEQUENCES] = request.sampling_params.additional_params[
+                "stop_sequences"
+            ]
         if request.sampling_params.additional_params.get("time_limit"):
             input_dict["params"][GenParams.TIME_LIMIT] = request.sampling_params.additional_params["time_limit"]
         if request.sampling_params.additional_params.get("truncate_input_tokens"):
-            input_dict["params"][GenParams.TRUNCATE_INPUT_TOKENS] = request.sampling_params.additional_params["truncate_input_tokens"]
+            input_dict["params"][GenParams.TRUNCATE_INPUT_TOKENS] = request.sampling_params.additional_params[
+                "truncate_input_tokens"
+            ]
         if request.sampling_params.additional_params.get("return_options"):
-            input_dict["params"][GenParams.RETURN_OPTIONS] = request.sampling_params.additional_params["return_options"]
+            input_dict["params"][GenParams.RETURN_OPTIONS] = request.sampling_params.additional_params[
+                "return_options"
+            ]
 
         params = {
             **input_dict,
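
The wrapped assignments above are purely line-length fixes, but the block remains a long run of near-identical `if` statements. A table-driven sketch of the same mapping (using only the `GenParams` keys that appear in the diff) would keep it in one place:

```python
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams

# llama-stack additional_params key -> watsonx GenParams key.
_PARAM_MAP = {
    "temperature": GenParams.TEMPERATURE,
    "length_penalty": GenParams.LENGTH_PENALTY,
    "random_seed": GenParams.RANDOM_SEED,
    "min_new_tokens": GenParams.MIN_NEW_TOKENS,
    "stop_sequences": GenParams.STOP_SEQUENCES,
    "time_limit": GenParams.TIME_LIMIT,
    "truncate_input_tokens": GenParams.TRUNCATE_INPUT_TOKENS,
    "return_options": GenParams.RETURN_OPTIONS,
}


def apply_sampling_params(additional_params: dict, params: dict) -> None:
    """Copy each recognized sampling option into the watsonx params dict."""
    for key, gen_key in _PARAM_MAP.items():
        if additional_params.get(key):
            params[gen_key] = additional_params[key]
```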

View file

@@ -766,10 +766,13 @@
     "blobfile",
     "chardet",
     "datasets",
+    "emoji",
     "faiss-cpu",
     "fastapi",
     "fire",
     "httpx",
+    "ibm_watson_machine_learning",
+    "langdetect",
     "matplotlib",
     "mcp",
     "nltk",
@@ -782,6 +785,7 @@
     "psycopg2-binary",
     "pymongo",
     "pypdf",
+    "pythainlp",
     "redis",
     "requests",
     "scikit-learn",
@@ -789,7 +793,7 @@
     "sentencepiece",
     "tqdm",
     "transformers",
-    "uvicorn",
-    "ibm_watson_machine_learning"
+    "tree_sitter",
+    "uvicorn"
   ]
 }

View file

@@ -17,6 +17,7 @@ providers:
     config:
       url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:}
+      project_id: ${env.WATSONX_PROJECT_ID:}
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -25,6 +26,11 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -37,7 +43,7 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
       sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/watsonx/trace_store.db}
   eval:
@@ -49,6 +55,13 @@ providers:
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db
   datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
@@ -60,10 +73,33 @@ providers:
   - provider_id: basic
     provider_type: inline::basic
     config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
   tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db
@@ -73,57 +109,102 @@ models:
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-2-13b-chat
   provider_id: watsonx
   provider_model_id: meta-llama/llama-2-13b-chat
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-2-13b
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-2-13b-chat
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-1-70b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-70B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-1-8b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-8B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-11b-vision-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-1b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-1B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-3b-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-3-2-90b-vision-instruct
   provider_id: watsonx
   provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/llama-3-405b-instruct
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: watsonx
-  provider_model_id: meta-llama/llama-3-405b-instruct
+  provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/llama-guard-3-11b-vision
   provider_id: watsonx
   provider_model_id: meta-llama/llama-guard-3-11b-vision
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-guard-3-11b-vision
+  model_type: llm
 shields: []
 vector_dbs: []
 datasets: []
 scoring_fns: []
 benchmarks: []
 tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
 - toolgroup_id: builtin::rag
   provider_id: rag-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
 server:
   port: 8321
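
Throughout this run.yaml, values use the `${env.NAME:default}` placeholder syntax; an empty default, as in `${env.WATSONX_API_KEY:}`, resolves to an empty string when the variable is unset. A rough sketch of how such a placeholder can be expanded — an illustration of the syntax only, not llama-stack's actual resolver:

```python
import os
import re

# Matches ${env.NAME:default}; the default part may be empty.
_ENV_PLACEHOLDER = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*):([^}]*)\}")


def expand_env_placeholders(value: str) -> str:
    """Replace each ${env.NAME:default} with os.environ.get(NAME, default)."""
    return _ENV_PLACEHOLDER.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)


# Example: falls back to the us-south endpoint when WATSONX_BASE_URL is unset.
print(expand_env_placeholders("${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}"))
```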