mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-16 11:32:36 +00:00
Address review comments
Signed-off-by: Bill Murdock <bmurdock@redhat.com>
This commit is contained in:
parent
ca771cd921
commit
1d941b6aa0
9 changed files with 61 additions and 200 deletions
|
|
@ -277,7 +277,7 @@ Available Models:
|
|||
api=Api.inference,
|
||||
adapter_type="watsonx",
|
||||
provider_type="remote::watsonx",
|
||||
pip_packages=["ibm_watsonx_ai"],
|
||||
pip_packages=["litellm"],
|
||||
module="llama_stack.providers.remote.inference.watsonx",
|
||||
config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",
|
||||
|
|
|
|||
|
|
@ -25,12 +25,16 @@ class WatsonXConfig(RemoteInferenceProviderConfig):
|
|||
default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
|
||||
description="A base url for accessing the watsonx.ai",
|
||||
)
|
||||
# This seems like it should be required, but none of the other remote inference
|
||||
# providers require it, so this is optional here too for consistency.
|
||||
# The OpenAIConfig uses default=None instead, so this is following that precedent.
|
||||
api_key: SecretStr | None = Field(
|
||||
default_factory=lambda: os.getenv("WATSONX_API_KEY"),
|
||||
default=None,
|
||||
description="The watsonx.ai API key",
|
||||
)
|
||||
# As above, this is optional here too for consistency.
|
||||
project_id: str | None = Field(
|
||||
default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"),
|
||||
default=None,
|
||||
description="The watsonx.ai project ID",
|
||||
)
|
||||
timeout: int = Field(
|
||||
|
|
|
|||
|
|
@ -16,9 +16,6 @@ from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOp
|
|||
|
||||
|
||||
class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
|
||||
_config: WatsonXConfig
|
||||
__provider_id__: str = "watsonx"
|
||||
|
||||
def __init__(self, config: WatsonXConfig):
|
||||
LiteLLMOpenAIMixin.__init__(
|
||||
self,
|
||||
|
|
@ -29,17 +26,9 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
|
|||
self.available_models = None
|
||||
self.config = config
|
||||
|
||||
# get_api_key = LiteLLMOpenAIMixin.get_api_key
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
return self.config.url
|
||||
|
||||
async def initialize(self):
|
||||
await super().initialize()
|
||||
|
||||
async def shutdown(self):
|
||||
await super().shutdown()
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
|
||||
# Get base parameters from parent
|
||||
params = await super()._get_params(request)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import base64
|
||||
import struct
|
||||
from collections.abc import AsyncIterator
|
||||
from typing import Any
|
||||
|
||||
|
|
@ -16,6 +18,7 @@ from llama_stack.apis.inference import (
|
|||
OpenAIChatCompletion,
|
||||
OpenAIChatCompletionChunk,
|
||||
OpenAICompletion,
|
||||
OpenAIEmbeddingData,
|
||||
OpenAIEmbeddingsResponse,
|
||||
OpenAIEmbeddingUsage,
|
||||
OpenAIMessageParam,
|
||||
|
|
@ -26,7 +29,6 @@ from llama_stack.core.request_headers import NeedsRequestProviderData
|
|||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
b64_encode_openai_embeddings_response,
|
||||
convert_message_to_openai_dict_new,
|
||||
convert_tooldef_to_openai_tool,
|
||||
get_sampling_options,
|
||||
|
|
@ -334,6 +336,7 @@ class LiteLLMOpenAIMixin(
|
|||
api_key=self.get_api_key(),
|
||||
api_base=self.api_base,
|
||||
)
|
||||
logger.info(f"params to litellm (openai compat): {params}")
|
||||
return await litellm.acompletion(**params)
|
||||
|
||||
async def check_model_availability(self, model: str) -> bool:
|
||||
|
|
@ -349,3 +352,28 @@ class LiteLLMOpenAIMixin(
|
|||
return False
|
||||
|
||||
return model in litellm.models_by_provider[self.litellm_provider_name]
|
||||
|
||||
|
||||
def b64_encode_openai_embeddings_response(
|
||||
response_data: list[dict], encoding_format: str | None = "float"
|
||||
) -> list[OpenAIEmbeddingData]:
|
||||
"""
|
||||
Process the OpenAI embeddings response to encode the embeddings in base64 format if specified.
|
||||
"""
|
||||
data = []
|
||||
for i, embedding_data in enumerate(response_data):
|
||||
if encoding_format == "base64":
|
||||
byte_array = bytearray()
|
||||
for embedding_value in embedding_data["embedding"]:
|
||||
byte_array.extend(struct.pack("f", float(embedding_value)))
|
||||
|
||||
response_embedding = base64.b64encode(byte_array).decode("utf-8")
|
||||
else:
|
||||
response_embedding = embedding_data["embedding"]
|
||||
data.append(
|
||||
OpenAIEmbeddingData(
|
||||
embedding=response_embedding,
|
||||
index=i,
|
||||
)
|
||||
)
|
||||
return data
|
||||
|
|
|
|||
|
|
@ -3,9 +3,7 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import base64
|
||||
import json
|
||||
import struct
|
||||
import time
|
||||
import uuid
|
||||
import warnings
|
||||
|
|
@ -103,7 +101,6 @@ from llama_stack.apis.inference import (
|
|||
JsonSchemaResponseFormat,
|
||||
Message,
|
||||
OpenAIChatCompletion,
|
||||
OpenAIEmbeddingData,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseFormatParam,
|
||||
SamplingParams,
|
||||
|
|
@ -1402,28 +1399,3 @@ def prepare_openai_embeddings_params(
|
|||
params["user"] = user
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def b64_encode_openai_embeddings_response(
|
||||
response_data: list[dict], encoding_format: str | None = "float"
|
||||
) -> list[OpenAIEmbeddingData]:
|
||||
"""
|
||||
Process the OpenAI embeddings response to encode the embeddings in base64 format if specified.
|
||||
"""
|
||||
data = []
|
||||
for i, embedding_data in enumerate(response_data):
|
||||
if encoding_format == "base64":
|
||||
byte_array = bytearray()
|
||||
for embedding_value in embedding_data["embedding"]:
|
||||
byte_array.extend(struct.pack("f", float(embedding_value)))
|
||||
|
||||
response_embedding = base64.b64encode(byte_array).decode("utf-8")
|
||||
else:
|
||||
response_embedding = embedding_data["embedding"]
|
||||
data.append(
|
||||
OpenAIEmbeddingData(
|
||||
embedding=response_embedding,
|
||||
index=i,
|
||||
)
|
||||
)
|
||||
return data
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue