removing the base64 encoding

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
Author: Francisco Javier Arceo
Date: 2025-06-13 14:13:20 -04:00
parent e0f1788e9e
commit 5ef31f6752
3 changed files with 2 additions and 27 deletions


@@ -71,7 +71,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
     process_chat_completion_stream_response,
     process_completion_response,
     process_completion_stream_response,
-    process_embedding_b64_encoded_input,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import (
     chat_completion_request_to_prompt,
@@ -397,6 +396,7 @@ class OllamaInferenceAdapter(
         if model_obj.provider_resource_id is None:
             raise ValueError(f"Model {model} has no provider_resource_id set")
+        # Note, at the moment Ollama does not support encoding_format, dimensions, and user parameters
         params = prepare_openai_embeddings_params(
             model=model_obj.provider_resource_id,
             input=input,
@@ -404,9 +404,6 @@ class OllamaInferenceAdapter(
             dimensions=dimensions,
             user=user,
         )
-        # Note, at the moment Ollama does not support encoding_format, dimensions, and user parameters
-        # but we implement the encoding here
-        params = process_embedding_b64_encoded_input(params)
         response = await self.openai_client.embeddings.create(**params)
         data = b64_encode_openai_embeddings_response(response.data, encoding_format)
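
For context on the call path that survives this hunk: Ollama's OpenAI-compatible endpoint returns plain float vectors, so when a caller asks for encoding_format="base64" the adapter re-encodes the response itself via b64_encode_openai_embeddings_response. A minimal sketch of that conversion, assuming the standard OpenAI wire format (each embedding packed as little-endian float32 bytes, then base64-encoded); the helper name floats_to_b64 is illustrative, not the repo's:

import base64
import struct


def floats_to_b64(vector: list[float]) -> str:
    # Pack the floats as little-endian float32 and base64-encode the raw
    # bytes, the representation OpenAI uses for encoding_format="base64".
    packed = struct.pack(f"<{len(vector)}f", *vector)
    return base64.b64encode(packed).decode("ascii")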


@@ -1515,24 +1515,6 @@ def prepare_openai_embeddings_params(
     return params


-def process_embedding_b64_encoded_input(params: dict[str, Any]) -> dict[str, Any]:
-    """
-    Process the embeddings parameters to encode the input in base64 format if specified.
-    Currently implemented for ollama as base64 is not yet supported by their compatible API.
-    """
-    if params.get("encoding_format") == "base64":
-        processed_params = params.copy()
-        input = params.get("input")
-        if isinstance(input, str):
-            processed_params["input"] = base64.b64encode(input.encode()).decode()
-        elif isinstance(input, list):
-            processed_params["input"] = [base64.b64encode(i.encode()).decode() for i in input]
-    else:
-        return params
-    return processed_params


 def b64_encode_openai_embeddings_response(
     response_data: dict, encoding_format: str | None = "float"
 ) -> list[OpenAIEmbeddingData]:
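
With the input-side encoding removed, base64 handling only affects the response. For reference, a client that requests encoding_format="base64" can unpack the result as follows; this is a hedged sketch, and the base URL, API key, and model name are placeholders rather than values from this commit:

import base64
import struct

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # placeholder endpoint
resp = client.embeddings.create(
    model="all-minilm:latest",  # placeholder embedding model served by Ollama
    input="hello world",
    encoding_format="base64",
)

raw = base64.b64decode(resp.data[0].embedding)  # embedding arrives as a base64 string
vector = struct.unpack(f"<{len(raw) // 4}f", raw)  # 4 bytes per little-endian float32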


@@ -34,11 +34,7 @@ def skip_if_model_doesnt_support_variable_dimensions(model_id):
         pytest.skip("{model_id} does not support variable output embedding dimensions")


-@pytest.fixture(
-    params=[
-        "openai_client",
-    ]
-)
+@pytest.fixture(params=["openai_client", "llama_stack_client"])
 def compat_client(request, client_with_models):
     if request.param == "openai_client" and isinstance(client_with_models, LlamaStackAsLibraryClient):
         pytest.skip("OpenAI client tests not supported with library client")