mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-12-20 18:42:26 +00:00

commit 5ef31f6752 (parent e0f1788e9e)

removing the base64 encoding

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

3 changed files with 2 additions and 27 deletions

@@ -71,7 +71,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
     process_chat_completion_stream_response,
     process_completion_response,
     process_completion_stream_response,
-    process_embedding_b64_encoded_input,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import (
     chat_completion_request_to_prompt,

@@ -397,6 +396,7 @@ class OllamaInferenceAdapter(
         if model_obj.provider_resource_id is None:
             raise ValueError(f"Model {model} has no provider_resource_id set")

+        # Note, at the moment Ollama does not support encoding_format, dimensions, and user parameters
         params = prepare_openai_embeddings_params(
             model=model_obj.provider_resource_id,
             input=input,

@@ -404,9 +404,6 @@ class OllamaInferenceAdapter(
             dimensions=dimensions,
             user=user,
         )
-        # Note, at the moment Ollama does not support encoding_format, dimensions, and user parameters
-        # but we implement the encoding here
-        params = process_embedding_b64_encoded_input(params)

         response = await self.openai_client.embeddings.create(**params)
         data = b64_encode_openai_embeddings_response(response.data, encoding_format)

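After these two hunks the adapter forwards the request input as plain text: prepare_openai_embeddings_params builds the request, the OpenAI-compatible client sends it, and any base64 handling happens only when the response is converted by b64_encode_openai_embeddings_response. For a sense of what that looks like on the wire, here is a minimal, self-contained sketch of calling Ollama's OpenAI-compatible embeddings endpoint the same way; the base URL, API key placeholder, model name, and script scaffolding are illustrative assumptions (and it assumes a local Ollama serving an embedding model), none of which come from the diff.

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    # Ollama's OpenAI-compatible endpoint; URL and model are illustrative defaults.
    client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
    # Only model and input are sent in this sketch; the input text goes through
    # as-is and is NOT base64-encoded first, which is the behavior this commit removes.
    response = await client.embeddings.create(
        model="all-minilm",
        input=["hello world"],
    )
    print(len(response.data[0].embedding))


asyncio.run(main())
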
@@ -1515,24 +1515,6 @@ def prepare_openai_embeddings_params(
     return params


-def process_embedding_b64_encoded_input(params: dict[str, Any]) -> dict[str, Any]:
-    """
-    Process the embeddings parameters to encode the input in base64 format if specified.
-    Currently implemented for ollama as base64 is not yet supported by their compatible API.
-    """
-    if params.get("encoding_format") == "base64":
-        processed_params = params.copy()
-        input = params.get("input")
-        if isinstance(input, str):
-            processed_params["input"] = base64.b64encode(input.encode()).decode()
-        elif isinstance(input, list):
-            processed_params["input"] = [base64.b64encode(i.encode()).decode() for i in input]
-    else:
-        return params
-
-    return processed_params
-
-
 def b64_encode_openai_embeddings_response(
     response_data: dict, encoding_format: str | None = "float"
 ) -> list[OpenAIEmbeddingData]:

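The deleted helper base64-encoded the request input whenever encoding_format was "base64". In the OpenAI embeddings API, encoding_format describes how the returned vectors are serialized (the raw float32 buffer, base64-encoded), not how the input text is sent, so the response-side conversion kept in b64_encode_openai_embeddings_response is the part that matters. The sketch below shows that response-side packing in isolation; it illustrates the format only and is not the actual body of b64_encode_openai_embeddings_response.

import base64
import struct


def floats_to_openai_b64(embedding: list[float]) -> str:
    # Pack the vector as little-endian float32 bytes and base64-encode the buffer,
    # the wire format OpenAI-style APIs use for encoding_format="base64".
    packed = struct.pack(f"<{len(embedding)}f", *embedding)
    return base64.b64encode(packed).decode("ascii")


# Example: a 3-dimensional embedding packs to 12 bytes, i.e. a 16-character base64 string.
print(floats_to_openai_b64([0.1, -0.2, 0.3]))
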
@@ -34,11 +34,7 @@ def skip_if_model_doesnt_support_variable_dimensions(model_id):
         pytest.skip("{model_id} does not support variable output embedding dimensions")


-@pytest.fixture(
-    params=[
-        "openai_client",
-    ]
-)
+@pytest.fixture(params=["openai_client", "llama_stack_client"])
 def compat_client(request, client_with_models):
     if request.param == "openai_client" and isinstance(client_with_models, LlamaStackAsLibraryClient):
         pytest.skip("OpenAI client tests not supported with library client")

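With the fixture collapsed into a single parametrized decorator, every test that requests compat_client is collected once per backend and carries the parameter in its test ID (e.g. test_embeddings[openai_client] and test_embeddings[llama_stack_client]). A minimal illustrative test, not taken from the changed file:

def test_compat_client_smoke(compat_client):
    # Collected twice by pytest, once per entry in the fixture's params list.
    # The request.param check inside the fixture skips the openai_client case
    # when the stack runs as a library client, as shown in the hunk above.
    assert compat_client is not None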