diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py
index d4bf692f8..f49348c27 100644
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -71,7 +71,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
     process_chat_completion_stream_response,
     process_completion_response,
     process_completion_stream_response,
-    process_embedding_b64_encoded_input,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import (
     chat_completion_request_to_prompt,
@@ -397,6 +396,7 @@ class OllamaInferenceAdapter(
         if model_obj.provider_resource_id is None:
             raise ValueError(f"Model {model} has no provider_resource_id set")
 
+        # Note, at the moment Ollama does not support encoding_format, dimensions, and user parameters
         params = prepare_openai_embeddings_params(
             model=model_obj.provider_resource_id,
             input=input,
@@ -404,9 +404,6 @@ class OllamaInferenceAdapter(
             dimensions=dimensions,
             user=user,
         )
-        # Note, at the moment Ollama does not support encoding_format, dimensions, and user parameters
-        # but we implement the encoding here
-        params = process_embedding_b64_encoded_input(params)
 
         response = await self.openai_client.embeddings.create(**params)
         data = b64_encode_openai_embeddings_response(response.data, encoding_format)
diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index 54a8d4de6..5f0f7fa58 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -1515,24 +1515,6 @@ def prepare_openai_embeddings_params(
     return params
 
 
-def process_embedding_b64_encoded_input(params: dict[str, Any]) -> dict[str, Any]:
-    """
-    Process the embeddings parameters to encode the input in base64 format if specified.
-    Currently implemented for ollama as base64 is not yet supported by their compatible API.
-    """
-    if params.get("encoding_format") == "base64":
-        processed_params = params.copy()
-        input = params.get("input")
-        if isinstance(input, str):
-            processed_params["input"] = base64.b64encode(input.encode()).decode()
-        elif isinstance(input, list):
-            processed_params["input"] = [base64.b64encode(i.encode()).decode() for i in input]
-    else:
-        return params
-
-    return processed_params
-
-
 def b64_encode_openai_embeddings_response(
     response_data: dict, encoding_format: str | None = "float"
 ) -> list[OpenAIEmbeddingData]:
diff --git a/tests/integration/inference/test_openai_embeddings.py b/tests/integration/inference/test_openai_embeddings.py
index 6d5068a34..1b8bd9038 100644
--- a/tests/integration/inference/test_openai_embeddings.py
+++ b/tests/integration/inference/test_openai_embeddings.py
@@ -34,11 +34,7 @@ def skip_if_model_doesnt_support_variable_dimensions(model_id):
         pytest.skip("{model_id} does not support variable output embedding dimensions")
 
 
-@pytest.fixture(
-    params=[
-        "openai_client",
-    ]
-)
+@pytest.fixture(params=["openai_client", "llama_stack_client"])
 def compat_client(request, client_with_models):
     if request.param == "openai_client" and isinstance(client_with_models, LlamaStackAsLibraryClient):
         pytest.skip("OpenAI client tests not supported with library client")