mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-12-20 18:42:26 +00:00

commit 5ef31f6752 (parent e0f1788e9e)

removing the base64 encoding

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

3 changed files with 2 additions and 27 deletions

@@ -71,7 +71,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
     process_chat_completion_stream_response,
     process_completion_response,
     process_completion_stream_response,
-    process_embedding_b64_encoded_input,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import (
     chat_completion_request_to_prompt,

@@ -397,6 +396,7 @@ class OllamaInferenceAdapter(
         if model_obj.provider_resource_id is None:
             raise ValueError(f"Model {model} has no provider_resource_id set")

+        # Note, at the moment Ollama does not support encoding_format, dimensions, and user parameters
         params = prepare_openai_embeddings_params(
             model=model_obj.provider_resource_id,
             input=input,

@@ -404,9 +404,6 @@ class OllamaInferenceAdapter(
             dimensions=dimensions,
             user=user,
         )
-        # Note, at the moment Ollama does not support encoding_format, dimensions, and user parameters
-        # but we implement the encoding here
-        params = process_embedding_b64_encoded_input(params)

         response = await self.openai_client.embeddings.create(**params)
         data = b64_encode_openai_embeddings_response(response.data, encoding_format)

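After these two hunks the adapter forwards the request input as plain text: prepare_openai_embeddings_params builds the request, the OpenAI-compatible client sends it, and any base64 handling happens only when the response is converted by b64_encode_openai_embeddings_response. For a sense of what that looks like on the wire, here is a minimal, self-contained sketch of calling Ollama's OpenAI-compatible embeddings endpoint the same way; the base URL, API key placeholder, model name, and script scaffolding are illustrative assumptions (and it assumes a local Ollama serving an embedding model), none of which come from the diff.

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    # Ollama's OpenAI-compatible endpoint; URL and model are illustrative defaults.
    client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
    # Only model and input are sent in this sketch; the input text goes through
    # as-is and is NOT base64-encoded first, which is the behavior this commit removes.
    response = await client.embeddings.create(
        model="all-minilm",
        input=["hello world"],
    )
    print(len(response.data[0].embedding))


asyncio.run(main())
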
@@ -1515,24 +1515,6 @@ def prepare_openai_embeddings_params(
     return params


-def process_embedding_b64_encoded_input(params: dict[str, Any]) -> dict[str, Any]:
-    """
-    Process the embeddings parameters to encode the input in base64 format if specified.
-    Currently implemented for ollama as base64 is not yet supported by their compatible API.
-    """
-    if params.get("encoding_format") == "base64":
-        processed_params = params.copy()
-        input = params.get("input")
-        if isinstance(input, str):
-            processed_params["input"] = base64.b64encode(input.encode()).decode()
-        elif isinstance(input, list):
-            processed_params["input"] = [base64.b64encode(i.encode()).decode() for i in input]
-    else:
-        return params
-
-    return processed_params
-
-
 def b64_encode_openai_embeddings_response(
     response_data: dict, encoding_format: str | None = "float"
 ) -> list[OpenAIEmbeddingData]:

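The deleted helper base64-encoded the request input whenever encoding_format was "base64". In the OpenAI embeddings API, encoding_format describes how the returned vectors are serialized (the raw float32 buffer, base64-encoded), not how the input text is sent, so the response-side conversion kept in b64_encode_openai_embeddings_response is the part that matters. The sketch below shows that response-side packing in isolation; it illustrates the format only and is not the actual body of b64_encode_openai_embeddings_response.

import base64
import struct


def floats_to_openai_b64(embedding: list[float]) -> str:
    # Pack the vector as little-endian float32 bytes and base64-encode the buffer,
    # the wire format OpenAI-style APIs use for encoding_format="base64".
    packed = struct.pack(f"<{len(embedding)}f", *embedding)
    return base64.b64encode(packed).decode("ascii")


# Example: a 3-dimensional embedding packs to 12 bytes, i.e. a 16-character base64 string.
print(floats_to_openai_b64([0.1, -0.2, 0.3]))
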
@@ -34,11 +34,7 @@ def skip_if_model_doesnt_support_variable_dimensions(model_id):
         pytest.skip("{model_id} does not support variable output embedding dimensions")


-@pytest.fixture(
-    params=[
-        "openai_client",
-    ]
-)
+@pytest.fixture(params=["openai_client", "llama_stack_client"])
 def compat_client(request, client_with_models):
     if request.param == "openai_client" and isinstance(client_with_models, LlamaStackAsLibraryClient):
         pytest.skip("OpenAI client tests not supported with library client")

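With the fixture collapsed into a single parametrized decorator, every test that requests compat_client is collected once per backend and carries the parameter in its test ID (e.g. test_embeddings[openai_client] and test_embeddings[llama_stack_client]). A minimal illustrative test, not taken from the changed file:

def test_compat_client_smoke(compat_client):
    # Collected twice by pytest, once per entry in the fixture's params list.
    # The request.param check inside the fixture skips the openai_client case
    # when the stack runs as a library client, as shown in the hunk above.
    assert compat_client is not None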