refactoring some code into openai_compat

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
Francisco Javier Arceo 2025-06-13 12:06:58 -04:00
parent 89d4a05303
commit e0f1788e9e
4 changed files with 95 additions and 40 deletions

View file

@ -45,7 +45,6 @@ from llama_stack.apis.inference.inference import (
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAICompletion,
OpenAIEmbeddingData,
OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage,
OpenAIMessageParam,
@ -64,12 +63,15 @@ from llama_stack.providers.utils.inference.model_registry import (
from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice,
OpenAICompatCompletionResponse,
b64_encode_openai_embeddings_response,
get_sampling_options,
prepare_openai_completion_params,
prepare_openai_embeddings_params,
process_chat_completion_response,
process_chat_completion_stream_response,
process_completion_response,
process_completion_stream_response,
process_embedding_b64_encoded_input,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
chat_completion_request_to_prompt,
@ -392,28 +394,22 @@ class OllamaInferenceAdapter(
if model_obj.model_type != ModelType.embedding:
raise ValueError(f"Model {model} is not an embedding model")
params: dict[str, Any] = {
"model": model_obj.provider_resource_id,
"input": input,
}
if model_obj.provider_resource_id is None:
raise ValueError(f"Model {model} has no provider_resource_id set")
params = prepare_openai_embeddings_params(
model=model_obj.provider_resource_id,
input=input,
encoding_format=encoding_format,
dimensions=dimensions,
user=user,
)
# Note, at the moment Ollama does not support encoding_format, dimensions, and user parameters
if encoding_format is not None:
params["encoding_format"] = encoding_format
if dimensions is not None:
params["dimensions"] = dimensions
if user is not None:
params["user"] = user
# but we implement the encoding here
params = process_embedding_b64_encoded_input(params)
response = await self.openai_client.embeddings.create(**params)
data = []
for i, embedding_data in enumerate(response.data):
data.append(
OpenAIEmbeddingData(
embedding=embedding_data.embedding,
index=i,
)
)
data = b64_encode_openai_embeddings_response(response.data, encoding_format)
usage = OpenAIEmbeddingUsage(
prompt_tokens=response.usage.prompt_tokens,