diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index d3d107e1d..67a22cbe3 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -7,12 +7,10 @@ import asyncio import base64 -import uuid from collections.abc import AsyncGenerator, AsyncIterator from typing import Any -from ollama import AsyncClient # type: ignore[attr-defined] -from openai import AsyncOpenAI +from ollama import AsyncClient as AsyncOllamaClient from llama_stack.apis.common.content_types import ( ImageContentItem, @@ -37,9 +35,6 @@ from llama_stack.apis.inference import ( Message, OpenAIChatCompletion, OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, OpenAIMessageParam, OpenAIResponseFormatParam, ResponseFormat, @@ -64,15 +59,14 @@ from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionChoice, OpenAICompatCompletionResponse, - b64_encode_openai_embeddings_response, get_sampling_options, prepare_openai_completion_params, - prepare_openai_embeddings_params, process_chat_completion_response, process_chat_completion_stream_response, process_completion_response, process_completion_stream_response, ) +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.prompt_adapter import ( chat_completion_request_to_prompt, completion_request_to_prompt, @@ -89,6 +83,7 @@ logger = get_logger(name=__name__, category="inference::ollama") class OllamaInferenceAdapter( + OpenAIMixin, InferenceProvider, ModelsProtocolPrivate, ): @@ -98,23 +93,21 @@ class OllamaInferenceAdapter( def __init__(self, config: OllamaImplConfig) -> None: self.register_helper = ModelRegistryHelper(MODEL_ENTRIES) self.config = config - self._clients: dict[asyncio.AbstractEventLoop, AsyncClient] = {} - self._openai_client = None + self._clients: dict[asyncio.AbstractEventLoop, AsyncOllamaClient] = {} @property - def client(self) -> AsyncClient: + def ollama_client(self) -> AsyncOllamaClient: # ollama client attaches itself to the current event loop (sadly?) loop = asyncio.get_running_loop() if loop not in self._clients: - self._clients[loop] = AsyncClient(host=self.config.url) + self._clients[loop] = AsyncOllamaClient(host=self.config.url) return self._clients[loop] - @property - def openai_client(self) -> AsyncOpenAI: - if self._openai_client is None: - url = self.config.url.rstrip("/") - self._openai_client = AsyncOpenAI(base_url=f"{url}/v1", api_key="ollama") - return self._openai_client + def get_api_key(self): + return "NO_KEY" + + def get_base_url(self): + return self.config.url.rstrip("/") + "/v1" async def initialize(self) -> None: logger.info(f"checking connectivity to Ollama at `{self.config.url}`...") @@ -129,7 +122,7 @@ class OllamaInferenceAdapter( async def list_models(self) -> list[Model] | None: provider_id = self.__provider_id__ - response = await self.client.list() + response = await self.ollama_client.list() # always add the two embedding models which can be pulled on demand models = [ @@ -189,7 +182,7 @@ class OllamaInferenceAdapter( HealthResponse: A dictionary containing the health status. """ try: - await self.client.ps() + await self.ollama_client.ps() return HealthResponse(status=HealthStatus.OK) except Exception as e: return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") @@ -238,7 +231,7 @@ class OllamaInferenceAdapter( params = await self._get_params(request) async def _generate_and_convert_to_openai_compat(): - s = await self.client.generate(**params) + s = await self.ollama_client.generate(**params) async for chunk in s: choice = OpenAICompatCompletionChoice( finish_reason=chunk["done_reason"] if chunk["done"] else None, @@ -254,7 +247,7 @@ class OllamaInferenceAdapter( async def _nonstream_completion(self, request: CompletionRequest) -> CompletionResponse: params = await self._get_params(request) - r = await self.client.generate(**params) + r = await self.ollama_client.generate(**params) choice = OpenAICompatCompletionChoice( finish_reason=r["done_reason"] if r["done"] else None, @@ -346,9 +339,9 @@ class OllamaInferenceAdapter( async def _nonstream_chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse: params = await self._get_params(request) if "messages" in params: - r = await self.client.chat(**params) + r = await self.ollama_client.chat(**params) else: - r = await self.client.generate(**params) + r = await self.ollama_client.generate(**params) if "message" in r: choice = OpenAICompatCompletionChoice( @@ -372,9 +365,9 @@ class OllamaInferenceAdapter( async def _generate_and_convert_to_openai_compat(): if "messages" in params: - s = await self.client.chat(**params) + s = await self.ollama_client.chat(**params) else: - s = await self.client.generate(**params) + s = await self.ollama_client.generate(**params) async for chunk in s: if "message" in chunk: choice = OpenAICompatCompletionChoice( @@ -407,7 +400,7 @@ class OllamaInferenceAdapter( assert all(not content_has_media(content) for content in contents), ( "Ollama does not support media for embeddings" ) - response = await self.client.embed( + response = await self.ollama_client.embed( model=model.provider_resource_id, input=[interleaved_content_as_str(content) for content in contents], ) @@ -422,14 +415,14 @@ class OllamaInferenceAdapter( pass # Ignore statically unknown model, will check live listing if model.model_type == ModelType.embedding: - response = await self.client.list() + response = await self.ollama_client.list() if model.provider_resource_id not in [m.model for m in response.models]: - await self.client.pull(model.provider_resource_id) + await self.ollama_client.pull(model.provider_resource_id) # we use list() here instead of ps() - # - ps() only lists running models, not available models # - models not currently running are run by the ollama server as needed - response = await self.client.list() + response = await self.ollama_client.list() available_models = [m.model for m in response.models] provider_resource_id = model.provider_resource_id @@ -448,90 +441,6 @@ class OllamaInferenceAdapter( return model - async def openai_embeddings( - self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, - ) -> OpenAIEmbeddingsResponse: - model_obj = await self._get_model(model) - if model_obj.provider_resource_id is None: - raise ValueError(f"Model {model} has no provider_resource_id set") - - # Note, at the moment Ollama does not support encoding_format, dimensions, and user parameters - params = prepare_openai_embeddings_params( - model=model_obj.provider_resource_id, - input=input, - encoding_format=encoding_format, - dimensions=dimensions, - user=user, - ) - - response = await self.openai_client.embeddings.create(**params) - data = b64_encode_openai_embeddings_response(response.data, encoding_format) - - usage = OpenAIEmbeddingUsage( - prompt_tokens=response.usage.prompt_tokens, - total_tokens=response.usage.total_tokens, - ) - # TODO: Investigate why model_obj.identifier is used instead of response.model - return OpenAIEmbeddingsResponse( - data=data, - model=model_obj.identifier, - usage=usage, - ) - - async def openai_completion( - self, - model: str, - prompt: str | list[str] | list[int] | list[list[int]], - best_of: int | None = None, - echo: bool | None = None, - frequency_penalty: float | None = None, - logit_bias: dict[str, float] | None = None, - logprobs: bool | None = None, - max_tokens: int | None = None, - n: int | None = None, - presence_penalty: float | None = None, - seed: int | None = None, - stop: str | list[str] | None = None, - stream: bool | None = None, - stream_options: dict[str, Any] | None = None, - temperature: float | None = None, - top_p: float | None = None, - user: str | None = None, - guided_choice: list[str] | None = None, - prompt_logprobs: int | None = None, - suffix: str | None = None, - ) -> OpenAICompletion: - if not isinstance(prompt, str): - raise ValueError("Ollama does not support non-string prompts for completion") - - model_obj = await self._get_model(model) - params = await prepare_openai_completion_params( - model=model_obj.provider_resource_id, - prompt=prompt, - best_of=best_of, - echo=echo, - frequency_penalty=frequency_penalty, - logit_bias=logit_bias, - logprobs=logprobs, - max_tokens=max_tokens, - n=n, - presence_penalty=presence_penalty, - seed=seed, - stop=stop, - stream=stream, - stream_options=stream_options, - temperature=temperature, - top_p=top_p, - user=user, - suffix=suffix, - ) - return await self.openai_client.completions.create(**params) # type: ignore - async def openai_chat_completion( self, model: str, @@ -599,25 +508,7 @@ class OllamaInferenceAdapter( top_p=top_p, user=user, ) - response = await self.openai_client.chat.completions.create(**params) - return await self._adjust_ollama_chat_completion_response_ids(response) - - async def _adjust_ollama_chat_completion_response_ids( - self, - response: OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk], - ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - id = f"chatcmpl-{uuid.uuid4()}" - if isinstance(response, AsyncIterator): - - async def stream_with_chunk_ids() -> AsyncIterator[OpenAIChatCompletionChunk]: - async for chunk in response: - chunk.id = id - yield chunk - - return stream_with_chunk_ids() - else: - response.id = id - return response + return await OpenAIMixin.openai_chat_completion(self, **params) async def convert_message_to_openai_dict_for_ollama(message: Message) -> list[dict]: diff --git a/tests/integration/recordings/responses/3a81146f2afa.json b/tests/integration/recordings/responses/3a81146f2afa.json new file mode 100644 index 000000000..e2d2d52d6 --- /dev/null +++ b/tests/integration/recordings/responses/3a81146f2afa.json @@ -0,0 +1,990 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ", + "max_tokens": 50, + "stream": true, + "extra_body": {} + }, + "endpoint": "/v1/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": [ + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "Blue" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": ".\n\n" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "The" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " completed" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " sentence" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " is" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " a" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " well" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "-known" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " phrase" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " from" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " a" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " traditional" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " English" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " poem" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": ":\n\n" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "\"" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "R" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "oses" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " are" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " red" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "," + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " v" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "io" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "lets" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " are" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " blue" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": ",\n" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "Sugar" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " is" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " sweet" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "," + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " and" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " so" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " are" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " you" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": ".\"" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " However" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "," + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " in" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " many" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " variations" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " of" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " this" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " poem" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "," + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " the" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " line" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": " \"" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": null, + "index": 0, + "logprobs": null, + "text": "vio" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + }, + { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-439", + "choices": [ + { + "finish_reason": "length", + "index": 0, + "logprobs": null, + "text": "" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": null + } + } + ], + "is_streaming": true + } +} diff --git a/tests/integration/recordings/responses/6412295819a1.json b/tests/integration/recordings/responses/6412295819a1.json new file mode 100644 index 000000000..728380b02 --- /dev/null +++ b/tests/integration/recordings/responses/6412295819a1.json @@ -0,0 +1,43 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ", + "stream": false, + "extra_body": {} + }, + "endpoint": "/v1/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-104", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "text": "blue.\n\nI completed the sentence with \"blue\" because it is a common completion used to complete the traditional nursery rhyme, which ends with:\n\nRoses are red,\nViolets are blue.\n\nThe complete rhyme is often remembered and recited as follows:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you!" + } + ], + "created": 1757857132, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 72, + "prompt_tokens": 50, + "total_tokens": 122, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +} diff --git a/tests/integration/recordings/responses/ecae140151d1.json b/tests/integration/recordings/responses/ecae140151d1.json new file mode 100644 index 000000000..433597080 --- /dev/null +++ b/tests/integration/recordings/responses/ecae140151d1.json @@ -0,0 +1,43 @@ +{ + "request": { + "method": "POST", + "url": "http://0.0.0.0:11434/v1/v1/completions", + "headers": {}, + "body": { + "model": "llama3.2:3b-instruct-fp16", + "prompt": "Say completions", + "max_tokens": 20, + "extra_body": {} + }, + "endpoint": "/v1/completions", + "model": "llama3.2:3b-instruct-fp16" + }, + "response": { + "body": { + "__type__": "openai.types.completion.Completion", + "__data__": { + "id": "cmpl-406", + "choices": [ + { + "finish_reason": "length", + "index": 0, + "logprobs": null, + "text": "Sure, I'd be happy to provide some definitions and examples of related words or phrases.\n\nTo better" + } + ], + "created": 1757857133, + "model": "llama3.2:3b-instruct-fp16", + "object": "text_completion", + "system_fingerprint": "fp_ollama", + "usage": { + "completion_tokens": 20, + "prompt_tokens": 28, + "total_tokens": 48, + "completion_tokens_details": null, + "prompt_tokens_details": null + } + } + }, + "is_streaming": false + } +}