Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-01 00:05:18 +00:00)
feat(tests): introduce inference record/replay to increase test reliability (#2941)
Implements a recording and replay system for inference API calls that eliminates the dependency on live inference providers during testing. The system treats inference as deterministic: it records real API responses once and replays them in subsequent test runs. It applies to OpenAI clients (which should cover most inference requests) as well as the Ollama AsyncClient. Storage is hybrid: SQLite for fast lookups, plus one JSON file per request for easy greppability and debugging. As expected, tests become much faster (more than 3x in inference testing alone).

Record mode:

```bash
LLAMA_STACK_TEST_INFERENCE_MODE=record LLAMA_STACK_TEST_RECORDING_DIR=<...> \
  uv run pytest -s -v tests/integration/inference \
  --stack-config=starter \
  -k "not( builtin_tool or safety_with_image or code_interpreter or test_rag )" \
  --text-model="ollama/llama3.2:3b-instruct-fp16" \
  --embedding-model=sentence-transformers/all-MiniLM-L6-v2
```

Replay mode:

```bash
LLAMA_STACK_TEST_INFERENCE_MODE=replay LLAMA_STACK_TEST_RECORDING_DIR=<...> \
  uv run pytest -s -v tests/integration/inference \
  --stack-config=starter \
  -k "not( builtin_tool or safety_with_image or code_interpreter or test_rag )" \
  --text-model="ollama/llama3.2:3b-instruct-fp16" \
  --embedding-model=sentence-transformers/all-MiniLM-L6-v2
```

- `LLAMA_STACK_TEST_INFERENCE_MODE`: `live` (default), `record`, or `replay`
- `LLAMA_STACK_TEST_RECORDING_DIR`: storage location (must be specified for record or replay modes)
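The same machinery can also be driven directly from a test, without going through stack construction. A minimal sketch, assuming only the `inference_recording` context manager added in this PR; the client, model name, and storage path are illustrative:

```python
# Sketch: drive record/replay programmatically via the inference_recording()
# context manager from this PR. Model name, prompt, and paths are illustrative.
import asyncio

from openai import AsyncOpenAI

from llama_stack.testing.inference_recorder import inference_recording


async def main() -> None:
    # Client pointed at a local Ollama OpenAI-compatible endpoint (illustrative).
    client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="unused")

    # mode="record" patches the OpenAI/Ollama client classes, forwards the call
    # to the live server, and saves the response under storage_dir.
    with inference_recording(mode="record", storage_dir="/tmp/recordings"):
        reply = await client.chat.completions.create(
            model="llama3.2:3b-instruct-fp16",
            messages=[{"role": "user", "content": "What's the name of the Sun in latin?"}],
        )
        print(reply.choices[0].message.content)


asyncio.run(main())
```

Running the same block with `mode="replay"` serves the stored response without touching the network.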
parent abf1d6a703, commit 08b4a1deb3
33 changed files with 9880 additions and 2 deletions
@@ -79,11 +79,9 @@ class InferenceRouter(Inference):
 
     async def initialize(self) -> None:
         logger.debug("InferenceRouter.initialize")
-        pass
 
     async def shutdown(self) -> None:
         logger.debug("InferenceRouter.shutdown")
-        pass
 
     async def register_model(
         self,
@@ -94,6 +94,7 @@ RESOURCES = [
 
 REGISTRY_REFRESH_INTERVAL_SECONDS = 300
 REGISTRY_REFRESH_TASK = None
+TEST_RECORDING_CONTEXT = None
 
 
 async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
@@ -307,6 +308,15 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig
 async def construct_stack(
     run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None
 ) -> dict[Api, Any]:
+    if "LLAMA_STACK_TEST_INFERENCE_MODE" in os.environ:
+        from llama_stack.testing.inference_recorder import setup_inference_recording
+
+        global TEST_RECORDING_CONTEXT
+        TEST_RECORDING_CONTEXT = setup_inference_recording()
+        if TEST_RECORDING_CONTEXT:
+            TEST_RECORDING_CONTEXT.__enter__()
+            logger.info(f"Inference recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")
+
     dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name)
     policy = run_config.server.auth.access_policy if run_config.server.auth else []
     impls = await resolve_impls(
@@ -352,6 +362,13 @@ async def shutdown_stack(impls: dict[Api, Any]):
         except (Exception, asyncio.CancelledError) as e:
             logger.exception(f"Failed to shutdown {impl_name}: {e}")
 
+    global TEST_RECORDING_CONTEXT
+    if TEST_RECORDING_CONTEXT:
+        try:
+            TEST_RECORDING_CONTEXT.__exit__(None, None, None)
+        except Exception as e:
+            logger.error(f"Error during inference recording cleanup: {e}")
+
     global REGISTRY_REFRESH_TASK
     if REGISTRY_REFRESH_TASK:
         REGISTRY_REFRESH_TASK.cancel()
llama_stack/testing/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
llama_stack/testing/inference_recorder.py (new file, 480 lines)
@@ -0,0 +1,480 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from __future__ import annotations  # for forward references

import hashlib
import json
import os
import sqlite3
from collections.abc import Generator
from contextlib import contextmanager
from enum import StrEnum
from pathlib import Path
from typing import Any, Literal, cast

from llama_stack.log import get_logger

logger = get_logger(__name__, category="testing")

# Global state for the recording system
_current_mode: str | None = None
_current_storage: ResponseStorage | None = None
_original_methods: dict[str, Any] = {}

from openai.types.completion_choice import CompletionChoice

# update the "finish_reason" field, since its type definition is wrong (no None is accepted)
CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
CompletionChoice.model_rebuild()


class InferenceMode(StrEnum):
    LIVE = "live"
    RECORD = "record"
    REPLAY = "replay"


def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict[str, Any]) -> str:
    """Create a normalized hash of the request for consistent matching."""
    # Extract just the endpoint path
    from urllib.parse import urlparse

    parsed = urlparse(url)
    normalized = {"method": method.upper(), "endpoint": parsed.path, "body": body}

    # Create hash - sort_keys=True ensures deterministic ordering
    normalized_json = json.dumps(normalized, sort_keys=True)
    return hashlib.sha256(normalized_json.encode()).hexdigest()


def get_inference_mode() -> InferenceMode:
    return InferenceMode(os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "live").lower())


def setup_inference_recording():
    """
    Returns a context manager that can be used to record or replay inference requests. This is to be used in tests
    to increase their reliability and reduce reliance on expensive, external services.

    Currently, this is only supported for OpenAI and Ollama clients. These should cover the vast majority of use cases.
    Calls to the /models endpoint are not currently trapped. We probably need to add support for this.

    Two environment variables are required:
    - LLAMA_STACK_TEST_INFERENCE_MODE: The mode to run in. Must be 'live', 'record', or 'replay'.
    - LLAMA_STACK_TEST_RECORDING_DIR: The directory to store the recordings in.

    The recordings are stored in a SQLite database and a JSON file for each request. The SQLite database is used to
    quickly find the correct recording for a given request. The JSON files are used to store the request and response
    bodies.
    """
    mode = get_inference_mode()

    if mode not in InferenceMode:
        raise ValueError(f"Invalid LLAMA_STACK_TEST_INFERENCE_MODE: {mode}. Must be 'live', 'record', or 'replay'")

    if mode == InferenceMode.LIVE:
        return None

    if "LLAMA_STACK_TEST_RECORDING_DIR" not in os.environ:
        raise ValueError("LLAMA_STACK_TEST_RECORDING_DIR must be set for recording or replaying")
    storage_dir = os.environ["LLAMA_STACK_TEST_RECORDING_DIR"]

    return inference_recording(mode=mode, storage_dir=storage_dir)


def _serialize_response(response: Any) -> Any:
    if hasattr(response, "model_dump"):
        data = response.model_dump(mode="json")
        return {
            "__type__": f"{response.__class__.__module__}.{response.__class__.__qualname__}",
            "__data__": data,
        }
    elif hasattr(response, "__dict__"):
        return dict(response.__dict__)
    else:
        return response


def _deserialize_response(data: dict[str, Any]) -> Any:
    # Check if this is a serialized Pydantic model with type information
    if isinstance(data, dict) and "__type__" in data and "__data__" in data:
        try:
            # Import the original class and reconstruct the object
            module_path, class_name = data["__type__"].rsplit(".", 1)
            module = __import__(module_path, fromlist=[class_name])
            cls = getattr(module, class_name)

            if not hasattr(cls, "model_validate"):
                raise ValueError(f"Pydantic class {cls} does not support model_validate?")

            return cls.model_validate(data["__data__"])
        except (ImportError, AttributeError, TypeError, ValueError) as e:
            logger.warning(f"Failed to deserialize object of type {data['__type__']}: {e}")
            return data["__data__"]

    return data


class ResponseStorage:
    """Handles SQLite index + JSON file storage/retrieval for inference recordings."""

    def __init__(self, test_dir: Path):
        self.test_dir = test_dir
        self.responses_dir = self.test_dir / "responses"
        self.db_path = self.test_dir / "index.sqlite"

        self._ensure_directories()
        self._init_database()

    def _ensure_directories(self):
        self.test_dir.mkdir(parents=True, exist_ok=True)
        self.responses_dir.mkdir(exist_ok=True)

    def _init_database(self):
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS recordings (
                    request_hash TEXT PRIMARY KEY,
                    response_file TEXT,
                    endpoint TEXT,
                    model TEXT,
                    timestamp TEXT,
                    is_streaming BOOLEAN
                )
            """)

    def store_recording(self, request_hash: str, request: dict[str, Any], response: dict[str, Any]):
        """Store a request/response pair."""
        # Generate unique response filename
        response_file = f"{request_hash[:12]}.json"
        response_path = self.responses_dir / response_file

        # Serialize response body if needed
        serialized_response = dict(response)
        if "body" in serialized_response:
            if isinstance(serialized_response["body"], list):
                # Handle streaming responses (list of chunks)
                serialized_response["body"] = [_serialize_response(chunk) for chunk in serialized_response["body"]]
            else:
                # Handle single response
                serialized_response["body"] = _serialize_response(serialized_response["body"])

        # Save response to JSON file
        with open(response_path, "w") as f:
            json.dump({"request": request, "response": serialized_response}, f, indent=2)
            f.write("\n")
            f.flush()

        # Update SQLite index
        with sqlite3.connect(self.db_path) as conn:
            conn.execute(
                """
                INSERT OR REPLACE INTO recordings
                (request_hash, response_file, endpoint, model, timestamp, is_streaming)
                VALUES (?, ?, ?, ?, datetime('now'), ?)
                """,
                (
                    request_hash,
                    response_file,
                    request.get("endpoint", ""),
                    request.get("model", ""),
                    response.get("is_streaming", False),
                ),
            )

    def find_recording(self, request_hash: str) -> dict[str, Any] | None:
        """Find a recorded response by request hash."""
        with sqlite3.connect(self.db_path) as conn:
            result = conn.execute(
                "SELECT response_file FROM recordings WHERE request_hash = ?", (request_hash,)
            ).fetchone()

        if not result:
            return None

        response_file = result[0]
        response_path = self.responses_dir / response_file

        if not response_path.exists():
            return None

        with open(response_path) as f:
            data = json.load(f)

        # Deserialize response body if needed
        if "response" in data and "body" in data["response"]:
            if isinstance(data["response"]["body"], list):
                # Handle streaming responses
                data["response"]["body"] = [_deserialize_response(chunk) for chunk in data["response"]["body"]]
            else:
                # Handle single response
                data["response"]["body"] = _deserialize_response(data["response"]["body"])

        return cast(dict[str, Any], data)


async def _patched_inference_method(original_method, self, client_type, method_name=None, *args, **kwargs):
    global _current_mode, _current_storage

    if _current_mode == InferenceMode.LIVE or _current_storage is None:
        # Normal operation
        return await original_method(self, *args, **kwargs)

    # Get base URL and endpoint based on client type
    if client_type == "openai":
        base_url = str(self._client.base_url)

        # Determine endpoint based on the method's module/class path
        method_str = str(original_method)
        if "chat.completions" in method_str:
            endpoint = "/v1/chat/completions"
        elif "embeddings" in method_str:
            endpoint = "/v1/embeddings"
        elif "completions" in method_str:
            endpoint = "/v1/completions"
        else:
            # Fallback - try to guess from the self object
            if hasattr(self, "_resource") and hasattr(self._resource, "_resource"):
                resource_name = getattr(self._resource._resource, "_resource", "unknown")
                if "chat" in str(resource_name):
                    endpoint = "/v1/chat/completions"
                elif "embeddings" in str(resource_name):
                    endpoint = "/v1/embeddings"
                else:
                    endpoint = "/v1/completions"
            else:
                endpoint = "/v1/completions"

    elif client_type == "ollama":
        # Get base URL from the client (Ollama client uses host attribute)
        base_url = getattr(self, "host", "http://localhost:11434")
        if not base_url.startswith("http"):
            base_url = f"http://{base_url}"

        # Determine endpoint based on method name
        if method_name == "generate":
            endpoint = "/api/generate"
        elif method_name == "chat":
            endpoint = "/api/chat"
        elif method_name == "embed":
            endpoint = "/api/embeddings"
        elif method_name == "list":
            endpoint = "/api/tags"
        else:
            endpoint = f"/api/{method_name}"
    else:
        raise ValueError(f"Unknown client type: {client_type}")

    url = base_url.rstrip("/") + endpoint

    # Normalize request for matching
    method = "POST"
    headers = {}
    body = kwargs

    request_hash = normalize_request(method, url, headers, body)

    if _current_mode == InferenceMode.REPLAY:
        recording = _current_storage.find_recording(request_hash)
        if recording:
            response_body = recording["response"]["body"]

            if recording["response"].get("is_streaming", False):

                async def replay_stream():
                    for chunk in response_body:
                        yield chunk

                return replay_stream()
            else:
                return response_body
        else:
            raise RuntimeError(
                f"No recorded response found for request hash: {request_hash}\n"
                f"Endpoint: {endpoint}\n"
                f"Model: {body.get('model', 'unknown')}\n"
                f"To record this response, run with LLAMA_STACK_TEST_INFERENCE_MODE=record"
            )

    elif _current_mode == InferenceMode.RECORD:
        response = await original_method(self, *args, **kwargs)

        request_data = {
            "method": method,
            "url": url,
            "headers": headers,
            "body": body,
            "endpoint": endpoint,
            "model": body.get("model", ""),
        }

        # Determine if this is a streaming request based on request parameters
        is_streaming = body.get("stream", False)

        if is_streaming:
            # For streaming responses, we need to collect all chunks immediately before yielding
            # This ensures the recording is saved even if the generator isn't fully consumed
            chunks = []
            async for chunk in response:
                chunks.append(chunk)

            # Store the recording immediately
            response_data = {"body": chunks, "is_streaming": True}
            _current_storage.store_recording(request_hash, request_data, response_data)

            # Return a generator that replays the stored chunks
            async def replay_recorded_stream():
                for chunk in chunks:
                    yield chunk

            return replay_recorded_stream()
        else:
            response_data = {"body": response, "is_streaming": False}
            _current_storage.store_recording(request_hash, request_data, response_data)
            return response

    else:
        raise AssertionError(f"Invalid mode: {_current_mode}")


def patch_inference_clients():
    """Install monkey patches for OpenAI client methods and Ollama AsyncClient methods."""
    global _original_methods

    from ollama import AsyncClient as OllamaAsyncClient
    from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
    from openai.resources.completions import AsyncCompletions
    from openai.resources.embeddings import AsyncEmbeddings

    # Store original methods for both OpenAI and Ollama clients
    _original_methods = {
        "chat_completions_create": AsyncChatCompletions.create,
        "completions_create": AsyncCompletions.create,
        "embeddings_create": AsyncEmbeddings.create,
        "ollama_generate": OllamaAsyncClient.generate,
        "ollama_chat": OllamaAsyncClient.chat,
        "ollama_embed": OllamaAsyncClient.embed,
        "ollama_ps": OllamaAsyncClient.ps,
        "ollama_pull": OllamaAsyncClient.pull,
        "ollama_list": OllamaAsyncClient.list,
    }

    # Create patched methods for OpenAI client
    async def patched_chat_completions_create(self, *args, **kwargs):
        return await _patched_inference_method(
            _original_methods["chat_completions_create"], self, "openai", *args, **kwargs
        )

    async def patched_completions_create(self, *args, **kwargs):
        return await _patched_inference_method(_original_methods["completions_create"], self, "openai", *args, **kwargs)

    async def patched_embeddings_create(self, *args, **kwargs):
        return await _patched_inference_method(_original_methods["embeddings_create"], self, "openai", *args, **kwargs)

    # Apply OpenAI patches
    AsyncChatCompletions.create = patched_chat_completions_create
    AsyncCompletions.create = patched_completions_create
    AsyncEmbeddings.create = patched_embeddings_create

    # Create patched methods for Ollama client
    async def patched_ollama_generate(self, *args, **kwargs):
        return await _patched_inference_method(
            _original_methods["ollama_generate"], self, "ollama", "generate", *args, **kwargs
        )

    async def patched_ollama_chat(self, *args, **kwargs):
        return await _patched_inference_method(
            _original_methods["ollama_chat"], self, "ollama", "chat", *args, **kwargs
        )

    async def patched_ollama_embed(self, *args, **kwargs):
        return await _patched_inference_method(
            _original_methods["ollama_embed"], self, "ollama", "embed", *args, **kwargs
        )

    async def patched_ollama_ps(self, *args, **kwargs):
        return await _patched_inference_method(_original_methods["ollama_ps"], self, "ollama", "ps", *args, **kwargs)

    async def patched_ollama_pull(self, *args, **kwargs):
        return await _patched_inference_method(
            _original_methods["ollama_pull"], self, "ollama", "pull", *args, **kwargs
        )

    async def patched_ollama_list(self, *args, **kwargs):
        return await _patched_inference_method(
            _original_methods["ollama_list"], self, "ollama", "list", *args, **kwargs
        )

    # Apply Ollama patches
    OllamaAsyncClient.generate = patched_ollama_generate
    OllamaAsyncClient.chat = patched_ollama_chat
    OllamaAsyncClient.embed = patched_ollama_embed
    OllamaAsyncClient.ps = patched_ollama_ps
    OllamaAsyncClient.pull = patched_ollama_pull
    OllamaAsyncClient.list = patched_ollama_list


def unpatch_inference_clients():
    """Remove monkey patches and restore original OpenAI and Ollama client methods."""
    global _original_methods

    if not _original_methods:
        return

    # Import here to avoid circular imports
    from ollama import AsyncClient as OllamaAsyncClient
    from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
    from openai.resources.completions import AsyncCompletions
    from openai.resources.embeddings import AsyncEmbeddings

    # Restore OpenAI client methods
    AsyncChatCompletions.create = _original_methods["chat_completions_create"]
    AsyncCompletions.create = _original_methods["completions_create"]
    AsyncEmbeddings.create = _original_methods["embeddings_create"]

    # Restore Ollama client methods if they were patched
    OllamaAsyncClient.generate = _original_methods["ollama_generate"]
    OllamaAsyncClient.chat = _original_methods["ollama_chat"]
    OllamaAsyncClient.embed = _original_methods["ollama_embed"]
    OllamaAsyncClient.ps = _original_methods["ollama_ps"]
    OllamaAsyncClient.pull = _original_methods["ollama_pull"]
    OllamaAsyncClient.list = _original_methods["ollama_list"]

    _original_methods.clear()


@contextmanager
def inference_recording(mode: str = "live", storage_dir: str | Path | None = None) -> Generator[None, None, None]:
    """Context manager for inference recording/replaying."""
    global _current_mode, _current_storage

    # Set defaults
    if storage_dir is None:
        storage_dir_path = Path.home() / ".llama" / "recordings"
    else:
        storage_dir_path = Path(storage_dir)

    # Store previous state
    prev_mode = _current_mode
    prev_storage = _current_storage

    try:
        _current_mode = mode

        if mode in ["record", "replay"]:
            _current_storage = ResponseStorage(storage_dir_path)
            patch_inference_clients()

        yield

    finally:
        # Restore previous state
        if mode in ["record", "replay"]:
            unpatch_inference_clients()

        _current_mode = prev_mode
        _current_storage = prev_storage
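The recordings below are stored as `responses/<first 12 hex chars of the request hash>.json` plus a row in `index.sqlite`. A small sketch of how that key is derived, following `normalize_request` above; the request body mirrors the first recording that follows:

```python
# Sketch: reproduce the lookup key that ResponseStorage uses, following
# normalize_request() above. sort_keys=True makes the hash deterministic.
import hashlib
import json
from urllib.parse import urlparse

url = "http://localhost:11434/v1/v1/completions"  # base_url + endpoint, as recorded
body = {
    "model": "llama3.2:3b-instruct-fp16",
    "messages": [{"role": "user", "content": "What's the name of the Sun in latin?"}],
    "stream": True,
}

normalized = {"method": "POST", "endpoint": urlparse(url).path, "body": body}
request_hash = hashlib.sha256(json.dumps(normalized, sort_keys=True).encode()).hexdigest()

# store_recording() writes responses/<first 12 chars>.json and indexes the
# full hash in index.sqlite for replay lookups.
print(f"{request_hash[:12]}.json")
```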
tests/integration/inference/recordings/index.sqlite (new binary file; not shown)
@@ -0,0 +1,284 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "user",
          "content": "What's the name of the Sun in latin?"
        }
      ],
      "stream": true
    },
    "endpoint": "/v1/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-471", "choices": [{"delta": {"content": "The", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814881, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-471", "choices": [{"delta": {"content": " Latin", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814881, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-471", "choices": [{"delta": {"content": " word", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814881, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-471", "choices": [{"delta": {"content": " for", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814881, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-471", "choices": [{"delta": {"content": " the", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814881, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-471", "choices": [{"delta": {"content": " Sun", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814881, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-471", "choices": [{"delta": {"content": " is", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814881, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-471", "choices": [{"delta": {"content": " Sol", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814881, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-471", "choices": [{"delta": {"content": ".", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814881, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-471", "choices": [{"delta": {"content": "", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": "stop", "index": 0, "logprobs": null}], "created": 1753814881, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}}
    ],
    "is_streaming": true
  }
}
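Each recorded body is wrapped in a `__type__`/`__data__` envelope so replay can rebuild the original Pydantic object. A sketch of that round trip for one chunk of the recording above, mirroring `_deserialize_response` (fields trimmed to the required ones; the rest are optional on `ChatCompletionChunk`):

```python
# Sketch of how _deserialize_response() rebuilds a typed response from the
# __type__/__data__ envelope stored in the JSON fixtures above.
chunk = {
    "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
    "__data__": {
        "id": "chatcmpl-471",
        "choices": [
            {
                "delta": {"content": "The", "role": "assistant"},
                "finish_reason": None,
                "index": 0,
            }
        ],
        "created": 1753814881,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion.chunk",
    },
}

module_path, class_name = chunk["__type__"].rsplit(".", 1)
cls = getattr(__import__(module_path, fromlist=[class_name]), class_name)
rebuilt = cls.model_validate(chunk["__data__"])  # a real ChatCompletionChunk again
print(rebuilt.choices[0].delta.content)  # -> "The"
```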
@@ -0,0 +1,41 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "prompt": "<|begin_of_text|>Complete the sentence using one word: Roses are red, violets are ",
      "raw": true,
      "options": {
        "temperature": 0.0,
        "max_tokens": 50,
        "num_predict": 50
      },
      "stream": false
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
        "created_at": "2025-07-29T18:47:24.383192Z",
        "done": true,
        "done_reason": "stop",
        "total_duration": 2393598000,
        "load_duration": 90501917,
        "prompt_eval_count": 18,
        "prompt_eval_duration": 545025792,
        "eval_count": 43,
        "eval_duration": 1756031208,
        "response": " _______.\n\nThe best answer is blue. The traditional nursery rhyme goes like this:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you! (Or something similar.)",
        "thinking": null,
        "context": null
      }
    },
    "is_streaming": false
  }
}
@@ -0,0 +1,39 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhich planet do humans live on?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": false
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
        "created_at": "2025-07-29T18:47:29.108049Z",
        "done": true,
        "done_reason": "stop",
        "total_duration": 334746667,
        "load_duration": 55090709,
        "prompt_eval_count": 23,
        "prompt_eval_duration": 74557791,
        "eval_count": 6,
        "eval_duration": 204410292,
        "response": "Humans live on Earth.",
        "thinking": null,
        "context": null
      }
    },
    "is_streaming": false
  }
}
@@ -0,0 +1,544 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "user",
          "content": "What is the name of the US captial?"
        }
      ],
      "stream": true
    },
    "endpoint": "/v1/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": "The", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814884, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " capital", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814884, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " of", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814884, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " the", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814884, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " United", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814884, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " States", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814884, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " is", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814884, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " Washington", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814884, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": ",", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814884, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " D", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814885, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": ".C", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814885, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": ".", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814885, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " (", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814885, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": "short", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814885, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " for", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814885, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " District", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814885, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " of", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814885, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": " Columbia", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814885, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": ").", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 1753814885, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "chatcmpl-850", "choices": [{"delta": {"content": "", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": "stop", "index": 0, "logprobs": null}], "created": 1753814885, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}}
    ],
    "is_streaming": true
  }
}
@@ -0,0 +1,84 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "user",
          "content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
        }
      ],
      "stream": false,
      "tools": [
        {
          "type": "function",
          "function": {
            "name": "get_weather",
            "description": "Get the weather in a given city",
            "parameters": {
              "type": "object",
              "properties": {
                "city": {
                  "type": "string",
                  "description": "The city to get the weather for"
                }
              }
            }
          }
        }
      ]
    },
    "endpoint": "/v1/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-331",
        "choices": [
          {
            "finish_reason": "tool_calls",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": [
                {
                  "id": "call_za2swdo9",
                  "function": {
                    "arguments": "{\"city\":\"Tokyo\"}",
                    "name": "get_weather"
                  },
                  "type": "function",
                  "index": 0
                }
              ]
            }
          }
        ],
        "created": 1753814888,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 18,
          "prompt_tokens": 177,
          "total_tokens": 195,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
@@ -0,0 +1,22 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/pull",
    "headers": {},
    "body": {},
    "endpoint": "/api/pull",
    "model": ""
  },
  "response": {
    "body": {
      "__type__": "ollama._types.ProgressResponse",
      "__data__": {
        "status": "success",
        "completed": null,
        "total": null,
        "digest": null
      }
    },
    "is_streaming": false
  }
}

@@ -0,0 +1,221 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the name of the Sun in latin?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:29.322498Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "The",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:29.366077Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " Latin",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:29.408909Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " word",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:29.451051Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " for",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:29.492622Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " \"",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:29.534265Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "Sun",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:29.576141Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "\"",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:29.617693Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " is",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:29.658779Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " Sol",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:29.699936Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": ".",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:29.74208Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 570982833,
          "load_duration": 78768458,
          "prompt_eval_count": 26,
          "prompt_eval_duration": 69632083,
          "eval_count": 11,
          "eval_duration": 421479000,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
}

@@ -0,0 +1,104 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "user",
          "content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
        }
      ],
      "stream": true,
      "tools": [
        {
          "type": "function",
          "function": {
            "name": "get_weather",
            "description": "Get the weather in a given city",
            "parameters": {
              "type": "object",
              "properties": {
                "city": {
                  "type": "string",
                  "description": "The city to get the weather for"
                }
              }
            }
          }
        }
      ]
    },
    "endpoint": "/v1/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-448",
          "choices": [
            {
              "delta": {
                "content": "",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": [
                  {
                    "index": 0,
                    "id": "call_esyvjxp3",
                    "function": {
                      "arguments": "{\"city\":\"Tokyo\"}",
                      "name": "get_weather"
                    },
                    "type": "function"
                  }
                ]
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 1753814883,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-448",
          "choices": [
            {
              "delta": {
                "content": "",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": "tool_calls",
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 1753814883,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      }
    ],
    "is_streaming": true
  }
}

@@ -0,0 +1,221 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n    {\n        \"name\": \"get_weather\",\n        \"description\": \"Get the current weather\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"location\"],\n            \"properties\": {\n                \"location\": {\n                    \"type\": \"string\",\n                    \"description\": \"The city and state (both required), e.g. San Francisco, CA.\"\n                }\n            }\n        }\n    }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nPretend you are a weather assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the weather like in San Francisco?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.070599Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "[",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.112828Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "get",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.154976Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "_weather",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.197203Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "(location",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.239672Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "=\"",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.281331Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "San",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.323134Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " Francisco",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.364766Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": ",",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.406481Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " CA",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.448383Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "\")]",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.490154Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 531176667,
          "load_duration": 65048792,
          "prompt_eval_count": 324,
          "prompt_eval_duration": 44536417,
          "eval_count": 11,
          "eval_duration": 420819750,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
}

@@ -0,0 +1,86 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nPlease give me information about Michael Jordan.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nPlease respond in JSON format with the schema: {\"$defs\": {\"NBAStats\": {\"properties\": {\"year_for_draft\": {\"title\": \"Year For Draft\", \"type\": \"integer\"}, \"num_seasons_in_nba\": {\"title\": \"Num Seasons In Nba\", \"type\": \"integer\"}}, \"required\": [\"year_for_draft\", \"num_seasons_in_nba\"], \"title\": \"NBAStats\", \"type\": \"object\"}}, \"properties\": {\"first_name\": {\"title\": \"First Name\", \"type\": \"string\"}, \"last_name\": {\"title\": \"Last Name\", \"type\": \"string\"}, \"year_of_birth\": {\"title\": \"Year Of Birth\", \"type\": \"integer\"}, \"nba_stats\": {\"$ref\": \"#/$defs/NBAStats\"}}, \"required\": [\"first_name\", \"last_name\", \"year_of_birth\", \"nba_stats\"], \"title\": \"AnswerFormat\", \"type\": \"object\"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "format": {
        "$defs": {
          "NBAStats": {
            "properties": {
              "year_for_draft": {
                "title": "Year For Draft",
                "type": "integer"
              },
              "num_seasons_in_nba": {
                "title": "Num Seasons In Nba",
                "type": "integer"
              }
            },
            "required": [
              "year_for_draft",
              "num_seasons_in_nba"
            ],
            "title": "NBAStats",
            "type": "object"
          }
        },
        "properties": {
          "first_name": {
            "title": "First Name",
            "type": "string"
          },
          "last_name": {
            "title": "Last Name",
            "type": "string"
          },
          "year_of_birth": {
            "title": "Year Of Birth",
            "type": "integer"
          },
          "nba_stats": {
            "$ref": "#/$defs/NBAStats"
          }
        },
        "required": [
          "first_name",
          "last_name",
          "year_of_birth",
          "nba_stats"
        ],
        "title": "AnswerFormat",
        "type": "object"
      },
      "options": {
        "temperature": 0.0
      },
      "stream": false
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
        "created_at": "2025-07-29T18:47:48.260787Z",
        "done": true,
        "done_reason": "stop",
        "total_duration": 3136253292,
        "load_duration": 81917125,
        "prompt_eval_count": 259,
        "prompt_eval_duration": 540110750,
        "eval_count": 60,
        "eval_duration": 2513196708,
        "response": "{\n  \"first_name\": \"Michael\",\n  \"last_name\": \"Jordan\",\n  \"year_of_birth\": 1963,\n  \"nba_stats\": {\n    \"year_for_draft\": 1984,\n    \"num_seasons_in_nba\": 15\n  }\n}",
        "thinking": null,
        "context": null
      }
    },
    "is_streaming": false
  }
}

@@ -0,0 +1,132 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/tags",
    "headers": {},
    "body": {},
    "endpoint": "/api/tags",
    "model": ""
  },
  "response": {
    "body": {
      "__type__": "ollama._types.ListResponse",
      "__data__": {
        "models": [
          {
            "model": "nomic-embed-text:latest",
            "modified_at": "2025-07-29T11:45:57.155575-07:00",
            "digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
            "size": 274302450,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "nomic-bert",
              "families": [
                "nomic-bert"
              ],
              "parameter_size": "137M",
              "quantization_level": "F16"
            }
          },
          {
            "model": "llama-guard3:1b",
            "modified_at": "2025-07-25T14:39:44.978630-07:00",
            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
            "size": 1600181919,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "llama",
              "families": [
                "llama"
              ],
              "parameter_size": "1.5B",
              "quantization_level": "Q8_0"
            }
          },
          {
            "model": "all-minilm:l6-v2",
            "modified_at": "2025-07-24T15:15:11.129290-07:00",
            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
            "size": 45960996,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "bert",
              "families": [
                "bert"
              ],
              "parameter_size": "23M",
              "quantization_level": "F16"
            }
          },
          {
            "model": "llama3.2:1b",
            "modified_at": "2025-07-17T22:02:24.953208-07:00",
            "digest": "baf6a787fdffd633537aa2eb51cfd54cb93ff08e28040095462bb63daf552878",
            "size": 1321098329,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "llama",
              "families": [
                "llama"
              ],
              "parameter_size": "1.2B",
              "quantization_level": "Q8_0"
            }
          },
          {
            "model": "all-minilm:latest",
            "modified_at": "2025-06-03T16:50:10.946583-07:00",
            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
            "size": 45960996,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "bert",
              "families": [
                "bert"
              ],
              "parameter_size": "23M",
              "quantization_level": "F16"
            }
          },
          {
            "model": "llama3.2:3b",
            "modified_at": "2025-05-01T11:15:23.797447-07:00",
            "digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
            "size": 2019393189,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "llama",
              "families": [
                "llama"
              ],
              "parameter_size": "3.2B",
              "quantization_level": "Q4_K_M"
            }
          },
          {
            "model": "llama3.2:3b-instruct-fp16",
            "modified_at": "2025-04-30T15:33:48.939665-07:00",
            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
            "size": 6433703586,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "llama",
              "families": [
                "llama"
              ],
              "parameter_size": "3.2B",
              "quantization_level": "F16"
            }
          }
        ]
      }
    },
    "is_streaming": false
  }
}

@@ -0,0 +1,383 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the name of the US captial?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.509395Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "The",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.561227Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " capital",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.604344Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " of",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.647038Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " the",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.688732Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " United",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.730495Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " States",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.772148Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " is",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.813191Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " Washington",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.85447Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": ",",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.896136Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " D",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.937588Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": ".C",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:58.978357Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": ".",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:59.019403Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " (",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:59.06055Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "short",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:59.101456Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " for",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:59.142967Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " District",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:59.184487Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " of",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:59.226323Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " Columbia",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:59.269043Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": ").",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:59.311737Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 1014917792,
          "load_duration": 140789542,
          "prompt_eval_count": 26,
          "prompt_eval_duration": 70044833,
          "eval_count": 20,
          "eval_duration": 803278042,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
}

@@ -0,0 +1,39 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhich planet has rings around it with a name starting with letter S?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": false
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
        "created_at": "2025-07-29T18:47:58.183439Z",
        "done": true,
        "done_reason": "stop",
        "total_duration": 3440514791,
        "load_duration": 61560708,
        "prompt_eval_count": 30,
        "prompt_eval_duration": 92499375,
        "eval_count": 70,
        "eval_duration": 3284810375,
        "response": "The answer is Saturn! Saturn's ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice particles, rock debris, and dust that orbit around the planet due to its gravitational pull.\n\nWould you like to know more about Saturn's rings or is there something else I can help you with?",
        "thinking": null,
        "context": null
      }
    },
    "is_streaming": false
  }
}

@@ -0,0 +1,64 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "prompt": "<|begin_of_text|>Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003.Please respond in JSON format with the schema: {\"properties\": {\"name\": {\"title\": \"Name\", \"type\": \"string\"}, \"year_born\": {\"title\": \"Year Born\", \"type\": \"string\"}, \"year_retired\": {\"title\": \"Year Retired\", \"type\": \"string\"}}, \"required\": [\"name\", \"year_born\", \"year_retired\"], \"title\": \"AnswerFormat\", \"type\": \"object\"}",
      "raw": true,
      "format": {
        "properties": {
          "name": {
            "title": "Name",
            "type": "string"
          },
          "year_born": {
            "title": "Year Born",
            "type": "string"
          },
          "year_retired": {
            "title": "Year Retired",
            "type": "string"
          }
        },
        "required": [
          "name",
          "year_born",
          "year_retired"
        ],
        "title": "AnswerFormat",
        "type": "object"
      },
      "options": {
        "temperature": 0.0,
        "max_tokens": 50,
        "num_predict": 50
      },
      "stream": false
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
        "created_at": "2025-07-29T18:47:28.736819Z",
        "done": true,
        "done_reason": "stop",
        "total_duration": 1520367458,
        "load_duration": 59997042,
        "prompt_eval_count": 119,
        "prompt_eval_duration": 198841625,
        "eval_count": 29,
        "eval_duration": 1259800500,
        "response": "{ \"name\": \"Michael Jordan\", \"year_born\": \"1963\", \"year_retired\": \"2003\"}\n ",
        "thinking": null,
        "context": null
      }
    },
    "is_streaming": false
  }
}

@@ -0,0 +1,56 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "user",
          "content": "Hello, world!"
        }
      ],
      "stream": false
    },
    "endpoint": "/v1/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-33",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "Hello! Welcome. How can I assist you today?",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 1753814886,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 12,
          "prompt_tokens": 29,
          "total_tokens": 41,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}

@@ -0,0 +1,989 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
      "max_tokens": 50,
      "stream": true
    },
    "endpoint": "/v1/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": "Blue"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": ".\n\n"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": "My"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " response"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " is"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " based"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " on"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " a"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " traditional"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " poem"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " with"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " the"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " first"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " line"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " being"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " \""
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": "R"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": "oses"
            }
          ],
          "created": 1753814831,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " are"
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " red"
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": "\","
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " but"
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " in"
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " reality"
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": ","
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " roses"
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " come"
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " in"
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " various"
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.completion.Completion",
        "__data__": {
          "id": "cmpl-313",
          "choices": [
            {
              "finish_reason": null,
              "index": 0,
              "logprobs": null,
              "text": " colors"
            }
          ],
          "created": 1753814832,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "text_completion",
          "system_fingerprint": "fp_ollama",
          "usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " such"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " as"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " red"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ","
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " pink"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ","
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " yellow"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ","
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " white"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ","
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " and"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " purple"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814832,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814833,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " V"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814833,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "io"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814833,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "lets"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814833,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ","
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814833,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " on"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814833,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " the"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814833,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " other"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814833,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "cmpl-313",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "length",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814833,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"is_streaming": true
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,39 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_weather\",\n \"description\": \"Get the current weather\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"location\"],\n \"properties\": {\n \"location\": {\n \"type\": \"string\",\n \"description\": \"The city and state (both required), e.g. San Francisco, CA.\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nPretend you are a weather assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the weather like in San Francisco?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": false
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
        "created_at": "2025-07-29T18:47:30.907069Z",
        "done": true,
        "done_reason": "stop",
        "total_duration": 978723208,
        "load_duration": 82950875,
        "prompt_eval_count": 324,
        "prompt_eval_duration": 453827625,
        "eval_count": 11,
        "eval_duration": 439485709,
        "response": "[get_weather(location=\"San Francisco, CA\")]",
        "thinking": null,
        "context": null
      }
    },
    "is_streaming": false
  }
}
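The fixtures above all share one shape: a normalized `request` (method, URL, endpoint, model, body) paired with a serialized `response` whose `__type__`/`__data__` fields name the client class to rehydrate on replay. A minimal sketch of how a replay lookup over such records could be keyed, assuming a stable hash of the normalized request; `request_key` and `lookup` are illustrative helpers, not the actual llama-stack API:

```python
import hashlib
import json


def request_key(method: str, url: str, body: dict) -> str:
    # Assumption: a deterministic digest over the normalized request, so the
    # same inference call maps to the same recorded response on every run.
    normalized = {"method": method.upper(), "url": url, "body": body}
    return hashlib.sha256(json.dumps(normalized, sort_keys=True).encode()).hexdigest()


def lookup(recordings: dict[str, dict], method: str, url: str, body: dict) -> dict:
    # Replay mode: a miss means the test issued a request that was never
    # recorded, which should fail loudly instead of hitting the network.
    key = request_key(method, url, body)
    if key not in recordings:
        raise RuntimeError(f"no recorded response for request {key}")
    return recordings[key]["response"]
```

Keying on the full body means any change to the prompt, options, or model invalidates the recording, which is the behavior you want for deterministic tests.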
@ -0,0 +1,42 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
      "stream": false
    },
    "endpoint": "/v1/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.completion.Completion",
      "__data__": {
        "id": "cmpl-719",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "text": "Blue.\n\nExplanation: This is a classic example of an alliterative poem, often referred to as \"red roses.\" The original phrase, \"Roses are red,\" was actually coined by Ernest Thesiger in 1910 and was followed by the complementary phrase, making the complete sentence a poetic device called an \"alliterative couplet.\""
          }
        ],
        "created": 1753814830,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "text_completion",
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 71,
          "prompt_tokens": 50,
          "total_tokens": 121,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
@ -0,0 +1,799 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "prompt": "<|begin_of_text|>Complete the sentence using one word: Roses are red, violets are ",
      "raw": true,
      "options": {
        "temperature": 0.0,
        "max_tokens": 50,
        "num_predict": 50
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:24.599113Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " ______", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:24.643599Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": "_", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:24.685747Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": ".\n\n", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:24.727604Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": "The", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:24.768014Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " best", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:24.809356Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " answer", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:24.850402Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " is", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:24.891768Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " blue", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:24.933421Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": ".", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:24.976048Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " The", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.016922Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " traditional", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.058091Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " nursery", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.098992Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " rhyme", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.140605Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " goes", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.18202Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " like", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.223443Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " this", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.264829Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": ":\n\n", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.306517Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": "R", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.347967Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": "oses", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.389339Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " are", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.430357Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " red", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.471506Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": ",\n", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.512744Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": "V", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.55402Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": "io", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.595747Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": "lets", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.637436Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " are", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.678551Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " blue", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.719904Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": ",\n", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.76118Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": "Sugar", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.802641Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " is", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.843247Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " sweet", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.88468Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": ",\n", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.92653Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": "And", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:25.968022Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " so", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:26.00935Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " are", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:26.050576Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " you", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:26.091784Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": "!", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:26.133496Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " (", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:26.175442Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": "Or", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:26.217044Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " something", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:26.258582Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": " similar", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:26.300334Z", "done": false, "done_reason": null, "total_duration": null, "load_duration": null, "prompt_eval_count": null, "prompt_eval_duration": null, "eval_count": null, "eval_duration": null, "response": ".)", "thinking": null, "context": null}},
      {"__type__": "ollama._types.GenerateResponse", "__data__": {"model": "llama3.2:3b-instruct-fp16", "created_at": "2025-07-29T18:47:26.341814Z", "done": true, "done_reason": "stop", "total_duration": 1862375416, "load_duration": 73039291, "prompt_eval_count": 18, "prompt_eval_duration": 45477667, "eval_count": 43, "eval_duration": 1743432792, "response": "", "thinking": null, "context": null}}
    ],
    "is_streaming": true
  }
}
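Streamed recordings like the one above store `response.body` as an ordered list of chunks and mark the record with `"is_streaming": true`. Replaying one then reduces to yielding the stored chunks back as an async stream; a small sketch under that assumption (`replay_stream` is a hypothetical helper, not llama-stack code):

```python
import asyncio
from typing import Any, AsyncIterator


async def replay_stream(body: list[Any]) -> AsyncIterator[Any]:
    # Yield the recorded chunks in order, preserving the chunk-at-a-time
    # shape that streaming client code expects.
    for chunk in body:
        yield chunk


async def main() -> None:
    # Toy stand-in for a recorded GenerateResponse stream.
    recorded = [{"response": "Roses"}, {"response": " are"}, {"response": " red"}]
    async for chunk in replay_stream(recorded):
        print(chunk["response"], end="")


asyncio.run(main())
```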
@ -0,0 +1,39 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_object_namespace_list\",\n \"description\": \"Get the list of objects in a namespace\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"kind\", \"namespace\"],\n \"properties\": {\n \"kind\": {\n \"type\": \"string\",\n \"description\": \"the type of object\"\n },\n \"namespace\": {\n \"type\": \"string\",\n \"description\": \"the name of the namespace\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat pods are in the namespace openshift-lightspeed?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_object_namespace_list(kind=\"pod\", namespace=\"openshift-lightspeed\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nthe objects are pod1, pod2, pod3<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": false
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "ollama._types.GenerateResponse",
      "__data__": {
        "model": "llama3.2:3b-instruct-fp16",
        "created_at": "2025-07-29T18:48:00.342705Z",
        "done": true,
        "done_reason": "stop",
        "total_duration": 671224833,
        "load_duration": 82344875,
        "prompt_eval_count": 386,
        "prompt_eval_duration": 545215084,
        "eval_count": 2,
        "eval_duration": 43112416,
        "response": "[]",
        "thinking": null,
        "context": null
      }
    },
    "is_streaming": false
  }
}
@ -0,0 +1,56 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "user",
          "content": "Which planet has rings around it with a name starting with letter S?"
        }
      ],
      "stream": false
    },
    "endpoint": "/v1/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-541",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "Saturn is the planet that has rings around itself.",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 1753814884,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 12,
          "prompt_tokens": 39,
          "total_tokens": 51,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
@ -0,0 +1,284 @@
|
||||||
|
{
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://localhost:11434/v1/v1/completions",
|
||||||
|
"headers": {},
|
||||||
|
"body": {
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Hello, world!"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"stream": true
|
||||||
|
},
|
||||||
|
"endpoint": "/v1/completions",
|
||||||
|
"model": "llama3.2:3b-instruct-fp16"
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": [
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "chatcmpl-528",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "Hello",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814882,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "chatcmpl-528",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "!",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814882,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "chatcmpl-528",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " How",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814882,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "chatcmpl-528",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " can",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1753814882,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-528",
          "choices": [
            {
              "delta": {
                "content": " I",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 1753814882,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-528",
          "choices": [
            {
              "delta": {
                "content": " help",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 1753814882,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-528",
          "choices": [
            {
              "delta": {
                "content": " you",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 1753814882,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-528",
          "choices": [
            {
              "delta": {
                "content": " today",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 1753814882,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-528",
          "choices": [
            {
              "delta": {
                "content": "?",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": null,
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 1753814882,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "chatcmpl-528",
          "choices": [
            {
              "delta": {
                "content": "",
                "function_call": null,
                "refusal": null,
                "role": "assistant",
                "tool_calls": null
              },
              "finish_reason": "stop",
              "index": 0,
              "logprobs": null
            }
          ],
          "created": 1753814882,
          "model": "llama3.2:3b-instruct-fp16",
          "object": "chat.completion.chunk",
          "service_tier": null,
          "system_fingerprint": "fp_ollama",
          "usage": null
        }
      }
    ],
    "is_streaming": true
  }
}
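Each entry in a streamed recording wraps the serialized chunk in a `__type__` / `__data__` envelope, with `__type__` holding the dotted class path. Below is a minimal sketch of how such an envelope could be rehydrated into a live client object, assuming the recorded types are Pydantic models (as the OpenAI and Ollama client types are); the recorder's actual deserializer may differ.

```python
import importlib
from typing import Any


def rehydrate(envelope: dict[str, Any]) -> Any:
    """Resolve the dotted class path in __type__ and validate __data__ against it."""
    module_path, _, class_name = envelope["__type__"].rpartition(".")
    cls = getattr(importlib.import_module(module_path), class_name)
    return cls.model_validate(envelope["__data__"])  # Pydantic v2
```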
@@ -0,0 +1,56 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/v1/v1/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "user",
          "content": "Which planet do humans live on?"
        }
      ],
      "stream": false
    },
    "endpoint": "/v1/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "chatcmpl-4",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "Humans live on Earth.",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 1753814880,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 6,
          "prompt_tokens": 32,
          "total_tokens": 38,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
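The recording above pairs a normalized request (method, URL, endpoint, body) with the captured response, and response file names such as `bbd0637dce16.json` below look like truncated request hashes. Here is a plausible reimplementation of `normalize_request`, consistent with the unit tests further down (exact-content hashing: no whitespace normalization, no float rounding); the real function's details may differ.

```python
import hashlib
import json


def normalize_request(method: str, url: str, headers: dict, body: dict) -> str:
    # sort_keys gives key-order independence, while values are hashed verbatim:
    # different whitespace or float precision in the body yields a different hash.
    # headers are ignored here, an assumption based on every recording storing
    # "headers": {}.
    payload = json.dumps({"method": method, "url": url, "body": body}, sort_keys=True)
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()
```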
@@ -0,0 +1,221 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n    {\n        \"name\": \"get_weather\",\n        \"description\": \"Get the current weather\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"location\"],\n            \"properties\": {\n                \"location\": {\n                    \"type\": \"string\",\n                    \"description\": \"The city and state (both required), e.g. San Francisco, CA.\"\n                }\n            }\n        }\n    }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nPretend you are a weather assistant.\nYou MUST use one of the provided functions/tools to answer the user query.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the weather like in San Francisco?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.891582Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "[",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.939133Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "get",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:31.985171Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "_weather",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:32.030448Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "(location",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:32.075659Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "=\"",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:32.123939Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "San",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:32.169545Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " Francisco",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:32.214044Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": ",",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:32.259104Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " CA",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:32.306215Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "\")]",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:32.351121Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 641307458,
          "load_duration": 70513916,
          "prompt_eval_count": 339,
          "prompt_eval_duration": 106020875,
          "eval_count": 11,
          "eval_duration": 464057250,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
}
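Because this recording has `"is_streaming": true`, the body is a list of chunk envelopes rather than a single object. On replay they presumably need to be re-emitted as an async iterator, which is the shape streaming client calls return. A sketch under that assumption (`_rehydrate` is inlined from the earlier sketch; the recorder's real replay path may differ in detail):

```python
import importlib
from collections.abc import AsyncIterator
from typing import Any


def _rehydrate(envelope: dict[str, Any]) -> Any:
    module_path, _, class_name = envelope["__type__"].rpartition(".")
    return getattr(importlib.import_module(module_path), class_name).model_validate(envelope["__data__"])


async def replay_stream(body: list[dict[str, Any]]) -> AsyncIterator[Any]:
    # Re-emit each stored envelope as one chunk, restoring the async-iterator
    # shape that streaming client calls return.
    for envelope in body:
        yield _rehydrate(envelope)
```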
4145  tests/integration/inference/recordings/responses/bbd0637dce16.json  (new file)
(File diff suppressed because it is too large.)
@@ -0,0 +1,38 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/ps",
    "headers": {},
    "body": {},
    "endpoint": "/api/ps",
    "model": ""
  },
  "response": {
    "body": {
      "__type__": "ollama._types.ProcessResponse",
      "__data__": {
        "models": [
          {
            "model": "llama3.2:3b-instruct-fp16",
            "name": "llama3.2:3b-instruct-fp16",
            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
            "expires_at": "2025-07-29T11:53:06.458806-07:00",
            "size": 8581748736,
            "size_vram": 8581748736,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "llama",
              "families": [
                "llama"
              ],
              "parameter_size": "3.2B",
              "quantization_level": "F16"
            }
          }
        ]
      }
    },
    "is_streaming": false
  }
}
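Note that even a non-inference call like `/api/ps` above is captured in the same envelope, which suggests the record/replay wrapper is generic over client methods. A rough sketch of that wrapping idea, assuming the `normalize_request` sketch above and a `ResponseStorage`-like object exposing the `find_recording` / `store_recording` methods exercised by the unit tests below; streaming and object rehydration are omitted for brevity, and none of these names are claimed to be the recorder's actual internals:

```python
from functools import wraps


def record_replay_patch(real_method, endpoint: str, storage, mode: str):
    """Wrap an async client method so it records or replays by request hash."""

    @wraps(real_method)
    async def wrapper(self, *args, **kwargs):
        key = normalize_request("POST", endpoint, {}, kwargs)  # sketch from above
        if mode == "replay":
            recording = storage.find_recording(key)
            if recording is None:
                raise RuntimeError(f"No recorded response found for {endpoint}")
            return recording["response"]["body"]
        response = await real_method(self, *args, **kwargs)
        if mode == "record":
            storage.store_recording(
                key,
                {"method": "POST", "endpoint": endpoint, "body": kwargs},
                {"body": response, "is_streaming": False},
            )
        return response  # "live" mode falls through untouched

    return wrapper
```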
@@ -0,0 +1,59 @@
{
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n    {\n        \"name\": \"get_object_namespace_list\",\n        \"description\": \"Get the list of objects in a namespace\",\n        \"parameters\": {\n            \"type\": \"dict\",\n            \"required\": [\"kind\", \"namespace\"],\n            \"properties\": {\n                \"kind\": {\n                    \"type\": \"string\",\n                    \"description\": \"the type of object\"\n                },\n                \"namespace\": {\n                    \"type\": \"string\",\n                    \"description\": \"the name of the namespace\"\n                }\n            }\n        }\n    }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat pods are in the namespace openshift-lightspeed?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_object_namespace_list(kind=\"pod\", namespace=\"openshift-lightspeed\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nthe objects are pod1, pod2, pod3<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:48.95435Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "[]",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-07-29T18:47:48.996247Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 667274458,
          "load_duration": 80712750,
          "prompt_eval_count": 386,
          "prompt_eval_duration": 543388792,
          "eval_count": 2,
          "eval_duration": 42471125,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
}
291  tests/unit/distribution/test_inference_recordings.py  (new file)
@@ -0,0 +1,291 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import sqlite3
import tempfile
from pathlib import Path
from unittest.mock import patch

import pytest
from openai import AsyncOpenAI

# Import the real Pydantic response types instead of using Mocks
from llama_stack.apis.inference import (
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,
    OpenAIChoice,
    OpenAIEmbeddingData,
    OpenAIEmbeddingsResponse,
    OpenAIEmbeddingUsage,
)
from llama_stack.testing.inference_recorder import (
    InferenceMode,
    ResponseStorage,
    inference_recording,
    normalize_request,
)


@pytest.fixture
def temp_storage_dir():
    """Create a temporary directory for test recordings."""
    with tempfile.TemporaryDirectory() as temp_dir:
        yield Path(temp_dir)


@pytest.fixture
def real_openai_chat_response():
    """Real OpenAI chat completion response using proper Pydantic objects."""
    return OpenAIChatCompletion(
        id="chatcmpl-test123",
        choices=[
            OpenAIChoice(
                index=0,
                message=OpenAIAssistantMessageParam(
                    role="assistant", content="Hello! I'm doing well, thank you for asking."
                ),
                finish_reason="stop",
            )
        ],
        created=1234567890,
        model="llama3.2:3b",
    )


@pytest.fixture
def real_embeddings_response():
    """Real OpenAI embeddings response using proper Pydantic objects."""
    return OpenAIEmbeddingsResponse(
        object="list",
        data=[
            OpenAIEmbeddingData(object="embedding", embedding=[0.1, 0.2, 0.3], index=0),
            OpenAIEmbeddingData(object="embedding", embedding=[0.4, 0.5, 0.6], index=1),
        ],
        model="nomic-embed-text",
        usage=OpenAIEmbeddingUsage(prompt_tokens=6, total_tokens=6),
    )


class TestInferenceRecording:
    """Test the inference recording system."""

    def test_request_normalization(self):
        """Test that request normalization produces consistent hashes."""
        # Test basic normalization
        hash1 = normalize_request(
            "POST",
            "http://localhost:11434/v1/chat/completions",
            {},
            {"model": "llama3.2:3b", "messages": [{"role": "user", "content": "Hello world"}], "temperature": 0.7},
        )

        # Same request should produce same hash
        hash2 = normalize_request(
            "POST",
            "http://localhost:11434/v1/chat/completions",
            {},
            {"model": "llama3.2:3b", "messages": [{"role": "user", "content": "Hello world"}], "temperature": 0.7},
        )

        assert hash1 == hash2

        # Different content should produce different hash
        hash3 = normalize_request(
            "POST",
            "http://localhost:11434/v1/chat/completions",
            {},
            {
                "model": "llama3.2:3b",
                "messages": [{"role": "user", "content": "Different message"}],
                "temperature": 0.7,
            },
        )

        assert hash1 != hash3

    def test_request_normalization_edge_cases(self):
        """Test request normalization is precise about request content."""
        # Test that different whitespace produces different hashes (no normalization)
        hash1 = normalize_request(
            "POST",
            "http://test/v1/chat/completions",
            {},
            {"messages": [{"role": "user", "content": "Hello world\n\n"}]},
        )
        hash2 = normalize_request(
            "POST", "http://test/v1/chat/completions", {}, {"messages": [{"role": "user", "content": "Hello world"}]}
        )
        assert hash1 != hash2  # Different whitespace should produce different hashes

        # Test that different float precision produces different hashes (no rounding)
        hash3 = normalize_request("POST", "http://test/v1/chat/completions", {}, {"temperature": 0.7000001})
        hash4 = normalize_request("POST", "http://test/v1/chat/completions", {}, {"temperature": 0.7})
        assert hash3 != hash4  # Different precision should produce different hashes

    def test_response_storage(self, temp_storage_dir):
        """Test the ResponseStorage class."""
        temp_storage_dir = temp_storage_dir / "test_response_storage"
        storage = ResponseStorage(temp_storage_dir)

        # Test directory creation
        assert storage.test_dir.exists()
        assert storage.responses_dir.exists()
        assert storage.db_path.exists()

        # Test storing and retrieving a recording
        request_hash = "test_hash_123"
        request_data = {
            "method": "POST",
            "url": "http://localhost:11434/v1/chat/completions",
            "endpoint": "/v1/chat/completions",
            "model": "llama3.2:3b",
        }
        response_data = {"body": {"content": "test response"}, "is_streaming": False}

        storage.store_recording(request_hash, request_data, response_data)

        # Verify SQLite record
        with sqlite3.connect(storage.db_path) as conn:
            result = conn.execute("SELECT * FROM recordings WHERE request_hash = ?", (request_hash,)).fetchone()

        assert result is not None
        assert result[0] == request_hash  # request_hash
        assert result[2] == "/v1/chat/completions"  # endpoint
        assert result[3] == "llama3.2:3b"  # model

        # Verify file storage and retrieval
        retrieved = storage.find_recording(request_hash)
        assert retrieved is not None
        assert retrieved["request"]["model"] == "llama3.2:3b"
        assert retrieved["response"]["body"]["content"] == "test response"

    async def test_recording_mode(self, temp_storage_dir, real_openai_chat_response):
        """Test that recording mode captures and stores responses."""

        async def mock_create(*args, **kwargs):
            return real_openai_chat_response

        temp_storage_dir = temp_storage_dir / "test_recording_mode"
        with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
            with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")

                response = await client.chat.completions.create(
                    model="llama3.2:3b",
                    messages=[{"role": "user", "content": "Hello, how are you?"}],
                    temperature=0.7,
                    max_tokens=50,
                )

                # Verify the response was returned correctly
                assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."

        # Verify recording was stored
        storage = ResponseStorage(temp_storage_dir)
        with sqlite3.connect(storage.db_path) as conn:
            recordings = conn.execute("SELECT COUNT(*) FROM recordings").fetchone()[0]

        assert recordings == 1

    async def test_replay_mode(self, temp_storage_dir, real_openai_chat_response):
        """Test that replay mode returns stored responses without making real calls."""

        async def mock_create(*args, **kwargs):
            return real_openai_chat_response

        temp_storage_dir = temp_storage_dir / "test_replay_mode"
        # First, record a response
        with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
            with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")

                response = await client.chat.completions.create(
                    model="llama3.2:3b",
                    messages=[{"role": "user", "content": "Hello, how are you?"}],
                    temperature=0.7,
                    max_tokens=50,
                )

        # Now test replay mode - should not call the original method
        with patch("openai.resources.chat.completions.AsyncCompletions.create") as mock_create_patch:
            with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")

                response = await client.chat.completions.create(
                    model="llama3.2:3b",
                    messages=[{"role": "user", "content": "Hello, how are you?"}],
                    temperature=0.7,
                    max_tokens=50,
                )

                # Verify we got the recorded response
                assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."

                # Verify the original method was NOT called
                mock_create_patch.assert_not_called()

    async def test_replay_missing_recording(self, temp_storage_dir):
        """Test that replay mode fails when no recording is found."""
        temp_storage_dir = temp_storage_dir / "test_replay_missing_recording"
        with patch("openai.resources.chat.completions.AsyncCompletions.create"):
            with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")

                with pytest.raises(RuntimeError, match="No recorded response found"):
                    await client.chat.completions.create(
                        model="llama3.2:3b", messages=[{"role": "user", "content": "This was never recorded"}]
                    )

    async def test_embeddings_recording(self, temp_storage_dir, real_embeddings_response):
        """Test recording and replay of embeddings calls."""

        async def mock_create(*args, **kwargs):
            return real_embeddings_response

        temp_storage_dir = temp_storage_dir / "test_embeddings_recording"
        # Record
        with patch("openai.resources.embeddings.AsyncEmbeddings.create", side_effect=mock_create):
            with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")

                response = await client.embeddings.create(
                    model="nomic-embed-text", input=["Hello world", "Test embedding"]
                )

                assert len(response.data) == 2

        # Replay
        with patch("openai.resources.embeddings.AsyncEmbeddings.create") as mock_create_patch:
            with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")

                response = await client.embeddings.create(
                    model="nomic-embed-text", input=["Hello world", "Test embedding"]
                )

                # Verify we got the recorded response
                assert len(response.data) == 2
                assert response.data[0].embedding == [0.1, 0.2, 0.3]

                # Verify original method was not called
                mock_create_patch.assert_not_called()

    async def test_live_mode(self, real_openai_chat_response):
        """Test that live mode passes through to original methods."""

        async def mock_create(*args, **kwargs):
            return real_openai_chat_response

        with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
            with inference_recording(mode=InferenceMode.LIVE):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")

                response = await client.chat.completions.create(
                    model="llama3.2:3b", messages=[{"role": "user", "content": "Hello"}]
                )

                # Verify the response was returned
                assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
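The column indices asserted in `test_response_storage` (`result[0] == request_hash`, `result[2]` endpoint, `result[3]` model) imply a `recordings` table of roughly the following shape. This DDL is inferred from the test, not copied from the recorder; column 1 is a guess (a path to the JSON blob would fit the hybrid SQLite-index plus JSON-files design):

```python
import sqlite3

# Inferred schema; the real recorder's DDL may differ.
SCHEMA = """
CREATE TABLE IF NOT EXISTS recordings (
    request_hash  TEXT PRIMARY KEY,  -- result[0] in the test
    response_file TEXT,              -- result[1], assumed: path to the JSON recording
    endpoint      TEXT,              -- result[2]
    model         TEXT               -- result[3]
);
"""

with sqlite3.connect("index.sqlite") as conn:
    conn.execute(SCHEMA)
```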