much simpler

Ashwin Bharambe 2025-07-28 20:30:38 -07:00
parent e59c13f2b8
commit 481a893eb7
19 changed files with 6365 additions and 302 deletions

@@ -4,13 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from __future__ import annotations
from __future__ import annotations # for forward references
import hashlib
import json
import os
import sqlite3
import uuid
from collections.abc import Generator
from contextlib import contextmanager
from pathlib import Path
@@ -28,78 +27,18 @@ def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict
from urllib.parse import urlparse
parsed = urlparse(url)
endpoint = parsed.path
normalized = {"method": method.upper(), "endpoint": parsed.path, "body": body}
# Create normalized request dict
normalized: dict[str, Any] = {
"method": method.upper(),
"endpoint": endpoint,
}
# Normalize body parameters
if body:
# Handle model parameter
if "model" in body:
normalized["model"] = body["model"]
# Handle messages (normalize whitespace)
if "messages" in body:
normalized_messages = []
for msg in body["messages"]:
normalized_msg = dict(msg)
if "content" in normalized_msg and isinstance(normalized_msg["content"], str):
# Normalize whitespace
normalized_msg["content"] = " ".join(normalized_msg["content"].split())
normalized_messages.append(normalized_msg)
normalized["messages"] = normalized_messages
# Handle other parameters (sort for consistency)
other_params = {}
for key, value in body.items():
if key not in ["model", "messages"]:
if isinstance(value, float):
# Round floats to 6 decimal places
other_params[key] = round(value, 6)
else:
other_params[key] = value
if other_params:
# Sort dictionary keys for consistent hashing
normalized["parameters"] = dict(sorted(other_params.items()))
# Create hash
# Create hash - sort_keys=True ensures deterministic ordering
normalized_json = json.dumps(normalized, sort_keys=True)
return hashlib.sha256(normalized_json.encode()).hexdigest()
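
A quick, self-contained sanity check of this hashing step (illustrative only, not part of the commit): because json.dumps is called with sort_keys=True, two normalized dicts that differ only in key insertion order hash identically.

import hashlib
import json

def digest(d):
    return hashlib.sha256(json.dumps(d, sort_keys=True).encode()).hexdigest()

a = {"model": "llama3.2:3b", "endpoint": "/api/generate", "method": "POST"}
b = {"method": "POST", "endpoint": "/api/generate", "model": "llama3.2:3b"}
assert digest(a) == digest(b)  # insertion order does not affect the hash
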
def get_current_test_id() -> str:
"""Extract test ID from pytest context or fall back to environment/generated ID."""
# Try to get from pytest context
try:
import _pytest.fixtures
if hasattr(_pytest.fixtures, "_current_request") and _pytest.fixtures._current_request:
request = _pytest.fixtures._current_request
if hasattr(request, "node"):
# Use the test node ID as our test identifier
node_id: str = request.node.nodeid
# Clean up the node ID to be filesystem-safe
test_id = node_id.replace("/", "_").replace("::", "_").replace(".py", "")
return test_id
except AttributeError:
pass
# Fall back to environment-based or generated ID
return os.environ.get("LLAMA_STACK_TEST_ID", f"test_{uuid.uuid4().hex[:8]}")
def get_inference_mode() -> str:
"""Get the inference recording mode from environment variables."""
return os.environ.get("LLAMA_STACK_INFERENCE_MODE", "live").lower()
def setup_inference_recording():
"""Convenience function to set up inference recording based on environment variables."""
mode = get_inference_mode()
if mode not in ["live", "record", "replay"]:
@@ -113,14 +52,14 @@ def setup_inference_recording():
return live_mode()
test_id = get_current_test_id()
storage_dir = os.environ.get("LLAMA_STACK_RECORDING_DIR", str(Path.home() / ".llama" / "recordings"))
if "LLAMA_STACK_RECORDING_DIR" not in os.environ:
raise ValueError("LLAMA_STACK_RECORDING_DIR must be set for recording or replaying")
storage_dir = os.environ["LLAMA_STACK_RECORDING_DIR"]
return inference_recording(mode=mode, test_id=test_id, storage_dir=storage_dir)
return inference_recording(mode=mode, storage_dir=storage_dir)
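
A minimal sketch of driving this from a test harness (the environment variable names come from the diff above; everything inside the with-block is hypothetical):

import os

os.environ["LLAMA_STACK_INFERENCE_MODE"] = "replay"
os.environ["LLAMA_STACK_RECORDING_DIR"] = "/tmp/recordings"

with setup_inference_recording():
    # any patched OpenAI/Ollama client call made here is answered from
    # the recordings under /tmp/recordings instead of the network
    ...
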
def _serialize_response(response: Any) -> Any:
"""Serialize OpenAI response objects to JSON-compatible format."""
if hasattr(response, "model_dump"):
return response.model_dump()
elif hasattr(response, "__dict__"):
@@ -130,19 +69,14 @@ def _serialize_response(response: Any) -> Any:
def _deserialize_response(data: dict[str, Any]) -> dict[str, Any]:
"""Deserialize response data back to a dict format."""
# For simplicity, just return the dict - this preserves all the data
# The original response structure is sufficient for replaying
return data
class ResponseStorage:
"""Handles SQLite index + JSON file storage/retrieval for inference recordings."""
def __init__(self, base_dir: Path, test_id: str):
self.base_dir = base_dir
self.test_id = test_id
self.test_dir = base_dir / test_id
def __init__(self, test_dir: Path):
self.test_dir = test_dir
self.responses_dir = self.test_dir / "responses"
self.db_path = self.test_dir / "index.sqlite"
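
For orientation, the per-test directory this class manages looks roughly like the following (the hash-named file is illustrative):

test_dir/
    index.sqlite                # SQLite index keyed by request hash
    responses/
        <request_hash>.json     # one JSON file per recorded request/response pair
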
@@ -234,37 +168,55 @@ class ResponseStorage:
return cast(dict[str, Any], data)
async def _patched_create_method(original_method, self, **kwargs):
"""Patched version of OpenAI client create methods."""
async def _patched_inference_method(original_method, self, client_type, method_name=None, **kwargs):
global _current_mode, _current_storage
if _current_mode == "live" or _current_storage is None:
# Normal operation
return await original_method(self, **kwargs)
# Get base URL from the client
base_url = str(self._client.base_url)
# Get base URL and endpoint based on client type
if client_type == "openai":
base_url = str(self._client.base_url)
# Determine endpoint based on the method's module/class path
method_str = str(original_method)
if "chat.completions" in method_str:
endpoint = "/v1/chat/completions"
elif "embeddings" in method_str:
endpoint = "/v1/embeddings"
elif "completions" in method_str:
endpoint = "/v1/completions"
else:
# Fallback - try to guess from the self object
if hasattr(self, "_resource") and hasattr(self._resource, "_resource"):
resource_name = getattr(self._resource._resource, "_resource", "unknown")
if "chat" in str(resource_name):
endpoint = "/v1/chat/completions"
elif "embeddings" in str(resource_name):
endpoint = "/v1/embeddings"
# Determine endpoint based on the method's module/class path
method_str = str(original_method)
if "chat.completions" in method_str:
endpoint = "/v1/chat/completions"
elif "embeddings" in method_str:
endpoint = "/v1/embeddings"
elif "completions" in method_str:
endpoint = "/v1/completions"
else:
# Fallback - try to guess from the self object
if hasattr(self, "_resource") and hasattr(self._resource, "_resource"):
resource_name = getattr(self._resource._resource, "_resource", "unknown")
if "chat" in str(resource_name):
endpoint = "/v1/chat/completions"
elif "embeddings" in str(resource_name):
endpoint = "/v1/embeddings"
else:
endpoint = "/v1/completions"
else:
endpoint = "/v1/completions"
elif client_type == "ollama":
# Get base URL from the client (Ollama client uses host attribute)
base_url = getattr(self, "host", "http://localhost:11434")
if not base_url.startswith("http"):
base_url = f"http://{base_url}"
# Determine endpoint based on method name
if method_name == "generate":
endpoint = "/api/generate"
elif method_name == "chat":
endpoint = "/api/chat"
elif method_name == "embed":
endpoint = "/api/embeddings"
else:
endpoint = "/v1/completions"
endpoint = f"/api/{method_name}"
else:
raise ValueError(f"Unknown client type: {client_type}")
url = base_url.rstrip("/") + endpoint
@@ -276,15 +228,12 @@ async def _patched_create_method(original_method, self, **kwargs):
request_hash = normalize_request(method, url, headers, body)
if _current_mode == "replay":
# Try to find recorded response
recording = _current_storage.find_recording(request_hash)
if recording:
# Return recorded response
response_body = recording["response"]["body"]
# Handle streaming responses
if recording["response"].get("is_streaming", False):
# For streaming, we need to return an async iterator
async def replay_stream():
for chunk in response_body:
yield chunk
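
Replaying a stored stream just wraps the recorded chunk list in a fresh async generator. A standalone sketch of that pattern (the chunk strings are illustrative):

import asyncio

async def replay_stream(chunks):
    for chunk in chunks:  # stored chunks come back in recorded order
        yield chunk

async def main():
    async for chunk in replay_stream(["The", " Latin", " word", " is", " Sol."]):
        print(chunk, end="")

asyncio.run(main())
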
@@ -301,110 +250,8 @@
)
elif _current_mode == "record":
# Make real request and record it
response = await original_method(self, **kwargs)
# Store the recording
request_data = {
"method": method,
"url": url,
"headers": headers,
"body": body,
"endpoint": endpoint,
"model": body.get("model", ""),
}
# Determine if this is a streaming request based on request parameters
is_streaming = body.get("stream", False)
if is_streaming:
# For streaming responses, we need to collect all chunks immediately before yielding
# This ensures the recording is saved even if the generator isn't fully consumed
chunks = []
async for chunk in response:
chunks.append(chunk)
# Store the recording immediately
response_data = {"body": chunks, "is_streaming": True}
_current_storage.store_recording(request_hash, request_data, response_data)
# Return a generator that replays the stored chunks
async def replay_recorded_stream():
for chunk in chunks:
yield chunk
return replay_recorded_stream()
else:
response_data = {"body": response, "is_streaming": False}
_current_storage.store_recording(request_hash, request_data, response_data)
return response
else:
return await original_method(self, **kwargs)
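
The record path drains the upstream stream eagerly so the recording is persisted even if the caller abandons the generator early, then hands the caller a replay of the buffered chunks. A self-contained sketch of that drain-then-replay pattern (the persistence step is elided):

import asyncio

async def fake_upstream():
    for tok in ["Hello", ",", " world"]:
        yield tok

async def record_stream(stream):
    chunks = [c async for c in stream]  # drain fully before yielding anything
    # ... a real implementation would persist `chunks` here ...
    async def replay():
        for c in chunks:
            yield c
    return replay()

async def main():
    replayed = await record_stream(fake_upstream())
    print([c async for c in replayed])  # ['Hello', ',', ' world']

asyncio.run(main())
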
async def _patched_ollama_method(original_method, self, method_name, **kwargs):
"""Patched version of Ollama AsyncClient methods."""
global _current_mode, _current_storage
if _current_mode == "live" or _current_storage is None:
# Normal operation
return await original_method(self, **kwargs)
# Get base URL from the client (Ollama client uses host attribute)
base_url = getattr(self, "host", "http://localhost:11434")
if not base_url.startswith("http"):
base_url = f"http://{base_url}"
# Determine endpoint based on method name
if method_name == "generate":
endpoint = "/api/generate"
elif method_name == "chat":
endpoint = "/api/chat"
elif method_name == "embed":
endpoint = "/api/embeddings"
else:
endpoint = f"/api/{method_name}"
url = base_url.rstrip("/") + endpoint
# Normalize request for matching
method = "POST"
headers = {}
body = kwargs
request_hash = normalize_request(method, url, headers, body)
if _current_mode == "replay":
# Try to find recorded response
recording = _current_storage.find_recording(request_hash)
if recording:
# Return recorded response
response_body = recording["response"]["body"]
# Handle streaming responses for Ollama
if recording["response"].get("is_streaming", False):
# For streaming, we need to return an async iterator
async def replay_ollama_stream():
for chunk in response_body:
yield chunk
return replay_ollama_stream()
else:
return response_body
else:
raise RuntimeError(
f"No recorded response found for request hash: {request_hash}\n"
f"Endpoint: {endpoint}\n"
f"Model: {body.get('model', 'unknown')}\n"
f"To record this response, run with LLAMA_STACK_INFERENCE_MODE=record"
)
elif _current_mode == "record":
# Make real request and record it
response = await original_method(self, **kwargs)
# Store the recording
request_data = {
"method": method,
"url": url,
@@ -448,45 +295,31 @@ def patch_inference_clients():
global _original_methods
# Import here to avoid circular imports
from openai import AsyncOpenAI
# Also import Ollama AsyncClient
from ollama import AsyncClient as OllamaAsyncClient
from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
from openai.resources.completions import AsyncCompletions
from openai.resources.embeddings import AsyncEmbeddings
# Also import Ollama AsyncClient
try:
from ollama import AsyncClient as OllamaAsyncClient
except ImportError:
ollama_async_client = None
else:
ollama_async_client = OllamaAsyncClient
# Store original methods for both OpenAI and Ollama clients
_original_methods = {
"chat_completions_create": AsyncChatCompletions.create,
"completions_create": AsyncCompletions.create,
"embeddings_create": AsyncEmbeddings.create,
"ollama_generate": OllamaAsyncClient.generate,
"ollama_chat": OllamaAsyncClient.chat,
"ollama_embed": OllamaAsyncClient.embed,
}
# Add Ollama client methods if available
if ollama_async_client:
_original_methods.update(
{
"ollama_generate": ollama_async_client.generate,
"ollama_chat": ollama_async_client.chat,
"ollama_embed": ollama_async_client.embed,
}
)
# Create patched methods for OpenAI client
async def patched_chat_completions_create(self, **kwargs):
return await _patched_create_method(_original_methods["chat_completions_create"], self, **kwargs)
return await _patched_inference_method(_original_methods["chat_completions_create"], self, "openai", **kwargs)
async def patched_completions_create(self, **kwargs):
return await _patched_create_method(_original_methods["completions_create"], self, **kwargs)
return await _patched_inference_method(_original_methods["completions_create"], self, "openai", **kwargs)
async def patched_embeddings_create(self, **kwargs):
return await _patched_create_method(_original_methods["embeddings_create"], self, **kwargs)
return await _patched_inference_method(_original_methods["embeddings_create"], self, "openai", **kwargs)
# Apply OpenAI patches
AsyncChatCompletions.create = patched_chat_completions_create
@@ -494,40 +327,21 @@ def patch_inference_clients():
AsyncEmbeddings.create = patched_embeddings_create
# Create patched methods for Ollama client
if ollama_async_client:
async def patched_ollama_generate(self, **kwargs):
return await _patched_inference_method(
_original_methods["ollama_generate"], self, "ollama", "generate", **kwargs
)
async def patched_ollama_generate(self, **kwargs):
return await _patched_ollama_method(_original_methods["ollama_generate"], self, "generate", **kwargs)
async def patched_ollama_chat(self, **kwargs):
return await _patched_inference_method(_original_methods["ollama_chat"], self, "ollama", "chat", **kwargs)
async def patched_ollama_chat(self, **kwargs):
return await _patched_ollama_method(_original_methods["ollama_chat"], self, "chat", **kwargs)
async def patched_ollama_embed(self, **kwargs):
return await _patched_inference_method(_original_methods["ollama_embed"], self, "ollama", "embed", **kwargs)
async def patched_ollama_embed(self, **kwargs):
return await _patched_ollama_method(_original_methods["ollama_embed"], self, "embed", **kwargs)
# Apply Ollama patches
ollama_async_client.generate = patched_ollama_generate
ollama_async_client.chat = patched_ollama_chat
ollama_async_client.embed = patched_ollama_embed
# Also try to patch the AsyncOpenAI __init__ to trace client creation
original_openai_init = AsyncOpenAI.__init__
def patched_openai_init(self, *args, **kwargs):
result = original_openai_init(self, *args, **kwargs)
# After client is created, try to re-patch its methods
if hasattr(self, "chat") and hasattr(self.chat, "completions"):
original_chat_create = self.chat.completions.create
async def instance_patched_chat_create(**kwargs):
return await _patched_create_method(original_chat_create, self.chat.completions, **kwargs)
self.chat.completions.create = instance_patched_chat_create
return result
AsyncOpenAI.__init__ = patched_openai_init
# Apply Ollama patches
OllamaAsyncClient.generate = patched_ollama_generate
OllamaAsyncClient.chat = patched_ollama_chat
OllamaAsyncClient.embed = patched_ollama_embed
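
The patching itself is plain class-attribute reassignment: stash the original function, install a wrapper that closes over it, and restore the stash to unpatch. A minimal sketch of the pattern, independent of the OpenAI/Ollama classes:

import asyncio

class Client:
    async def create(self, **kwargs):
        return {"echo": kwargs}

_original_create = Client.create

async def patched_create(self, **kwargs):
    # intercept here (record/replay), then delegate to the saved original
    return await _original_create(self, **kwargs)

Client.create = patched_create    # patches every instance at once
print(asyncio.run(Client().create(model="demo")))
Client.create = _original_create  # unpatch, mirroring unpatch_inference_clients()
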
def unpatch_inference_clients():
@@ -538,43 +352,26 @@ def unpatch_inference_clients():
return
# Import here to avoid circular imports
from ollama import AsyncClient as OllamaAsyncClient
from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
from openai.resources.completions import AsyncCompletions
from openai.resources.embeddings import AsyncEmbeddings
# Restore OpenAI client methods
if "chat_completions_create" in _original_methods:
AsyncChatCompletions.create = _original_methods["chat_completions_create"]
if "completions_create" in _original_methods:
AsyncCompletions.create = _original_methods["completions_create"]
if "embeddings_create" in _original_methods:
AsyncEmbeddings.create = _original_methods["embeddings_create"]
AsyncChatCompletions.create = _original_methods["chat_completions_create"]
AsyncCompletions.create = _original_methods["completions_create"]
AsyncEmbeddings.create = _original_methods["embeddings_create"]
# Restore Ollama client methods if they were patched
try:
from ollama import AsyncClient as OllamaAsyncClient
if "ollama_generate" in _original_methods:
OllamaAsyncClient.generate = _original_methods["ollama_generate"]
if "ollama_chat" in _original_methods:
OllamaAsyncClient.chat = _original_methods["ollama_chat"]
if "ollama_embed" in _original_methods:
OllamaAsyncClient.embed = _original_methods["ollama_embed"]
except ImportError:
pass
OllamaAsyncClient.generate = _original_methods["ollama_generate"]
OllamaAsyncClient.chat = _original_methods["ollama_chat"]
OllamaAsyncClient.embed = _original_methods["ollama_embed"]
_original_methods.clear()
@contextmanager
def inference_recording(
mode: str = "live", test_id: str | None = None, storage_dir: str | Path | None = None
) -> Generator[None, None, None]:
def inference_recording(mode: str = "live", storage_dir: str | Path | None = None) -> Generator[None, None, None]:
"""Context manager for inference recording/replaying."""
global _current_mode, _current_storage
@@ -584,9 +381,6 @@ def inference_recording(
else:
storage_dir_path = Path(storage_dir)
if test_id is None:
test_id = f"test_{uuid.uuid4().hex[:8]}"
# Store previous state
prev_mode = _current_mode
prev_storage = _current_storage
@@ -595,7 +389,7 @@
_current_mode = mode
if mode in ["record", "replay"]:
_current_storage = ResponseStorage(storage_dir_path, test_id)
_current_storage = ResponseStorage(storage_dir_path)
patch_inference_clients()
yield
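
Putting it together, a hypothetical replay session might look like this (the model name and base URL are taken from the recordings below; the request must hash to an existing recording or a RuntimeError is raised):

import asyncio
from openai import AsyncOpenAI

async def main():
    with inference_recording(mode="replay", storage_dir="/tmp/recordings"):
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="unused")
        resp = await client.chat.completions.create(
            model="llama3.2:3b-instruct-fp16",
            messages=[{"role": "user", "content": "Which planet do humans live on?"}],
        )
        print(resp)

asyncio.run(main())
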

Binary file not shown.

@@ -0,0 +1,38 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "<|begin_of_text|>Complete the sentence using one word: Roses are red, violets are ",
"raw": true,
"options": {
"temperature": 0.0,
"max_tokens": 50,
"num_predict": 50
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.555099Z",
"done": true,
"done_reason": "stop",
"total_duration": 2124168875,
"load_duration": 58506875,
"prompt_eval_count": 18,
"prompt_eval_duration": 70072583,
"eval_count": 43,
"eval_duration": 1994446917,
"response": " _______.\n\nThe best answer is blue. The traditional nursery rhyme goes like this:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you! (Or something similar.)",
"thinking": null,
"context": null
},
"is_streaming": false
}
}
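
Each recording file pairs the normalized request with the serialized response; when "is_streaming" is false the body is a single object, otherwise it is a list of chunks. A sketch of reading one of these Ollama /api/generate recordings back (the file name is hypothetical):

import json

path = "responses/example.json"  # hypothetical hash-named recording file
with open(path) as f:
    rec = json.load(f)

print(rec["request"]["endpoint"])  # "/api/generate"
body = rec["response"]["body"]
if rec["response"]["is_streaming"]:
    # streamed generate chunks each carry a "response" token fragment
    body = "".join(chunk["response"] for chunk in body)
print(body)
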

@@ -0,0 +1,36 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhich planet do humans live on?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.535525Z",
"done": true,
"done_reason": "stop",
"total_duration": 358691334,
"load_duration": 76787334,
"prompt_eval_count": 23,
"prompt_eval_duration": 72235375,
"eval_count": 6,
"eval_duration": 208986666,
"response": "Humans live on Earth.",
"thinking": null,
"context": null
},
"is_streaming": false
}
}

@@ -0,0 +1,188 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the name of the Sun in latin?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.691771Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.732262Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Latin",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.77294Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " word",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.814484Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.854875Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " \"",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.895957Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "Sun",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.937445Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\"",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.978832Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.019242Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Sol",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.059902Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.100535Z",
"done": true,
"done_reason": "stop",
"total_duration": 528254250,
"load_duration": 50177125,
"prompt_eval_count": 26,
"prompt_eval_duration": 68018458,
"eval_count": 11,
"eval_duration": 409555959,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}

@@ -0,0 +1,188 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_weather\",\n \"description\": \"Get the current weather\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"location\"],\n \"properties\": {\n \"location\": {\n \"type\": \"string\",\n \"description\": \"The city and state (both required), e.g. San Francisco, CA.\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nPretend you are a weather assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the weather like in San Francisco?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.480955Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.527418Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.571522Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.615027Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.660598Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.705052Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.754386Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.796942Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.845807Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.891254Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.934197Z",
"done": true,
"done_reason": "stop",
"total_duration": 574307083,
"load_duration": 72062083,
"prompt_eval_count": 324,
"prompt_eval_duration": 47115625,
"eval_count": 11,
"eval_duration": 454426708,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}

@@ -0,0 +1,83 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nPlease give me information about Michael Jordan.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nPlease respond in JSON format with the schema: {\"$defs\": {\"NBAStats\": {\"properties\": {\"year_for_draft\": {\"title\": \"Year For Draft\", \"type\": \"integer\"}, \"num_seasons_in_nba\": {\"title\": \"Num Seasons In Nba\", \"type\": \"integer\"}}, \"required\": [\"year_for_draft\", \"num_seasons_in_nba\"], \"title\": \"NBAStats\", \"type\": \"object\"}}, \"properties\": {\"first_name\": {\"title\": \"First Name\", \"type\": \"string\"}, \"last_name\": {\"title\": \"Last Name\", \"type\": \"string\"}, \"year_of_birth\": {\"title\": \"Year Of Birth\", \"type\": \"integer\"}, \"nba_stats\": {\"$ref\": \"#/$defs/NBAStats\"}}, \"required\": [\"first_name\", \"last_name\", \"year_of_birth\", \"nba_stats\"], \"title\": \"AnswerFormat\", \"type\": \"object\"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"format": {
"$defs": {
"NBAStats": {
"properties": {
"year_for_draft": {
"title": "Year For Draft",
"type": "integer"
},
"num_seasons_in_nba": {
"title": "Num Seasons In Nba",
"type": "integer"
}
},
"required": [
"year_for_draft",
"num_seasons_in_nba"
],
"title": "NBAStats",
"type": "object"
}
},
"properties": {
"first_name": {
"title": "First Name",
"type": "string"
},
"last_name": {
"title": "Last Name",
"type": "string"
},
"year_of_birth": {
"title": "Year Of Birth",
"type": "integer"
},
"nba_stats": {
"$ref": "#/$defs/NBAStats"
}
},
"required": [
"first_name",
"last_name",
"year_of_birth",
"nba_stats"
],
"title": "AnswerFormat",
"type": "object"
},
"options": {
"temperature": 0.0
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:13.438182Z",
"done": true,
"done_reason": "stop",
"total_duration": 2975265833,
"load_duration": 95592083,
"prompt_eval_count": 259,
"prompt_eval_duration": 367103709,
"eval_count": 60,
"eval_duration": 2511576708,
"response": "{\n \"first_name\": \"Michael\",\n \"last_name\": \"Jordan\",\n \"year_of_birth\": 1963,\n \"nba_stats\": {\n \"year_for_draft\": 1984,\n \"num_seasons_in_nba\": 15\n }\n}",
"thinking": null,
"context": null
},
"is_streaming": false
}
}

@@ -0,0 +1,323 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the name of the US captial?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.227427Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.275725Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " capital",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.316195Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.356832Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " the",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.397682Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " United",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.438761Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " States",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.480453Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.523691Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Washington",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.565106Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.606315Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " D",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.647209Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".C",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.687828Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.728386Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " (",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.769091Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "short",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.809726Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.850489Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " District",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.89147Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.932311Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Columbia",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.973566Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ").",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:22.014466Z",
"done": true,
"done_reason": "stop",
"total_duration": 1034011167,
"load_duration": 176591709,
"prompt_eval_count": 26,
"prompt_eval_duration": 68104583,
"eval_count": 20,
"eval_duration": 788670334,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}

@@ -0,0 +1,36 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhich planet has rings around it with a name starting with letter S?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:20.924128Z",
"done": true,
"done_reason": "stop",
"total_duration": 3308469666,
"load_duration": 66702250,
"prompt_eval_count": 30,
"prompt_eval_duration": 391410334,
"eval_count": 70,
"eval_duration": 2849497291,
"response": "The answer is Saturn! Saturn's ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice particles, rock debris, and dust that orbit around the planet due to its gravitational pull.\n\nWould you like to know more about Saturn's rings or is there something else I can help you with?",
"thinking": null,
"context": null
},
"is_streaming": false
}
}

@@ -0,0 +1,61 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "<|begin_of_text|>Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003.Please respond in JSON format with the schema: {\"properties\": {\"name\": {\"title\": \"Name\", \"type\": \"string\"}, \"year_born\": {\"title\": \"Year Born\", \"type\": \"string\"}, \"year_retired\": {\"title\": \"Year Retired\", \"type\": \"string\"}}, \"required\": [\"name\", \"year_born\", \"year_retired\"], \"title\": \"AnswerFormat\", \"type\": \"object\"}",
"raw": true,
"format": {
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"year_born": {
"title": "Year Born",
"type": "string"
},
"year_retired": {
"title": "Year Retired",
"type": "string"
}
},
"required": [
"name",
"year_born",
"year_retired"
],
"title": "AnswerFormat",
"type": "object"
},
"options": {
"temperature": 0.0,
"max_tokens": 50,
"num_predict": 50
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.15491Z",
"done": true,
"done_reason": "stop",
"total_duration": 1570055875,
"load_duration": 87677125,
"prompt_eval_count": 119,
"prompt_eval_duration": 190281458,
"eval_count": 29,
"eval_duration": 1291217083,
"response": "{ \"name\": \"Michael Jordan\", \"year_born\": \"1963\", \"year_retired\": \"2003\"}\n ",
"thinking": null,
"context": null
},
"is_streaming": false
}
}

@@ -0,0 +1,836 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"max_tokens": 50,
"stream": true
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "blue"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n\n"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "This"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " is"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " a"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " reference"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " to"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " traditional"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " English"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " nursery"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " rhyme"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ","
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " \""
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "R"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "oses"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Are"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Red"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ","
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " V"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Are"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Blue"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\""
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " The"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " completed"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " version"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " rhyme"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " typically"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " goes"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " like"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " this"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ":\n\n"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "R"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "oses"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " red"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ",\n"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "V"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " blue"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "Sugar"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " is"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " sweet"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": ""
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
}
],
"is_streaming": true
}
}

@@ -0,0 +1,36 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_weather\",\n \"description\": \"Get the current weather\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"location\"],\n \"properties\": {\n \"location\": {\n \"type\": \"string\",\n \"description\": \"The city and state (both required), e.g. San Francisco, CA.\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nPretend you are a weather assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the weather like in San Francisco?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.104609Z",
"done": true,
"done_reason": "stop",
"total_duration": 948932208,
"load_duration": 68549542,
"prompt_eval_count": 324,
"prompt_eval_duration": 460136875,
"eval_count": 11,
"eval_duration": 419553208,
"response": "[get_weather(location=\"San Francisco, CA\")]",
"thinking": null,
"context": null
},
"is_streaming": false
}
}
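Each recording above pairs the normalized request (method, URL, endpoint, body, model) with the captured response and an is_streaming flag. A minimal replay helper for the non-streaming case could look like the sketch below; load_recording and replay_body are illustrative names, not part of the library:

import json
from pathlib import Path
from typing import Any


def load_recording(path: Path) -> dict[str, Any]:
    # Each recording file holds exactly one request/response pair.
    with open(path) as f:
        return json.load(f)


def replay_body(recording: dict[str, Any]) -> dict[str, Any]:
    # Non-streaming recordings store the complete response object under "body".
    response = recording["response"]
    if response["is_streaming"]:
        raise ValueError("streamed recordings must be replayed chunk by chunk")
    return response["body"]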


@@ -0,0 +1,39 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"stream": false
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"id": "cmpl-68",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Blue.\n\nThe completed quote is a well-known poetic phrase often used as a tongue-in-cheek romantic gesture. However, it's worth noting that true violets are actually purple in color, not blue. This phrase is a playful variation of the traditional \"Roses are red, violets are blue,\" which typically goes like this:\n\n\"Roses are red, violets are blue,\nSugar is sweet, and so are you.\"\n\nThis original quote has been used for centuries to make a lighthearted, whimsical compliment in poetry, songs, and spoken words."
}
],
"created": 1753762608,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 120,
"prompt_tokens": 50,
"total_tokens": 170,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
},
"is_streaming": false
}
}


@@ -0,0 +1,670 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "<|begin_of_text|>Complete the sentence using one word: Roses are red, violets are ",
"raw": true,
"options": {
"temperature": 0.0,
"max_tokens": 50,
"num_predict": 50
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.717175Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " ______",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.759811Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.802135Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".\n\n",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.843818Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.8848Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " best",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.926824Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " answer",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.96764Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.008868Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " blue",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.049584Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.090467Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " The",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.131216Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " traditional",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.171811Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " nursery",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.212449Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " rhyme",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.254055Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " goes",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.296182Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " like",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.339421Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " this",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.380632Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ":\n\n",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.423681Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "R",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.466032Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "oses",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.508317Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " are",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.551009Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " red",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.595853Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",\n",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.638044Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "V",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.679396Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "io",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.72096Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "lets",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.763977Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " are",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.80598Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " blue",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.847977Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",\n",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.890637Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "Sugar",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.931597Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.972266Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " sweet",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.01467Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",\n",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.055561Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "And",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.097823Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " so",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.139389Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " are",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.181536Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " you",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.224644Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "!",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.267976Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " (",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.311629Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "Or",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.355343Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " something",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.396541Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " similar",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.437899Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".)",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.479038Z",
"done": true,
"done_reason": "stop",
"total_duration": 1881416167,
"load_duration": 69754000,
"prompt_eval_count": 18,
"prompt_eval_duration": 46361125,
"eval_count": 43,
"eval_duration": 1762259458,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}
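Streamed recordings like the one above store the response body as the full ordered list of chunks, so replay reduces to yielding them back in order. A sketch under that assumption (replay_stream is a hypothetical helper):

from collections.abc import Iterator
from typing import Any


def replay_stream(recording: dict[str, Any]) -> Iterator[Any]:
    # Yield the recorded chunks in their original order; only the final
    # done=True chunk carries the timing and token-count fields.
    response = recording["response"]
    assert response["is_streaming"], "expected a streamed recording"
    yield from response["body"]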


@@ -0,0 +1,36 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_object_namespace_list\",\n \"description\": \"Get the list of objects in a namespace\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"kind\", \"namespace\"],\n \"properties\": {\n \"kind\": {\n \"type\": \"string\",\n \"description\": \"the type of object\"\n },\n \"namespace\": {\n \"type\": \"string\",\n \"description\": \"the name of the namespace\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat pods are in the namespace openshift-lightspeed?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_object_namespace_list(kind=\"pod\", namespace=\"openshift-lightspeed\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nthe objects are pod1, pod2, pod3<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:22.73932Z",
"done": true,
"done_reason": "stop",
"total_duration": 660872000,
"load_duration": 76282083,
"prompt_eval_count": 386,
"prompt_eval_duration": 541896167,
"eval_count": 2,
"eval_duration": 42127791,
"response": "[]",
"thinking": null,
"context": null
},
"is_streaming": false
}
}


@@ -0,0 +1,188 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_weather\",\n \"description\": \"Get the current weather\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"location\"],\n \"properties\": {\n \"location\": {\n \"type\": \"string\",\n \"description\": \"The city and state (both required), e.g. San Francisco, CA.\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nPretend you are a weather assistant.\nYou MUST use one of the provided functions/tools to answer the user query.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the weather like in San Francisco?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.217546Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.267879Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.315525Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.362669Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.406139Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.450302Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.496893Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.540977Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.586272Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.631743Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.676251Z",
"done": true,
"done_reason": "stop",
"total_duration": 682827167,
"load_duration": 111852875,
"prompt_eval_count": 339,
"prompt_eval_duration": 109521833,
"eval_count": 11,
"eval_duration": 460495042,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}
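Concatenating the incremental response fields of the chunks above reproduces exactly the tool call captured by the earlier non-streaming recording, [get_weather(location="San Francisco, CA")]. A quick way to verify that (assemble_response is a hypothetical helper):

def assemble_response(chunks: list[dict]) -> str:
    # Ollama streams text incrementally in the "response" field; the final
    # done=True chunk contributes an empty string.
    return "".join(chunk["response"] for chunk in chunks)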

File diff suppressed because it is too large


@@ -0,0 +1,53 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_object_namespace_list\",\n \"description\": \"Get the list of objects in a namespace\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"kind\", \"namespace\"],\n \"properties\": {\n \"kind\": {\n \"type\": \"string\",\n \"description\": \"the type of object\"\n },\n \"namespace\": {\n \"type\": \"string\",\n \"description\": \"the name of the namespace\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat pods are in the namespace openshift-lightspeed?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_object_namespace_list(kind=\"pod\", namespace=\"openshift-lightspeed\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nthe objects are pod1, pod2, pod3<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:14.122273Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[]",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:14.165968Z",
"done": true,
"done_reason": "stop",
"total_duration": 663520959,
"load_duration": 67474917,
"prompt_eval_count": 386,
"prompt_eval_duration": 545132042,
"eval_count": 2,
"eval_duration": 50234083,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}


@@ -105,8 +105,8 @@ class TestInferenceRecording:
assert hash1 != hash3
def test_request_normalization_edge_cases(self):
"""Test request normalization handles edge cases correctly."""
# Test whitespace normalization
"""Test request normalization is precise about request content."""
# Test that different whitespace produces different hashes (no normalization)
hash1 = normalize_request(
"POST",
"http://test/v1/chat/completions",
@@ -116,16 +116,17 @@ class TestInferenceRecording:
hash2 = normalize_request(
"POST", "http://test/v1/chat/completions", {}, {"messages": [{"role": "user", "content": "Hello world"}]}
)
assert hash1 == hash2
assert hash1 != hash2 # Different whitespace should produce different hashes
# Test float precision normalization
# Test that different float precision produces different hashes (no rounding)
hash3 = normalize_request("POST", "http://test/v1/chat/completions", {}, {"temperature": 0.7000001})
hash4 = normalize_request("POST", "http://test/v1/chat/completions", {}, {"temperature": 0.7})
assert hash3 == hash4
assert hash3 != hash4 # Different precision should produce different hashes
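Both assertions follow from hashing the serialized body verbatim: json.dumps renders 0.7 and 0.7000001 differently, so their SHA-256 digests differ. A standalone stdlib check of the behavior these tests pin down:

import hashlib
import json

a = json.dumps({"temperature": 0.7}, sort_keys=True)
b = json.dumps({"temperature": 0.7000001}, sort_keys=True)
# Different serialized bodies yield different digests, so there are no
# accidental replay cache hits across near-identical requests.
assert hashlib.sha256(a.encode()).hexdigest() != hashlib.sha256(b.encode()).hexdigest()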
def test_response_storage(self, temp_storage_dir):
"""Test the ResponseStorage class."""
storage = ResponseStorage(temp_storage_dir, "test_storage")
temp_storage_dir = temp_storage_dir / "test_response_storage"
storage = ResponseStorage(temp_storage_dir)
# Test directory creation
assert storage.test_dir.exists()
@@ -161,13 +162,13 @@ class TestInferenceRecording:
async def test_recording_mode(self, temp_storage_dir, mock_openai_response):
"""Test that recording mode captures and stores responses."""
test_id = "test_recording_mode"
async def mock_create(*args, **kwargs):
return mock_openai_response
temp_storage_dir = temp_storage_dir / "test_recording_mode"
with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
with inference_recording(mode="record", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="record", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
response = await client.chat.completions.create(
@@ -181,7 +182,7 @@ class TestInferenceRecording:
assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
# Verify recording was stored
storage = ResponseStorage(temp_storage_dir, test_id)
storage = ResponseStorage(temp_storage_dir)
with sqlite3.connect(storage.db_path) as conn:
recordings = conn.execute("SELECT COUNT(*) FROM recordings").fetchone()[0]
@@ -189,14 +190,14 @@ class TestInferenceRecording:
async def test_replay_mode(self, temp_storage_dir, mock_openai_response):
"""Test that replay mode returns stored responses without making real calls."""
test_id = "test_replay_mode"
async def mock_create(*args, **kwargs):
return mock_openai_response
temp_storage_dir = temp_storage_dir / "test_replay_mode"
# First, record a response
with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
with inference_recording(mode="record", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="record", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
await client.chat.completions.create(
@@ -208,7 +209,7 @@ class TestInferenceRecording:
# Now test replay mode - should not call the original method
with patch("openai.resources.chat.completions.AsyncCompletions.create") as mock_create_patch:
with inference_recording(mode="replay", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="replay", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
response = await client.chat.completions.create(
@@ -226,10 +227,9 @@ class TestInferenceRecording:
async def test_replay_missing_recording(self, temp_storage_dir):
"""Test that replay mode fails when no recording is found."""
test_id = "test_missing_recording"
temp_storage_dir = temp_storage_dir / "test_replay_missing_recording"
with patch("openai.resources.chat.completions.AsyncCompletions.create"):
with inference_recording(mode="replay", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="replay", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
with pytest.raises(RuntimeError, match="No recorded response found"):
@@ -239,14 +239,14 @@ class TestInferenceRecording:
async def test_embeddings_recording(self, temp_storage_dir, mock_embeddings_response):
"""Test recording and replay of embeddings calls."""
test_id = "test_embeddings"
async def mock_create(*args, **kwargs):
return mock_embeddings_response
temp_storage_dir = temp_storage_dir / "test_embeddings_recording"
# Record
with patch("openai.resources.embeddings.AsyncEmbeddings.create", side_effect=mock_create):
with inference_recording(mode="record", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="record", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
response = await client.embeddings.create(
@@ -257,7 +257,7 @@ class TestInferenceRecording:
# Replay
with patch("openai.resources.embeddings.AsyncEmbeddings.create") as mock_create_patch:
with inference_recording(mode="replay", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="replay", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
response = await client.embeddings.create(