Improvements to include other Ollama methods like ps, list, and pull

Ashwin Bharambe 2025-07-29 10:56:57 -07:00
parent 1a21c4b695
commit b578f9aec1
23 changed files with 6748 additions and 5711 deletions
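In short: the test inference recorder now covers Ollama's ps, list, and pull calls in addition to generate/chat/embed, and recorded responses carry a "__type__"/"__data__" envelope so they deserialize back into their original Pydantic classes. A minimal sketch of driving the recorder, using only the environment variables introduced in this diff (the directory path is illustrative):

import os

from llama_stack.testing.inference_recorder import setup_inference_recording

# Modes accepted per this diff: "live", "record", "replay".
os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "record"
os.environ["LLAMA_STACK_TEST_RECORDING_DIR"] = "/tmp/llama-stack-recordings"

with setup_inference_recording():
    pass  # run inference here; Ollama/OpenAI client calls are captured to disk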


@@ -77,13 +77,13 @@ class InferenceRouter(Inference):
self.tokenizer = Tokenizer.get_instance()
self.formatter = ChatFormat(self.tokenizer)
self.recording_context = None
async def initialize(self) -> None:
logger.debug("InferenceRouter.initialize")
pass
async def shutdown(self) -> None:
logger.debug("InferenceRouter.shutdown")
pass
async def register_model(
self,


@@ -94,6 +94,7 @@ RESOURCES = [
REGISTRY_REFRESH_INTERVAL_SECONDS = 300
REGISTRY_REFRESH_TASK = None
TEST_RECORDING_CONTEXT = None
async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
@@ -307,6 +308,15 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
async def construct_stack(
run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None
) -> dict[Api, Any]:
inference_mode = os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "live").lower()
if inference_mode in ["record", "replay"]:
global TEST_RECORDING_CONTEXT
from llama_stack.testing.inference_recorder import setup_inference_recording
TEST_RECORDING_CONTEXT = setup_inference_recording()
TEST_RECORDING_CONTEXT.__enter__()
logger.info(f"Inference recording enabled: mode={inference_mode}")
dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name)
policy = run_config.server.auth.access_policy if run_config.server.auth else []
impls = await resolve_impls(
@@ -352,6 +362,13 @@ async def shutdown_stack(impls: dict[Api, Any]):
except (Exception, asyncio.CancelledError) as e:
logger.exception(f"Failed to shutdown {impl_name}: {e}")
global TEST_RECORDING_CONTEXT
if TEST_RECORDING_CONTEXT:
try:
TEST_RECORDING_CONTEXT.__exit__(None, None, None)
except Exception as e:
logger.error(f"Error during inference recording cleanup: {e}")
global REGISTRY_REFRESH_TASK
if REGISTRY_REFRESH_TASK:
REGISTRY_REFRESH_TASK.cancel()


@@ -13,13 +13,23 @@ import sqlite3
from collections.abc import Generator
from contextlib import contextmanager
from pathlib import Path
from typing import Any, cast
from typing import Any, Literal, cast
from llama_stack.log import get_logger
logger = get_logger(__name__, category="testing")
# Global state for the recording system
_current_mode: str | None = None
_current_storage: ResponseStorage | None = None
_original_methods: dict[str, Any] = {}
from openai.types.completion_choice import CompletionChoice
# update the "finish_reason" field, since its type definition is wrong (it does not accept None)
CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
CompletionChoice.model_rebuild()
def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict[str, Any]) -> str:
"""Create a normalized hash of the request for consistent matching."""
@@ -35,14 +45,14 @@ def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict
def get_inference_mode() -> str:
return os.environ.get("LLAMA_STACK_INFERENCE_MODE", "live").lower()
return os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "live").lower()
def setup_inference_recording():
mode = get_inference_mode()
if mode not in ["live", "record", "replay"]:
raise ValueError(f"Invalid LLAMA_STACK_INFERENCE_MODE: {mode}. Must be 'live', 'record', or 'replay'")
raise ValueError(f"Invalid LLAMA_STACK_TEST_INFERENCE_MODE: {mode}. Must be 'live', 'record', or 'replay'")
if mode == "live":
# Return a no-op context manager for live mode
@@ -52,23 +62,43 @@ def setup_inference_recording():
return live_mode()
if "LLAMA_STACK_RECORDING_DIR" not in os.environ:
raise ValueError("LLAMA_STACK_RECORDING_DIR must be set for recording or replaying")
storage_dir = os.environ["LLAMA_STACK_RECORDING_DIR"]
if "LLAMA_STACK_TEST_RECORDING_DIR" not in os.environ:
raise ValueError("LLAMA_STACK_TEST_RECORDING_DIR must be set for recording or replaying")
storage_dir = os.environ["LLAMA_STACK_TEST_RECORDING_DIR"]
return inference_recording(mode=mode, storage_dir=storage_dir)
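The live branch above returns a no-op context manager whose body falls between these hunks; a plausible minimal form, shown for orientation only:

from contextlib import contextmanager

@contextmanager
def live_mode():
    # live mode: nothing is patched, requests go to the real backends
    yield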
def _serialize_response(response: Any) -> Any:
if hasattr(response, "model_dump"):
return response.model_dump()
data = response.model_dump(mode="json")
return {
"__type__": f"{response.__class__.__module__}.{response.__class__.__qualname__}",
"__data__": data,
}
elif hasattr(response, "__dict__"):
return dict(response.__dict__)
else:
return response
def _deserialize_response(data: dict[str, Any]) -> dict[str, Any]:
def _deserialize_response(data: dict[str, Any]) -> Any:
# Check if this is a serialized Pydantic model with type information
if isinstance(data, dict) and "__type__" in data and "__data__" in data:
try:
# Import the original class and reconstruct the object
module_path, class_name = data["__type__"].rsplit(".", 1)
module = __import__(module_path, fromlist=[class_name])
cls = getattr(module, class_name)
if not hasattr(cls, "model_validate"):
raise ValueError(f"Pydantic class {cls} does not support model_validate?")
return cls.model_validate(data["__data__"])
except (ImportError, AttributeError, TypeError, ValueError) as e:
logger.warning(f"Failed to deserialize object of type {data['__type__']}: {e}")
return data["__data__"]
return data
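A hedged round-trip illustration of the "__type__"/"__data__" envelope handled above. The GenerateResponse values mirror the recordings later in this diff; the constructor call assumes the remaining fields are optional in the installed ollama version:

from ollama._types import GenerateResponse

resp = GenerateResponse(model="llama3.2:3b-instruct-fp16", done=True, response="Humans live on Earth.")
blob = _serialize_response(resp)
# blob == {"__type__": "ollama._types.GenerateResponse", "__data__": {...}}
restored = _deserialize_response(blob)
assert isinstance(restored, GenerateResponse)
assert restored.response == resp.response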
@@ -120,6 +150,7 @@ class ResponseStorage:
with open(response_path, "w") as f:
json.dump({"request": request, "response": serialized_response}, f, indent=2)
f.write("\n")
f.flush()
# Update SQLite index
with sqlite3.connect(self.db_path) as conn:
@@ -214,6 +245,8 @@ async def _patched_inference_method(original_method, self, client_type, method_n
endpoint = "/api/chat"
elif method_name == "embed":
endpoint = "/api/embeddings"
elif method_name == "list":
endpoint = "/api/tags"
else:
endpoint = f"/api/{method_name}"
else:
@@ -295,8 +328,6 @@ def patch_inference_clients():
"""Install monkey patches for OpenAI client methods and Ollama AsyncClient methods."""
global _original_methods
# Import here to avoid circular imports
# Also import Ollama AsyncClient
from ollama import AsyncClient as OllamaAsyncClient
from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
from openai.resources.completions import AsyncCompletions
@@ -310,6 +341,9 @@ def patch_inference_clients():
"ollama_generate": OllamaAsyncClient.generate,
"ollama_chat": OllamaAsyncClient.chat,
"ollama_embed": OllamaAsyncClient.embed,
"ollama_ps": OllamaAsyncClient.ps,
"ollama_pull": OllamaAsyncClient.pull,
"ollama_list": OllamaAsyncClient.list,
}
# Create patched methods for OpenAI client
@@ -339,10 +373,24 @@ def patch_inference_clients():
async def patched_ollama_embed(self, **kwargs):
return await _patched_inference_method(_original_methods["ollama_embed"], self, "ollama", "embed", **kwargs)
async def patched_ollama_ps(self, **kwargs):
logger.info("replay mode: ollama.ps() reporting success")
return []
async def patched_ollama_pull(self, *args, **kwargs):
logger.info("replay mode: ollama.pull() not actually pulling the model")
return None
async def patched_ollama_list(self, **kwargs):
return await _patched_inference_method(_original_methods["ollama_list"], self, "ollama", "list", **kwargs)
# Apply Ollama patches
OllamaAsyncClient.generate = patched_ollama_generate
OllamaAsyncClient.chat = patched_ollama_chat
OllamaAsyncClient.embed = patched_ollama_embed
OllamaAsyncClient.ps = patched_ollama_ps
OllamaAsyncClient.pull = patched_ollama_pull
OllamaAsyncClient.list = patched_ollama_list
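Under these patches the non-inference calls become cheap: ps() reports an empty process list, pull() skips the download, and list() round-trips through the /api/tags recording. A hedged illustration, assuming patch_inference_clients() has already run in replay mode:

import asyncio

from ollama import AsyncClient

async def demo() -> None:
    client = AsyncClient()
    print(await client.ps())                 # [] -- stubbed, no server query
    print(await client.pull("llama3.2:3b"))  # None -- nothing is downloaded
    print(await client.list())               # ListResponse replayed from /api/tags

asyncio.run(demo())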
def unpatch_inference_clients():
@@ -367,6 +415,8 @@ def unpatch_inference_clients():
OllamaAsyncClient.generate = _original_methods["ollama_generate"]
OllamaAsyncClient.chat = _original_methods["ollama_chat"]
OllamaAsyncClient.embed = _original_methods["ollama_embed"]
OllamaAsyncClient.ps = _original_methods["ollama_ps"]
OllamaAsyncClient.pull = _original_methods["ollama_pull"]
OllamaAsyncClient.list = _original_methods["ollama_list"]
_original_methods.clear()
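patch_inference_clients() and unpatch_inference_clients() are presumably paired by the inference_recording context manager referenced earlier; a minimal sketch of that pairing, assuming only the two function names above:

from contextlib import contextmanager

@contextmanager
def _recording_scope():
    patch_inference_clients()
    try:
        yield
    finally:
        unpatch_inference_clients()  # always restore the real client methods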


@@ -185,95 +185,70 @@ def llama_stack_client(request, provider_data):
if not config:
raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")
# Set up inference recording if enabled
inference_mode = os.environ.get("LLAMA_STACK_INFERENCE_MODE", "live").lower()
recording_context = None
# Handle server:<config_name> format or server:<config_name>:<port>
if config.startswith("server:"):
parts = config.split(":")
config_name = parts[1]
port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
base_url = f"http://localhost:{port}"
if inference_mode in ["record", "replay"]:
from llama_stack.testing.inference_recorder import setup_inference_recording
# Check if port is available
if is_port_available(port):
print(f"Starting llama stack server with config '{config_name}' on port {port}...")
recording_context = setup_inference_recording()
recording_context.__enter__()
print(f"Inference recording enabled: mode={inference_mode}")
# Start server
server_process = start_llama_stack_server(config_name)
try:
# Handle server:<config_name> format or server:<config_name>:<port>
if config.startswith("server:"):
parts = config.split(":")
config_name = parts[1]
port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
base_url = f"http://localhost:{port}"
# Check if port is available
if is_port_available(port):
print(f"Starting llama stack server with config '{config_name}' on port {port}...")
# Start server
server_process = start_llama_stack_server(config_name)
# Wait for server to be ready
if not wait_for_server_ready(base_url, timeout=120, process=server_process):
print("Server failed to start within timeout")
server_process.terminate()
raise RuntimeError(
f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
f"See server.log for details."
)
print(f"Server is ready at {base_url}")
# Store process for potential cleanup (pytest will handle termination at session end)
request.session._llama_stack_server_process = server_process
else:
print(f"Port {port} is already in use, assuming server is already running...")
client = LlamaStackClient(
base_url=base_url,
provider_data=provider_data,
timeout=int(os.environ.get("LLAMA_STACK_CLIENT_TIMEOUT", "30")),
)
else:
# check if this looks like a URL using proper URL parsing
try:
parsed_url = urlparse(config)
if parsed_url.scheme and parsed_url.netloc:
client = LlamaStackClient(
base_url=config,
provider_data=provider_data,
)
else:
raise ValueError("Not a URL")
except Exception as e:
# If URL parsing fails, treat as library config
if "=" in config:
run_config = run_config_from_adhoc_config_spec(config)
run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
with open(run_config_file.name, "w") as f:
yaml.dump(run_config.model_dump(), f)
config = run_config_file.name
client = LlamaStackAsLibraryClient(
config,
provider_data=provider_data,
skip_logger_removal=True,
# Wait for server to be ready
if not wait_for_server_ready(base_url, timeout=120, process=server_process):
print("Server failed to start within timeout")
server_process.terminate()
raise RuntimeError(
f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
f"See server.log for details."
)
if not client.initialize():
raise RuntimeError("Initialization failed") from e
# Store recording context for cleanup
if recording_context:
request.session._inference_recording_context = recording_context
print(f"Server is ready at {base_url}")
return client
# Store process for potential cleanup (pytest will handle termination at session end)
request.session._llama_stack_server_process = server_process
else:
print(f"Port {port} is already in use, assuming server is already running...")
return LlamaStackClient(
base_url=base_url,
provider_data=provider_data,
timeout=int(os.environ.get("LLAMA_STACK_CLIENT_TIMEOUT", "30")),
)
# check if this looks like a URL using proper URL parsing
try:
parsed_url = urlparse(config)
if parsed_url.scheme and parsed_url.netloc:
return LlamaStackClient(
base_url=config,
provider_data=provider_data,
)
except Exception:
# Clean up recording context on error
if recording_context:
try:
recording_context.__exit__(None, None, None)
except Exception as cleanup_error:
print(f"Warning: Error cleaning up recording context: {cleanup_error}")
raise
# If URL parsing fails, treat as non-URL config
pass
if "=" in config:
run_config = run_config_from_adhoc_config_spec(config)
run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
with open(run_config_file.name, "w") as f:
yaml.dump(run_config.model_dump(), f)
config = run_config_file.name
client = LlamaStackAsLibraryClient(
config,
provider_data=provider_data,
skip_logger_removal=True,
)
if not client.initialize():
raise RuntimeError("Initialization failed")
return client
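For orientation, a hedged sketch of a test consuming this fixture; the models.list() call is an assumed client-SDK method, not something this diff shows:

def test_client_smoke(llama_stack_client):
    # The fixture yields either a LlamaStackClient (HTTP) or a
    # LlamaStackAsLibraryClient (in-process) with the same surface.
    models = llama_stack_client.models.list()  # assumed SDK method
    assert models is not None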
@pytest.fixture(scope="session")
@@ -289,20 +264,9 @@ def compat_client(request):
@pytest.fixture(scope="session", autouse=True)
def cleanup_server_process(request):
"""Cleanup server process and inference recording at the end of the test session."""
"""Cleanup server process at the end of the test session."""
yield # Run tests
# Clean up inference recording context
if hasattr(request.session, "_inference_recording_context"):
recording_context = request.session._inference_recording_context
if recording_context:
try:
print("Cleaning up inference recording context...")
recording_context.__exit__(None, None, None)
except Exception as e:
print(f"Error during inference recording cleanup: {e}")
# Clean up server process
if hasattr(request.session, "_llama_stack_server_process"):
server_process = request.session._llama_stack_server_process
if server_process:


@@ -19,19 +19,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.555099Z",
"done": true,
"done_reason": "stop",
"total_duration": 2124168875,
"load_duration": 58506875,
"prompt_eval_count": 18,
"prompt_eval_duration": 70072583,
"eval_count": 43,
"eval_duration": 1994446917,
"response": " _______.\n\nThe best answer is blue. The traditional nursery rhyme goes like this:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you! (Or something similar.)",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:01.657977Z",
"done": true,
"done_reason": "stop",
"total_duration": 1860746958,
"load_duration": 59632791,
"prompt_eval_count": 18,
"prompt_eval_duration": 66333291,
"eval_count": 43,
"eval_duration": 1734228875,
"response": " _______.\n\nThe best answer is blue. The traditional nursery rhyme goes like this:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you! (Or something similar.)",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -17,19 +17,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.535525Z",
"done": true,
"done_reason": "stop",
"total_duration": 358691334,
"load_duration": 76787334,
"prompt_eval_count": 23,
"prompt_eval_duration": 72235375,
"eval_count": 6,
"eval_duration": 208986666,
"response": "Humans live on Earth.",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.658757Z",
"done": true,
"done_reason": "stop",
"total_duration": 327636667,
"load_duration": 55751292,
"prompt_eval_count": 23,
"prompt_eval_duration": 65865625,
"eval_count": 6,
"eval_duration": 205366667,
"response": "Humans live on Earth.",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -18,169 +18,202 @@
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.691771Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.833193Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.732262Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Latin",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.874274Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Latin",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.77294Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " word",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.915195Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " word",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.814484Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.955964Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.854875Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " \"",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.996679Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " \"",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.895957Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "Sun",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.037268Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "Sun",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.937445Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\"",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.078124Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\"",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.978832Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.118771Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.019242Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Sol",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.159758Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Sol",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.059902Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.200659Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.100535Z",
"done": true,
"done_reason": "stop",
"total_duration": 528254250,
"load_duration": 50177125,
"prompt_eval_count": 26,
"prompt_eval_duration": 68018458,
"eval_count": 11,
"eval_duration": 409555959,
"response": "",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.241464Z",
"done": true,
"done_reason": "stop",
"total_duration": 542773583,
"load_duration": 68681541,
"prompt_eval_count": 26,
"prompt_eval_duration": 64427833,
"eval_count": 11,
"eval_duration": 409175667,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true


@@ -18,169 +18,202 @@
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.480955Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.590293Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.527418Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.638879Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.571522Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.69047Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.615027Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.738247Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.660598Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.782316Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.705052Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.827405Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.754386Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.872241Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.796942Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.91705Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.845807Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.962046Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.891254Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.00691Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.934197Z",
"done": true,
"done_reason": "stop",
"total_duration": 574307083,
"load_duration": 72062083,
"prompt_eval_count": 324,
"prompt_eval_duration": 47115625,
"eval_count": 11,
"eval_duration": 454426708,
"response": "",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.052731Z",
"done": true,
"done_reason": "stop",
"total_duration": 649504167,
"load_duration": 138022250,
"prompt_eval_count": 324,
"prompt_eval_duration": 45176916,
"eval_count": 11,
"eval_duration": 464930417,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true


@@ -64,19 +64,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:13.438182Z",
"done": true,
"done_reason": "stop",
"total_duration": 2975265833,
"load_duration": 95592083,
"prompt_eval_count": 259,
"prompt_eval_duration": 367103709,
"eval_count": 60,
"eval_duration": 2511576708,
"response": "{\n \"first_name\": \"Michael\",\n \"last_name\": \"Jordan\",\n \"year_of_birth\": 1963,\n \"nba_stats\": {\n \"year_for_draft\": 1984,\n \"num_seasons_in_nba\": 15\n }\n}",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:21.486763Z",
"done": true,
"done_reason": "stop",
"total_duration": 2935272125,
"load_duration": 51690583,
"prompt_eval_count": 259,
"prompt_eval_duration": 369929333,
"eval_count": 60,
"eval_duration": 2512928125,
"response": "{\n \"first_name\": \"Michael\",\n \"last_name\": \"Jordan\",\n \"year_of_birth\": 1963,\n \"nba_stats\": {\n \"year_for_draft\": 1984,\n \"num_seasons_in_nba\": 15\n }\n}",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -0,0 +1,132 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/tags",
"headers": {},
"body": {},
"endpoint": "/api/tags",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ListResponse",
"__data__": {
"models": [
{
"model": "nomic-embed-text:latest",
"modified_at": "2025-07-29T10:36:48.647829-07:00",
"digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
"size": 274302450,
"details": {
"parent_model": "",
"format": "gguf",
"family": "nomic-bert",
"families": [
"nomic-bert"
],
"parameter_size": "137M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"modified_at": "2025-07-25T14:39:44.978630-07:00",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"size": 1600181919,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
},
{
"model": "all-minilm:l6-v2",
"modified_at": "2025-07-24T15:15:11.129290-07:00",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"size": 45960996,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:1b",
"modified_at": "2025-07-17T22:02:24.953208-07:00",
"digest": "baf6a787fdffd633537aa2eb51cfd54cb93ff08e28040095462bb63daf552878",
"size": 1321098329,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.2B",
"quantization_level": "Q8_0"
}
},
{
"model": "all-minilm:latest",
"modified_at": "2025-06-03T16:50:10.946583-07:00",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"size": 45960996,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b",
"modified_at": "2025-05-01T11:15:23.797447-07:00",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"size": 2019393189,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"modified_at": "2025-04-30T15:33:48.939665-07:00",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"size": 6433703586,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}
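This new recording backs the patched list() call; a hedged sketch of how its lookup key would be computed with normalize_request from earlier in this diff:

key = normalize_request(
    method="POST",
    url="http://localhost:11434/api/tags",
    headers={},
    body={},
)
# The resulting hash indexes this file in the SQLite index kept by ResponseStorage.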


@@ -18,304 +18,364 @@
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.227427Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:29.898943Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.275725Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " capital",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:29.940406Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " capital",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.316195Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:29.983928Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.356832Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " the",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.025678Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " the",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.397682Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " United",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.066554Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " United",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.438761Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " States",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.111853Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " States",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.480453Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.156263Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.523691Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Washington",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.197342Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Washington",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.565106Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.238939Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.606315Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " D",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.28041Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " D",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.647209Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".C",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.321152Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".C",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.687828Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.362571Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.728386Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " (",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.404107Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " (",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.769091Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "short",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.444632Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "short",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.809726Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.486331Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.850489Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " District",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.527309Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " District",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.89147Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.568556Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.932311Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Columbia",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.610745Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Columbia",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.973566Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ").",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.654172Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ").",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:22.014466Z",
"done": true,
"done_reason": "stop",
"total_duration": 1034011167,
"load_duration": 176591709,
"prompt_eval_count": 26,
"prompt_eval_duration": 68104583,
"eval_count": 20,
"eval_duration": 788670334,
"response": "",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.695146Z",
"done": true,
"done_reason": "stop",
"total_duration": 1078314875,
"load_duration": 203057166,
"prompt_eval_count": 26,
"prompt_eval_duration": 77142708,
"eval_count": 20,
"eval_duration": 797458917,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true


@@ -17,19 +17,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:20.924128Z",
"done": true,
"done_reason": "stop",
"total_duration": 3308469666,
"load_duration": 66702250,
"prompt_eval_count": 30,
"prompt_eval_duration": 391410334,
"eval_count": 70,
"eval_duration": 2849497291,
"response": "The answer is Saturn! Saturn's ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice particles, rock debris, and dust that orbit around the planet due to its gravitational pull.\n\nWould you like to know more about Saturn's rings or is there something else I can help you with?",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:29.59115Z",
"done": true,
"done_reason": "stop",
"total_duration": 3560608959,
"load_duration": 57756625,
"prompt_eval_count": 30,
"prompt_eval_duration": 370892334,
"eval_count": 70,
"eval_duration": 3131360625,
"response": "The answer is Saturn! Saturn's ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice particles, rock debris, and dust that orbit around the planet due to its gravitational pull.\n\nWould you like to know more about Saturn's rings or is there something else I can help you with?",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -42,19 +42,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.15491Z",
"done": true,
"done_reason": "stop",
"total_duration": 1570055875,
"load_duration": 87677125,
"prompt_eval_count": 119,
"prompt_eval_duration": 190281458,
"eval_count": 29,
"eval_duration": 1291217083,
"response": "{ \"name\": \"Michael Jordan\", \"year_born\": \"1963\", \"year_retired\": \"2003\"}\n ",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.293829Z",
"done": true,
"done_reason": "stop",
"total_duration": 1587647875,
"load_duration": 58102458,
"prompt_eval_count": 119,
"prompt_eval_duration": 199832792,
"eval_count": 29,
"eval_duration": 1328951417,
"response": "{ \"name\": \"Michael Jordan\", \"year_born\": \"1963\", \"year_retired\": \"2003\"}\n ",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -17,19 +17,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.104609Z",
"done": true,
"done_reason": "stop",
"total_duration": 948932208,
"load_duration": 68549542,
"prompt_eval_count": 324,
"prompt_eval_duration": 460136875,
"eval_count": 11,
"eval_duration": 419553208,
"response": "[get_weather(location=\"San Francisco, CA\")]",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.316473Z",
"done": true,
"done_reason": "stop",
"total_duration": 1001978333,
"load_duration": 125002875,
"prompt_eval_count": 324,
"prompt_eval_duration": 451915875,
"eval_count": 11,
"eval_duration": 424435375,
"response": "[get_weather(location=\"San Francisco, CA\")]",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -13,25 +13,28 @@
},
"response": {
"body": {
"id": "cmpl-68",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Blue.\n\nThe completed quote is a well-known poetic phrase often used as a tongue-in-cheek romantic gesture. However, it's worth noting that true violets are actually purple in color, not blue. This phrase is a playful variation of the traditional \"Roses are red, violets are blue,\" which typically goes like this:\n\n\"Roses are red, violets are blue,\nSugar is sweet, and so are you.\"\n\nThis original quote has been used for centuries to make a lighthearted, whimsical compliment in poetry, songs, and spoken words."
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-256",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Blue.\n\nMy response is a play on words of the classic nursery rhyme \"Roses are red, violets are blue.\" In traditional rhymes, violets are typically depicted as being purple or blue in color. I've taken this convention and completed the sentence with the word \"blue\" to create a punny adaptation of the original phrase."
}
],
"created": 1753810618,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 72,
"prompt_tokens": 50,
"total_tokens": 122,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
],
"created": 1753762608,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 120,
"prompt_tokens": 50,
"total_tokens": 170,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
},
"is_streaming": false


@@ -17,19 +17,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:22.73932Z",
"done": true,
"done_reason": "stop",
"total_duration": 660872000,
"load_duration": 76282083,
"prompt_eval_count": 386,
"prompt_eval_duration": 541896167,
"eval_count": 2,
"eval_duration": 42127791,
"response": "[]",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:31.563015Z",
"done": true,
"done_reason": "stop",
"total_duration": 697289250,
"load_duration": 84472834,
"prompt_eval_count": 386,
"prompt_eval_duration": 564623708,
"eval_count": 2,
"eval_duration": 47500459,
"response": "[]",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -18,169 +18,202 @@
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.217546Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.301403Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.267879Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.344225Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.315525Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.38649Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.362669Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.42879Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.406139Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.470562Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.450302Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.512144Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.496893Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.553706Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.540977Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.59536Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.586272Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.636886Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.631743Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.678935Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.676251Z",
"done": true,
"done_reason": "stop",
"total_duration": 682827167,
"load_duration": 111852875,
"prompt_eval_count": 339,
"prompt_eval_duration": 109521833,
"eval_count": 11,
"eval_duration": 460495042,
"response": "",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.719919Z",
"done": true,
"done_reason": "stop",
"total_duration": 609334000,
"load_duration": 85744542,
"prompt_eval_count": 339,
"prompt_eval_duration": 102984708,
"eval_count": 11,
"eval_duration": 420012834,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true

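Streaming recordings like the two above differ only in shape: "body" is a list of chunk envelopes and "is_streaming" is true, with "done" false on every chunk until the final one. A hedged sketch of how a replay layer could turn that list back into the async iterator callers expect; again, the function names are illustrative rather than the recorder's actual internals.

import importlib
from collections.abc import AsyncIterator
from typing import Any


def _decode(envelope: dict[str, Any]) -> Any:
    # Same envelope layout as the recordings above: rebuild the recorded
    # Pydantic type from its fully qualified class name and dumped fields.
    module_path, _, class_name = envelope["__type__"].rpartition(".")
    cls = getattr(importlib.import_module(module_path), class_name)
    return cls.model_validate(envelope["__data__"])


async def replay_stream(recording: dict[str, Any]) -> AsyncIterator[Any]:
    # Yield each recorded chunk in order; consumers see the same sequence
    # of partial responses the live server produced (done=false until last).
    for chunk in recording["body"]:
        yield _decode(chunk)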
View file

@@ -18,34 +18,40 @@
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:14.122273Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[]",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:22.214492Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[]",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:14.165968Z",
"done": true,
"done_reason": "stop",
"total_duration": 663520959,
"load_duration": 67474917,
"prompt_eval_count": 386,
"prompt_eval_duration": 545132042,
"eval_count": 2,
"eval_duration": 50234083,
"response": "",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:22.25685Z",
"done": true,
"done_reason": "stop",
"total_duration": 690090375,
"load_duration": 101933500,
"prompt_eval_count": 386,
"prompt_eval_duration": 544455708,
"eval_count": 2,
"eval_duration": 42919375,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true

View file

@@ -7,11 +7,20 @@
import sqlite3
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch
from unittest.mock import patch
import pytest
from openai import AsyncOpenAI
# Import the real Pydantic response types instead of using Mocks
from llama_stack.apis.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChoice,
OpenAIEmbeddingData,
OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage,
)
from llama_stack.testing.inference_recorder import (
ResponseStorage,
inference_recording,
@@ -27,44 +36,36 @@ def temp_storage_dir():
@pytest.fixture
def mock_openai_response():
"""Mock OpenAI response object."""
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Hello! I'm doing well, thank you for asking."
mock_response.model_dump.return_value = {
"id": "chatcmpl-test123",
"object": "chat.completion",
"choices": [
{
"index": 0,
"message": {"role": "assistant", "content": "Hello! I'm doing well, thank you for asking."},
"finish_reason": "stop",
}
def real_openai_chat_response():
"""Real OpenAI chat completion response using proper Pydantic objects."""
return OpenAIChatCompletion(
id="chatcmpl-test123",
choices=[
OpenAIChoice(
index=0,
message=OpenAIAssistantMessageParam(
role="assistant", content="Hello! I'm doing well, thank you for asking."
),
finish_reason="stop",
)
],
"model": "llama3.2:3b",
"usage": {"prompt_tokens": 10, "completion_tokens": 15, "total_tokens": 25},
}
return mock_response
created=1234567890,
model="llama3.2:3b",
)
@pytest.fixture
def mock_embeddings_response():
"""Mock OpenAI embeddings response object."""
mock_response = Mock()
mock_response.data = [Mock(embedding=[0.1, 0.2, 0.3]), Mock(embedding=[0.4, 0.5, 0.6])]
mock_response.model_dump.return_value = {
"object": "list",
"data": [
{"object": "embedding", "embedding": [0.1, 0.2, 0.3], "index": 0},
{"object": "embedding", "embedding": [0.4, 0.5, 0.6], "index": 1},
def real_embeddings_response():
"""Real OpenAI embeddings response using proper Pydantic objects."""
return OpenAIEmbeddingsResponse(
object="list",
data=[
OpenAIEmbeddingData(object="embedding", embedding=[0.1, 0.2, 0.3], index=0),
OpenAIEmbeddingData(object="embedding", embedding=[0.4, 0.5, 0.6], index=1),
],
"model": "nomic-embed-text",
"usage": {"prompt_tokens": 6, "total_tokens": 6},
}
return mock_response
model="nomic-embed-text",
usage=OpenAIEmbeddingUsage(prompt_tokens=6, total_tokens=6),
)
class TestInferenceRecording:
@@ -160,11 +161,11 @@ class TestInferenceRecording:
assert retrieved["request"]["model"] == "llama3.2:3b"
assert retrieved["response"]["body"]["content"] == "test response"
async def test_recording_mode(self, temp_storage_dir, mock_openai_response):
async def test_recording_mode(self, temp_storage_dir, real_openai_chat_response):
"""Test that recording mode captures and stores responses."""
async def mock_create(*args, **kwargs):
return mock_openai_response
return real_openai_chat_response
temp_storage_dir = temp_storage_dir / "test_recording_mode"
with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
@@ -188,11 +189,11 @@ class TestInferenceRecording:
assert recordings == 1
async def test_replay_mode(self, temp_storage_dir, mock_openai_response):
async def test_replay_mode(self, temp_storage_dir, real_openai_chat_response):
"""Test that replay mode returns stored responses without making real calls."""
async def mock_create(*args, **kwargs):
return mock_openai_response
return real_openai_chat_response
temp_storage_dir = temp_storage_dir / "test_replay_mode"
# First, record a response
@@ -200,7 +201,7 @@ class TestInferenceRecording:
with inference_recording(mode="record", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
await client.chat.completions.create(
response = await client.chat.completions.create(
model="llama3.2:3b",
messages=[{"role": "user", "content": "Hello, how are you?"}],
temperature=0.7,
@@ -220,7 +221,7 @@
)
# Verify we got the recorded response
assert response["choices"][0]["message"]["content"] == "Hello! I'm doing well, thank you for asking."
assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
# Verify the original method was NOT called
mock_create_patch.assert_not_called()
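test_replay_mode above exercises the public flow end to end: record a chat completion once, then serve it back with no network call. Condensed outside the test harness, that flow looks roughly like the sketch below; the endpoint, model, and message values mirror the fixtures, and this is a sketch under those assumptions rather than a prescribed recipe.

import asyncio

from openai import AsyncOpenAI

from llama_stack.testing.inference_recorder import inference_recording


async def demo(storage_dir: str) -> None:
    # Pass 1: record. The real request goes out and the response is stored.
    with inference_recording(mode="record", storage_dir=storage_dir):
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        await client.chat.completions.create(
            model="llama3.2:3b",
            messages=[{"role": "user", "content": "Hello, how are you?"}],
            temperature=0.7,
        )

    # Pass 2: replay. Served from the recording; nothing reaches the server.
    with inference_recording(mode="replay", storage_dir=storage_dir):
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        response = await client.chat.completions.create(
            model="llama3.2:3b",
            messages=[{"role": "user", "content": "Hello, how are you?"}],
            temperature=0.7,
        )
        print(response.choices[0].message.content)


# asyncio.run(demo("./recordings"))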
@@ -237,11 +238,11 @@ class TestInferenceRecording:
model="llama3.2:3b", messages=[{"role": "user", "content": "This was never recorded"}]
)
async def test_embeddings_recording(self, temp_storage_dir, mock_embeddings_response):
async def test_embeddings_recording(self, temp_storage_dir, real_embeddings_response):
"""Test recording and replay of embeddings calls."""
async def mock_create(*args, **kwargs):
return mock_embeddings_response
return real_embeddings_response
temp_storage_dir = temp_storage_dir / "test_embeddings_recording"
# Record
@@ -265,17 +266,17 @@
)
# Verify we got the recorded response
assert len(response["data"]) == 2
assert response["data"][0]["embedding"] == [0.1, 0.2, 0.3]
assert len(response.data) == 2
assert response.data[0].embedding == [0.1, 0.2, 0.3]
# Verify original method was not called
mock_create_patch.assert_not_called()
async def test_live_mode(self, mock_openai_response):
async def test_live_mode(self, real_openai_chat_response):
"""Test that live mode passes through to original methods."""
async def mock_create(*args, **kwargs):
return mock_openai_response
return real_openai_chat_response
with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
with inference_recording(mode="live"):