Improvements to include other Ollama methods such as ps, list, and pull

2025-12-23 01:12:26 +00:00 · 2025-07-29 10:56:57 -07:00 · 2025-07-29 10:56:57 -07:00 · b578f9aec1
commit b578f9aec1
parent 1a21c4b695
23 changed files with 6748 additions and 5711 deletions
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@ -185,95 +185,70 @@ def llama_stack_client(request, provider_data):
    if not config:
        raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")

-    # Set up inference recording if enabled
-    inference_mode = os.environ.get("LLAMA_STACK_INFERENCE_MODE", "live").lower()
-    recording_context = None
+    # Handle server:<config_name> format or server:<config_name>:<port>
+    if config.startswith("server:"):
+        parts = config.split(":")
+        config_name = parts[1]
+        port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+        base_url = f"http://localhost:{port}"

-    if inference_mode in ["record", "replay"]:
-        from llama_stack.testing.inference_recorder import setup_inference_recording
+        # Check if port is available
+        if is_port_available(port):
+            print(f"Starting llama stack server with config '{config_name}' on port {port}...")

-        recording_context = setup_inference_recording()
-        recording_context.__enter__()
-        print(f"Inference recording enabled: mode={inference_mode}")
+            # Start server
+            server_process = start_llama_stack_server(config_name)

-    try:
-        # Handle server:<config_name> format or server:<config_name>:<port>
-        if config.startswith("server:"):
-            parts = config.split(":")
-            config_name = parts[1]
-            port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
-            base_url = f"http://localhost:{port}"
-
-            # Check if port is available
-            if is_port_available(port):
-                print(f"Starting llama stack server with config '{config_name}' on port {port}...")
-
-                # Start server
-                server_process = start_llama_stack_server(config_name)
-
-                # Wait for server to be ready
-                if not wait_for_server_ready(base_url, timeout=120, process=server_process):
-                    print("Server failed to start within timeout")
-                    server_process.terminate()
-                    raise RuntimeError(
-                        f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
-                        f"See server.log for details."
-                    )
-
-                print(f"Server is ready at {base_url}")
-
-                # Store process for potential cleanup (pytest will handle termination at session end)
-                request.session._llama_stack_server_process = server_process
-            else:
-                print(f"Port {port} is already in use, assuming server is already running...")
-
-            client = LlamaStackClient(
-                base_url=base_url,
-                provider_data=provider_data,
-                timeout=int(os.environ.get("LLAMA_STACK_CLIENT_TIMEOUT", "30")),
-            )
-        else:
-            # check if this looks like a URL using proper URL parsing
-            try:
-                parsed_url = urlparse(config)
-                if parsed_url.scheme and parsed_url.netloc:
-                    client = LlamaStackClient(
-                        base_url=config,
-                        provider_data=provider_data,
-                    )
-                else:
-                    raise ValueError("Not a URL")
-            except Exception as e:
-                # If URL parsing fails, treat as library config
-                if "=" in config:
-                    run_config = run_config_from_adhoc_config_spec(config)
-                    run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
-                    with open(run_config_file.name, "w") as f:
-                        yaml.dump(run_config.model_dump(), f)
-                    config = run_config_file.name
-
-                client = LlamaStackAsLibraryClient(
-                    config,
-                    provider_data=provider_data,
-                    skip_logger_removal=True,
+            # Wait for server to be ready
+            if not wait_for_server_ready(base_url, timeout=120, process=server_process):
+                print("Server failed to start within timeout")
+                server_process.terminate()
+                raise RuntimeError(
+                    f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
+                    f"See server.log for details."
                )
-                if not client.initialize():
-                    raise RuntimeError("Initialization failed") from e

-        # Store recording context for cleanup
-        if recording_context:
-            request.session._inference_recording_context = recording_context
+            print(f"Server is ready at {base_url}")

-        return client
+            # Store process for potential cleanup (pytest will handle termination at session end)
+            request.session._llama_stack_server_process = server_process
+        else:
+            print(f"Port {port} is already in use, assuming server is already running...")

+        return LlamaStackClient(
+            base_url=base_url,
+            provider_data=provider_data,
+            timeout=int(os.environ.get("LLAMA_STACK_CLIENT_TIMEOUT", "30")),
+        )
+
+    # check if this looks like a URL using proper URL parsing
+    try:
+        parsed_url = urlparse(config)
+        if parsed_url.scheme and parsed_url.netloc:
+            return LlamaStackClient(
+                base_url=config,
+                provider_data=provider_data,
+            )
    except Exception:
-        # Clean up recording context on error
-        if recording_context:
-            try:
-                recording_context.__exit__(None, None, None)
-            except Exception as cleanup_error:
-                print(f"Warning: Error cleaning up recording context: {cleanup_error}")
-        raise
+        # If URL parsing fails, treat as non-URL config
+        pass
+
+    if "=" in config:
+        run_config = run_config_from_adhoc_config_spec(config)
+        run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
+        with open(run_config_file.name, "w") as f:
+            yaml.dump(run_config.model_dump(), f)
+        config = run_config_file.name
+
+    client = LlamaStackAsLibraryClient(
+        config,
+        provider_data=provider_data,
+        skip_logger_removal=True,
+    )
+    if not client.initialize():
+        raise RuntimeError("Initialization failed")
+
+    return client


@pytest.fixture(scope="session")
@ -289,20 +264,9 @@ def compat_client(request):

@pytest.fixture(scope="session", autouse=True)
 def cleanup_server_process(request):
-    """Cleanup server process and inference recording at the end of the test session."""
+    """Cleanup server process at the end of the test session."""
    yield  # Run tests

-    # Clean up inference recording context
-    if hasattr(request.session, "_inference_recording_context"):
-        recording_context = request.session._inference_recording_context
-        if recording_context:
-            try:
-                print("Cleaning up inference recording context...")
-                recording_context.__exit__(None, None, None)
-            except Exception as e:
-                print(f"Error during inference recording cleanup: {e}")
-
-    # Clean up server process
    if hasattr(request.session, "_llama_stack_server_process"):
        server_process = request.session._llama_stack_server_process
        if server_process:
--- a/tests/integration/inference/recordings/index.sqlite
+++ b/tests/integration/inference/recordings/index.sqlite
--- a/tests/integration/inference/recordings/responses/1b8394f90636.json
+++ b/tests/integration/inference/recordings/responses/1b8394f90636.json
@ -19,19 +19,22 @@
  },
  "response": {
    "body": {
-      "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-29T04:16:53.555099Z",
-      "done": true,
-      "done_reason": "stop",
-      "total_duration": 2124168875,
-      "load_duration": 58506875,
-      "prompt_eval_count": 18,
-      "prompt_eval_duration": 70072583,
-      "eval_count": 43,
-      "eval_duration": 1994446917,
-      "response": " _______.\n\nThe best answer is blue. The traditional nursery rhyme goes like this:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you! (Or something similar.)",
-      "thinking": null,
-      "context": null
+      "__type__": "ollama._types.GenerateResponse",
+      "__data__": {
+        "model": "llama3.2:3b-instruct-fp16",
+        "created_at": "2025-07-29T17:37:01.657977Z",
+        "done": true,
+        "done_reason": "stop",
+        "total_duration": 1860746958,
+        "load_duration": 59632791,
+        "prompt_eval_count": 18,
+        "prompt_eval_duration": 66333291,
+        "eval_count": 43,
+        "eval_duration": 1734228875,
+        "response": " _______.\n\nThe best answer is blue. The traditional nursery rhyme goes like this:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you! (Or something similar.)",
+        "thinking": null,
+        "context": null
+      }
    },
    "is_streaming": false
  }
--- a/tests/integration/inference/recordings/responses/211b1562d4e6.json
+++ b/tests/integration/inference/recordings/responses/211b1562d4e6.json
@ -17,19 +17,22 @@
  },
  "response": {
    "body": {
-      "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-29T04:16:57.535525Z",
-      "done": true,
-      "done_reason": "stop",
-      "total_duration": 358691334,
-      "load_duration": 76787334,
-      "prompt_eval_count": 23,
-      "prompt_eval_duration": 72235375,
-      "eval_count": 6,
-      "eval_duration": 208986666,
-      "response": "Humans live on Earth.",
-      "thinking": null,
-      "context": null
+      "__type__": "ollama._types.GenerateResponse",
+      "__data__": {
+        "model": "llama3.2:3b-instruct-fp16",
+        "created_at": "2025-07-29T17:37:05.658757Z",
+        "done": true,
+        "done_reason": "stop",
+        "total_duration": 327636667,
+        "load_duration": 55751292,
+        "prompt_eval_count": 23,
+        "prompt_eval_duration": 65865625,
+        "eval_count": 6,
+        "eval_duration": 205366667,
+        "response": "Humans live on Earth.",
+        "thinking": null,
+        "context": null
+      }
    },
    "is_streaming": false
  }
--- a/tests/integration/inference/recordings/responses/3c3f13cb7794.json
+++ b/tests/integration/inference/recordings/responses/3c3f13cb7794.json
@ -18,169 +18,202 @@
  "response": {
    "body": [
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:57.691771Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "The",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:05.833193Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "The",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:57.732262Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " Latin",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:05.874274Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " Latin",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:57.77294Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " word",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:05.915195Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " word",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:57.814484Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " for",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:05.955964Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " for",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:57.854875Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " \"",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:05.996679Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " \"",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:57.895957Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "Sun",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:06.037268Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "Sun",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:57.937445Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "\"",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:06.078124Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "\"",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:57.978832Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " is",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:06.118771Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " is",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:58.019242Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " Sol",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:06.159758Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " Sol",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:58.059902Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": ".",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:06.200659Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": ".",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:58.100535Z",
-        "done": true,
-        "done_reason": "stop",
-        "total_duration": 528254250,
-        "load_duration": 50177125,
-        "prompt_eval_count": 26,
-        "prompt_eval_duration": 68018458,
-        "eval_count": 11,
-        "eval_duration": 409555959,
-        "response": "",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:06.241464Z",
+          "done": true,
+          "done_reason": "stop",
+          "total_duration": 542773583,
+          "load_duration": 68681541,
+          "prompt_eval_count": 26,
+          "prompt_eval_duration": 64427833,
+          "eval_count": 11,
+          "eval_duration": 409175667,
+          "response": "",
+          "thinking": null,
+          "context": null
+        }
      }
    ],
    "is_streaming": true
--- a/tests/integration/inference/recordings/responses/40f524d1934a.json
+++ b/tests/integration/inference/recordings/responses/40f524d1934a.json
@ -18,169 +18,202 @@
  "response": {
    "body": [
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:59.480955Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "[",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:07.590293Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "[",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:59.527418Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "get",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:07.638879Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "get",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:59.571522Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "_weather",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:07.69047Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "_weather",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:59.615027Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "(location",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:07.738247Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "(location",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:59.660598Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "=\"",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:07.782316Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "=\"",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:59.705052Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "San",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:07.827405Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "San",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:59.754386Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " Francisco",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:07.872241Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " Francisco",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:59.796942Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": ",",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:07.91705Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": ",",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:59.845807Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " CA",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:07.962046Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " CA",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:59.891254Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "\")]",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.00691Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "\")]",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:16:59.934197Z",
-        "done": true,
-        "done_reason": "stop",
-        "total_duration": 574307083,
-        "load_duration": 72062083,
-        "prompt_eval_count": 324,
-        "prompt_eval_duration": 47115625,
-        "eval_count": 11,
-        "eval_duration": 454426708,
-        "response": "",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.052731Z",
+          "done": true,
+          "done_reason": "stop",
+          "total_duration": 649504167,
+          "load_duration": 138022250,
+          "prompt_eval_count": 324,
+          "prompt_eval_duration": 45176916,
+          "eval_count": 11,
+          "eval_duration": 464930417,
+          "response": "",
+          "thinking": null,
+          "context": null
+        }
      }
    ],
    "is_streaming": true
--- a/tests/integration/inference/recordings/responses/48d2fb183a2a.json
+++ b/tests/integration/inference/recordings/responses/48d2fb183a2a.json
@ -64,19 +64,22 @@
  },
  "response": {
    "body": {
-      "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-29T04:17:13.438182Z",
-      "done": true,
-      "done_reason": "stop",
-      "total_duration": 2975265833,
-      "load_duration": 95592083,
-      "prompt_eval_count": 259,
-      "prompt_eval_duration": 367103709,
-      "eval_count": 60,
-      "eval_duration": 2511576708,
-      "response": "{\n  \"first_name\": \"Michael\",\n  \"last_name\": \"Jordan\",\n  \"year_of_birth\": 1963,\n  \"nba_stats\": {\n    \"year_for_draft\": 1984,\n    \"num_seasons_in_nba\": 15\n  }\n}",
-      "thinking": null,
-      "context": null
+      "__type__": "ollama._types.GenerateResponse",
+      "__data__": {
+        "model": "llama3.2:3b-instruct-fp16",
+        "created_at": "2025-07-29T17:37:21.486763Z",
+        "done": true,
+        "done_reason": "stop",
+        "total_duration": 2935272125,
+        "load_duration": 51690583,
+        "prompt_eval_count": 259,
+        "prompt_eval_duration": 369929333,
+        "eval_count": 60,
+        "eval_duration": 2512928125,
+        "response": "{\n  \"first_name\": \"Michael\",\n  \"last_name\": \"Jordan\",\n  \"year_of_birth\": 1963,\n  \"nba_stats\": {\n    \"year_for_draft\": 1984,\n    \"num_seasons_in_nba\": 15\n  }\n}",
+        "thinking": null,
+        "context": null
+      }
    },
    "is_streaming": false
  }
--- a/tests/integration/inference/recordings/responses/4a3a4447b16b.json
+++ b/tests/integration/inference/recordings/responses/4a3a4447b16b.json
@ -0,0 +1,132 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://localhost:11434/api/tags",
+    "headers": {},
+    "body": {},
+    "endpoint": "/api/tags",
+    "model": ""
+  },
+  "response": {
+    "body": {
+      "__type__": "ollama._types.ListResponse",
+      "__data__": {
+        "models": [
+          {
+            "model": "nomic-embed-text:latest",
+            "modified_at": "2025-07-29T10:36:48.647829-07:00",
+            "digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
+            "size": 274302450,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "nomic-bert",
+              "families": [
+                "nomic-bert"
+              ],
+              "parameter_size": "137M",
+              "quantization_level": "F16"
+            }
+          },
+          {
+            "model": "llama-guard3:1b",
+            "modified_at": "2025-07-25T14:39:44.978630-07:00",
+            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
+            "size": 1600181919,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.5B",
+              "quantization_level": "Q8_0"
+            }
+          },
+          {
+            "model": "all-minilm:l6-v2",
+            "modified_at": "2025-07-24T15:15:11.129290-07:00",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "size": 45960996,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            }
+          },
+          {
+            "model": "llama3.2:1b",
+            "modified_at": "2025-07-17T22:02:24.953208-07:00",
+            "digest": "baf6a787fdffd633537aa2eb51cfd54cb93ff08e28040095462bb63daf552878",
+            "size": 1321098329,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "1.2B",
+              "quantization_level": "Q8_0"
+            }
+          },
+          {
+            "model": "all-minilm:latest",
+            "modified_at": "2025-06-03T16:50:10.946583-07:00",
+            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
+            "size": 45960996,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "bert",
+              "families": [
+                "bert"
+              ],
+              "parameter_size": "23M",
+              "quantization_level": "F16"
+            }
+          },
+          {
+            "model": "llama3.2:3b",
+            "modified_at": "2025-05-01T11:15:23.797447-07:00",
+            "digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
+            "size": 2019393189,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "Q4_K_M"
+            }
+          },
+          {
+            "model": "llama3.2:3b-instruct-fp16",
+            "modified_at": "2025-04-30T15:33:48.939665-07:00",
+            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
+            "size": 6433703586,
+            "details": {
+              "parent_model": "",
+              "format": "gguf",
+              "family": "llama",
+              "families": [
+                "llama"
+              ],
+              "parameter_size": "3.2B",
+              "quantization_level": "F16"
+            }
+          }
+        ]
+      }
+    },
+    "is_streaming": false
+  }
+}
--- a/tests/integration/inference/recordings/responses/6cc063bbd7d3.json
+++ b/tests/integration/inference/recordings/responses/6cc063bbd7d3.json
@ -18,304 +18,364 @@
  "response": {
    "body": [
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.227427Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "The",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:29.898943Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "The",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.275725Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " capital",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:29.940406Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " capital",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.316195Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " of",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:29.983928Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " of",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.356832Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " the",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.025678Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " the",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.397682Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " United",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.066554Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " United",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.438761Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " States",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.111853Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " States",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.480453Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " is",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.156263Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " is",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.523691Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " Washington",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.197342Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " Washington",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.565106Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": ",",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.238939Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": ",",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.606315Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " D",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.28041Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " D",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.647209Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": ".C",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.321152Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": ".C",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.687828Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": ".",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.362571Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": ".",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.728386Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " (",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.404107Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " (",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.769091Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "short",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.444632Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "short",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.809726Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " for",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.486331Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " for",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.850489Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " District",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.527309Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " District",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.89147Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " of",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.568556Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " of",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.932311Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " Columbia",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.610745Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " Columbia",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:21.973566Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": ").",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.654172Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": ").",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:22.014466Z",
-        "done": true,
-        "done_reason": "stop",
-        "total_duration": 1034011167,
-        "load_duration": 176591709,
-        "prompt_eval_count": 26,
-        "prompt_eval_duration": 68104583,
-        "eval_count": 20,
-        "eval_duration": 788670334,
-        "response": "",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:30.695146Z",
+          "done": true,
+          "done_reason": "stop",
+          "total_duration": 1078314875,
+          "load_duration": 203057166,
+          "prompt_eval_count": 26,
+          "prompt_eval_duration": 77142708,
+          "eval_count": 20,
+          "eval_duration": 797458917,
+          "response": "",
+          "thinking": null,
+          "context": null
+        }
      }
    ],
    "is_streaming": true
--- a/tests/integration/inference/recordings/responses/70adef2c30c4.json
+++ b/tests/integration/inference/recordings/responses/70adef2c30c4.json
@ -17,19 +17,22 @@
  },
  "response": {
    "body": {
-      "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-29T04:17:20.924128Z",
-      "done": true,
-      "done_reason": "stop",
-      "total_duration": 3308469666,
-      "load_duration": 66702250,
-      "prompt_eval_count": 30,
-      "prompt_eval_duration": 391410334,
-      "eval_count": 70,
-      "eval_duration": 2849497291,
-      "response": "The answer is Saturn! Saturn's ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice particles, rock debris, and dust that orbit around the planet due to its gravitational pull.\n\nWould you like to know more about Saturn's rings or is there something else I can help you with?",
-      "thinking": null,
-      "context": null
+      "__type__": "ollama._types.GenerateResponse",
+      "__data__": {
+        "model": "llama3.2:3b-instruct-fp16",
+        "created_at": "2025-07-29T17:37:29.59115Z",
+        "done": true,
+        "done_reason": "stop",
+        "total_duration": 3560608959,
+        "load_duration": 57756625,
+        "prompt_eval_count": 30,
+        "prompt_eval_duration": 370892334,
+        "eval_count": 70,
+        "eval_duration": 3131360625,
+        "response": "The answer is Saturn! Saturn's ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice particles, rock debris, and dust that orbit around the planet due to its gravitational pull.\n\nWould you like to know more about Saturn's rings or is there something else I can help you with?",
+        "thinking": null,
+        "context": null
+      }
    },
    "is_streaming": false
  }
--- a/tests/integration/inference/recordings/responses/75d0dd9d0fa3.json
+++ b/tests/integration/inference/recordings/responses/75d0dd9d0fa3.json
@ -42,19 +42,22 @@
  },
  "response": {
    "body": {
-      "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-29T04:16:57.15491Z",
-      "done": true,
-      "done_reason": "stop",
-      "total_duration": 1570055875,
-      "load_duration": 87677125,
-      "prompt_eval_count": 119,
-      "prompt_eval_duration": 190281458,
-      "eval_count": 29,
-      "eval_duration": 1291217083,
-      "response": "{ \"name\": \"Michael Jordan\", \"year_born\": \"1963\", \"year_retired\": \"2003\"}\n    ",
-      "thinking": null,
-      "context": null
+      "__type__": "ollama._types.GenerateResponse",
+      "__data__": {
+        "model": "llama3.2:3b-instruct-fp16",
+        "created_at": "2025-07-29T17:37:05.293829Z",
+        "done": true,
+        "done_reason": "stop",
+        "total_duration": 1587647875,
+        "load_duration": 58102458,
+        "prompt_eval_count": 119,
+        "prompt_eval_duration": 199832792,
+        "eval_count": 29,
+        "eval_duration": 1328951417,
+        "response": "{ \"name\": \"Michael Jordan\", \"year_born\": \"1963\", \"year_retired\": \"2003\"}\n    ",
+        "thinking": null,
+        "context": null
+      }
    },
    "is_streaming": false
  }
--- a/tests/integration/inference/recordings/responses/84cab42e1f5c.json
+++ b/tests/integration/inference/recordings/responses/84cab42e1f5c.json
--- a/tests/integration/inference/recordings/responses/9b812cbcb88d.json
+++ b/tests/integration/inference/recordings/responses/9b812cbcb88d.json
@ -17,19 +17,22 @@
  },
  "response": {
    "body": {
-      "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-29T04:16:59.104609Z",
-      "done": true,
-      "done_reason": "stop",
-      "total_duration": 948932208,
-      "load_duration": 68549542,
-      "prompt_eval_count": 324,
-      "prompt_eval_duration": 460136875,
-      "eval_count": 11,
-      "eval_duration": 419553208,
-      "response": "[get_weather(location=\"San Francisco, CA\")]",
-      "thinking": null,
-      "context": null
+      "__type__": "ollama._types.GenerateResponse",
+      "__data__": {
+        "model": "llama3.2:3b-instruct-fp16",
+        "created_at": "2025-07-29T17:37:07.316473Z",
+        "done": true,
+        "done_reason": "stop",
+        "total_duration": 1001978333,
+        "load_duration": 125002875,
+        "prompt_eval_count": 324,
+        "prompt_eval_duration": 451915875,
+        "eval_count": 11,
+        "eval_duration": 424435375,
+        "response": "[get_weather(location=\"San Francisco, CA\")]",
+        "thinking": null,
+        "context": null
+      }
    },
    "is_streaming": false
  }
--- a/tests/integration/inference/recordings/responses/9e7a83d3d596.json
+++ b/tests/integration/inference/recordings/responses/9e7a83d3d596.json
@ -13,25 +13,28 @@
  },
  "response": {
    "body": {
-      "id": "cmpl-68",
-      "choices": [
-        {
-          "finish_reason": "stop",
-          "index": 0,
-          "logprobs": null,
-          "text": "Blue.\n\nThe completed quote is a well-known poetic phrase often used as a tongue-in-cheek romantic gesture. However, it's worth noting that true violets are actually purple in color, not blue. This phrase is a playful variation of the traditional \"Roses are red, violets are blue,\" which typically goes like this:\n\n\"Roses are red, violets are blue,\nSugar is sweet, and so are you.\"\n\nThis original quote has been used for centuries to make a lighthearted, whimsical compliment in poetry, songs, and spoken words."
+      "__type__": "openai.types.completion.Completion",
+      "__data__": {
+        "id": "cmpl-256",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "text": "Blue.\n\nMy response is a play on words of the classic nursery rhyme \"Roses are red, violets are blue.\" In traditional rhymes, violets are typically depicted as being purple or blue in color. I've taken this convention and completed the sentence with the word \"blue\" to create a punny adaptation of the original phrase."
+          }
+        ],
+        "created": 1753810618,
+        "model": "llama3.2:3b-instruct-fp16",
+        "object": "text_completion",
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 72,
+          "prompt_tokens": 50,
+          "total_tokens": 122,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
        }
-      ],
-      "created": 1753762608,
-      "model": "llama3.2:3b-instruct-fp16",
-      "object": "text_completion",
-      "system_fingerprint": "fp_ollama",
-      "usage": {
-        "completion_tokens": 120,
-        "prompt_tokens": 50,
-        "total_tokens": 170,
-        "completion_tokens_details": null,
-        "prompt_tokens_details": null
      }
    },
    "is_streaming": false
--- a/tests/integration/inference/recordings/responses/a6810c23eda8.json
+++ b/tests/integration/inference/recordings/responses/a6810c23eda8.json
--- a/tests/integration/inference/recordings/responses/ae6835cfe70e.json
+++ b/tests/integration/inference/recordings/responses/ae6835cfe70e.json
@ -17,19 +17,22 @@
  },
  "response": {
    "body": {
-      "model": "llama3.2:3b-instruct-fp16",
-      "created_at": "2025-07-29T04:17:22.73932Z",
-      "done": true,
-      "done_reason": "stop",
-      "total_duration": 660872000,
-      "load_duration": 76282083,
-      "prompt_eval_count": 386,
-      "prompt_eval_duration": 541896167,
-      "eval_count": 2,
-      "eval_duration": 42127791,
-      "response": "[]",
-      "thinking": null,
-      "context": null
+      "__type__": "ollama._types.GenerateResponse",
+      "__data__": {
+        "model": "llama3.2:3b-instruct-fp16",
+        "created_at": "2025-07-29T17:37:31.563015Z",
+        "done": true,
+        "done_reason": "stop",
+        "total_duration": 697289250,
+        "load_duration": 84472834,
+        "prompt_eval_count": 386,
+        "prompt_eval_duration": 564623708,
+        "eval_count": 2,
+        "eval_duration": 47500459,
+        "response": "[]",
+        "thinking": null,
+        "context": null
+      }
    },
    "is_streaming": false
  }
--- a/tests/integration/inference/recordings/responses/b91f1fb4aedb.json
+++ b/tests/integration/inference/recordings/responses/b91f1fb4aedb.json
@ -18,169 +18,202 @@
  "response": {
    "body": [
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:00.217546Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "[",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.301403Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "[",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:00.267879Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "get",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.344225Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "get",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:00.315525Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "_weather",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.38649Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "_weather",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:00.362669Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "(location",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.42879Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "(location",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:00.406139Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "=\"",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.470562Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "=\"",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:00.450302Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "San",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.512144Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "San",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:00.496893Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " Francisco",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.553706Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " Francisco",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:00.540977Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": ",",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.59536Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": ",",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:00.586272Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": " CA",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.636886Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": " CA",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:00.631743Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "\")]",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.678935Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "\")]",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:00.676251Z",
-        "done": true,
-        "done_reason": "stop",
-        "total_duration": 682827167,
-        "load_duration": 111852875,
-        "prompt_eval_count": 339,
-        "prompt_eval_duration": 109521833,
-        "eval_count": 11,
-        "eval_duration": 460495042,
-        "response": "",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:08.719919Z",
+          "done": true,
+          "done_reason": "stop",
+          "total_duration": 609334000,
+          "load_duration": 85744542,
+          "prompt_eval_count": 339,
+          "prompt_eval_duration": 102984708,
+          "eval_count": 11,
+          "eval_duration": 420012834,
+          "response": "",
+          "thinking": null,
+          "context": null
+        }
      }
    ],
    "is_streaming": true
--- a/tests/integration/inference/recordings/responses/bbd0637dce16.json
+++ b/tests/integration/inference/recordings/responses/bbd0637dce16.json
--- a/tests/integration/inference/recordings/responses/dd9e7d5913e9.json
+++ b/tests/integration/inference/recordings/responses/dd9e7d5913e9.json
@ -18,34 +18,40 @@
  "response": {
    "body": [
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:14.122273Z",
-        "done": false,
-        "done_reason": null,
-        "total_duration": null,
-        "load_duration": null,
-        "prompt_eval_count": null,
-        "prompt_eval_duration": null,
-        "eval_count": null,
-        "eval_duration": null,
-        "response": "[]",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:22.214492Z",
+          "done": false,
+          "done_reason": null,
+          "total_duration": null,
+          "load_duration": null,
+          "prompt_eval_count": null,
+          "prompt_eval_duration": null,
+          "eval_count": null,
+          "eval_duration": null,
+          "response": "[]",
+          "thinking": null,
+          "context": null
+        }
      },
      {
-        "model": "llama3.2:3b-instruct-fp16",
-        "created_at": "2025-07-29T04:17:14.165968Z",
-        "done": true,
-        "done_reason": "stop",
-        "total_duration": 663520959,
-        "load_duration": 67474917,
-        "prompt_eval_count": 386,
-        "prompt_eval_duration": 545132042,
-        "eval_count": 2,
-        "eval_duration": 50234083,
-        "response": "",
-        "thinking": null,
-        "context": null
+        "__type__": "ollama._types.GenerateResponse",
+        "__data__": {
+          "model": "llama3.2:3b-instruct-fp16",
+          "created_at": "2025-07-29T17:37:22.25685Z",
+          "done": true,
+          "done_reason": "stop",
+          "total_duration": 690090375,
+          "load_duration": 101933500,
+          "prompt_eval_count": 386,
+          "prompt_eval_duration": 544455708,
+          "eval_count": 2,
+          "eval_duration": 42919375,
+          "response": "",
+          "thinking": null,
+          "context": null
+        }
      }
    ],
    "is_streaming": true
--- a/tests/integration/test_inference_recordings.py
+++ b/tests/integration/test_inference_recordings.py
@ -7,11 +7,20 @@
 import sqlite3
 import tempfile
 from pathlib import Path
-from unittest.mock import Mock, patch
+from unittest.mock import patch

 import pytest
 from openai import AsyncOpenAI

+# Import the real Pydantic response types instead of using Mocks
+from llama_stack.apis.inference import (
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChoice,
+    OpenAIEmbeddingData,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+)
 from llama_stack.testing.inference_recorder import (
    ResponseStorage,
    inference_recording,
@ -27,44 +36,36 @@ def temp_storage_dir():


@pytest.fixture
-def mock_openai_response():
-    """Mock OpenAI response object."""
-    mock_response = Mock()
-    mock_response.choices = [Mock()]
-    mock_response.choices[0].message.content = "Hello! I'm doing well, thank you for asking."
-    mock_response.model_dump.return_value = {
-        "id": "chatcmpl-test123",
-        "object": "chat.completion",
-        "choices": [
-            {
-                "index": 0,
-                "message": {"role": "assistant", "content": "Hello! I'm doing well, thank you for asking."},
-                "finish_reason": "stop",
-            }
+def real_openai_chat_response():
+    """Real OpenAI chat completion response using proper Pydantic objects."""
+    return OpenAIChatCompletion(
+        id="chatcmpl-test123",
+        choices=[
+            OpenAIChoice(
+                index=0,
+                message=OpenAIAssistantMessageParam(
+                    role="assistant", content="Hello! I'm doing well, thank you for asking."
+                ),
+                finish_reason="stop",
+            )
        ],
-        "model": "llama3.2:3b",
-        "usage": {"prompt_tokens": 10, "completion_tokens": 15, "total_tokens": 25},
-    }
-
-    return mock_response
+        created=1234567890,
+        model="llama3.2:3b",
+    )


@pytest.fixture
-def mock_embeddings_response():
-    """Mock OpenAI embeddings response object."""
-    mock_response = Mock()
-    mock_response.data = [Mock(embedding=[0.1, 0.2, 0.3]), Mock(embedding=[0.4, 0.5, 0.6])]
-    mock_response.model_dump.return_value = {
-        "object": "list",
-        "data": [
-            {"object": "embedding", "embedding": [0.1, 0.2, 0.3], "index": 0},
-            {"object": "embedding", "embedding": [0.4, 0.5, 0.6], "index": 1},
+def real_embeddings_response():
+    """Real OpenAI embeddings response using proper Pydantic objects."""
+    return OpenAIEmbeddingsResponse(
+        object="list",
+        data=[
+            OpenAIEmbeddingData(object="embedding", embedding=[0.1, 0.2, 0.3], index=0),
+            OpenAIEmbeddingData(object="embedding", embedding=[0.4, 0.5, 0.6], index=1),
        ],
-        "model": "nomic-embed-text",
-        "usage": {"prompt_tokens": 6, "total_tokens": 6},
-    }
-
-    return mock_response
+        model="nomic-embed-text",
+        usage=OpenAIEmbeddingUsage(prompt_tokens=6, total_tokens=6),
+    )


 class TestInferenceRecording:
@ -160,11 +161,11 @@ class TestInferenceRecording:
        assert retrieved["request"]["model"] == "llama3.2:3b"
        assert retrieved["response"]["body"]["content"] == "test response"

-    async def test_recording_mode(self, temp_storage_dir, mock_openai_response):
+    async def test_recording_mode(self, temp_storage_dir, real_openai_chat_response):
        """Test that recording mode captures and stores responses."""

        async def mock_create(*args, **kwargs):
-            return mock_openai_response
+            return real_openai_chat_response

        temp_storage_dir = temp_storage_dir / "test_recording_mode"
        with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
@ -188,11 +189,11 @@ class TestInferenceRecording:

        assert recordings == 1

-    async def test_replay_mode(self, temp_storage_dir, mock_openai_response):
+    async def test_replay_mode(self, temp_storage_dir, real_openai_chat_response):
        """Test that replay mode returns stored responses without making real calls."""

        async def mock_create(*args, **kwargs):
-            return mock_openai_response
+            return real_openai_chat_response

        temp_storage_dir = temp_storage_dir / "test_replay_mode"
        # First, record a response
@ -200,7 +201,7 @@ class TestInferenceRecording:
            with inference_recording(mode="record", storage_dir=str(temp_storage_dir)):
                client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")

-                await client.chat.completions.create(
+                response = await client.chat.completions.create(
                    model="llama3.2:3b",
                    messages=[{"role": "user", "content": "Hello, how are you?"}],
                    temperature=0.7,
@ -220,7 +221,7 @@ class TestInferenceRecording:
                )

                # Verify we got the recorded response
-                assert response["choices"][0]["message"]["content"] == "Hello! I'm doing well, thank you for asking."
+                assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."

                # Verify the original method was NOT called
                mock_create_patch.assert_not_called()
@ -237,11 +238,11 @@ class TestInferenceRecording:
                        model="llama3.2:3b", messages=[{"role": "user", "content": "This was never recorded"}]
                    )

-    async def test_embeddings_recording(self, temp_storage_dir, mock_embeddings_response):
+    async def test_embeddings_recording(self, temp_storage_dir, real_embeddings_response):
        """Test recording and replay of embeddings calls."""

        async def mock_create(*args, **kwargs):
-            return mock_embeddings_response
+            return real_embeddings_response

        temp_storage_dir = temp_storage_dir / "test_embeddings_recording"
        # Record
@ -265,17 +266,17 @@ class TestInferenceRecording:
                )

                # Verify we got the recorded response
-                assert len(response["data"]) == 2
-                assert response["data"][0]["embedding"] == [0.1, 0.2, 0.3]
+                assert len(response.data) == 2
+                assert response.data[0].embedding == [0.1, 0.2, 0.3]

                # Verify original method was not called
                mock_create_patch.assert_not_called()

-    async def test_live_mode(self, mock_openai_response):
+    async def test_live_mode(self, real_openai_chat_response):
        """Test that live mode passes through to original methods."""

        async def mock_create(*args, **kwargs):
-            return mock_openai_response
+            return real_openai_chat_response

        with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
            with inference_recording(mode="live"):