Improvements to include other Ollama methods like ps, list, and pull

Ashwin Bharambe 2025-07-29 10:56:57 -07:00
parent 1a21c4b695
commit b578f9aec1
23 changed files with 6748 additions and 5711 deletions
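In short: the test inference recorder now covers Ollama's ps, list, and pull calls in addition to generate/chat/embed, and recorded responses carry a "__type__"/"__data__" envelope so they deserialize back into their original Pydantic classes. A minimal sketch of driving the recorder, using only the environment variables introduced in this diff (the directory path is illustrative):

import os

from llama_stack.testing.inference_recorder import setup_inference_recording

# Modes accepted per this diff: "live", "record", "replay".
os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "record"
os.environ["LLAMA_STACK_TEST_RECORDING_DIR"] = "/tmp/llama-stack-recordings"

with setup_inference_recording():
    pass  # run inference here; Ollama/OpenAI client calls are captured to disk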


@@ -77,13 +77,13 @@ class InferenceRouter(Inference):
self.tokenizer = Tokenizer.get_instance()
self.formatter = ChatFormat(self.tokenizer)
self.recording_context = None
async def initialize(self) -> None:
logger.debug("InferenceRouter.initialize")
pass
async def shutdown(self) -> None:
logger.debug("InferenceRouter.shutdown")
pass
async def register_model(
self,


@@ -94,6 +94,7 @@ RESOURCES = [
REGISTRY_REFRESH_INTERVAL_SECONDS = 300
REGISTRY_REFRESH_TASK = None
TEST_RECORDING_CONTEXT = None
async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
@@ -307,6 +308,15 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
async def construct_stack(
run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None
) -> dict[Api, Any]:
inference_mode = os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "live").lower()
if inference_mode in ["record", "replay"]:
global TEST_RECORDING_CONTEXT
from llama_stack.testing.inference_recorder import setup_inference_recording
TEST_RECORDING_CONTEXT = setup_inference_recording()
TEST_RECORDING_CONTEXT.__enter__()
logger.info(f"Inference recording enabled: mode={inference_mode}")
dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name)
policy = run_config.server.auth.access_policy if run_config.server.auth else []
impls = await resolve_impls(
@@ -352,6 +362,13 @@ async def shutdown_stack(impls: dict[Api, Any]):
except (Exception, asyncio.CancelledError) as e:
logger.exception(f"Failed to shutdown {impl_name}: {e}")
global TEST_RECORDING_CONTEXT
if TEST_RECORDING_CONTEXT:
try:
TEST_RECORDING_CONTEXT.__exit__(None, None, None)
except Exception as e:
logger.error(f"Error during inference recording cleanup: {e}")
global REGISTRY_REFRESH_TASK
if REGISTRY_REFRESH_TASK:
REGISTRY_REFRESH_TASK.cancel()


@@ -13,13 +13,23 @@ import sqlite3
from collections.abc import Generator
from contextlib import contextmanager
from pathlib import Path
from typing import Any, cast
from typing import Any, Literal, cast
from llama_stack.log import get_logger
logger = get_logger(__name__, category="testing")
# Global state for the recording system
_current_mode: str | None = None
_current_storage: ResponseStorage | None = None
_original_methods: dict[str, Any] = {}
from openai.types.completion_choice import CompletionChoice
# update the "finish_reason" field, since its type definition is wrong (it does not accept None)
CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
CompletionChoice.model_rebuild()
def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict[str, Any]) -> str:
"""Create a normalized hash of the request for consistent matching."""
@@ -35,14 +45,14 @@ def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict
def get_inference_mode() -> str:
return os.environ.get("LLAMA_STACK_INFERENCE_MODE", "live").lower()
return os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "live").lower()
def setup_inference_recording():
mode = get_inference_mode()
if mode not in ["live", "record", "replay"]:
raise ValueError(f"Invalid LLAMA_STACK_INFERENCE_MODE: {mode}. Must be 'live', 'record', or 'replay'")
raise ValueError(f"Invalid LLAMA_STACK_TEST_INFERENCE_MODE: {mode}. Must be 'live', 'record', or 'replay'")
if mode == "live":
# Return a no-op context manager for live mode
@@ -52,23 +62,43 @@ def setup_inference_recording():
return live_mode()
if "LLAMA_STACK_RECORDING_DIR" not in os.environ:
raise ValueError("LLAMA_STACK_RECORDING_DIR must be set for recording or replaying")
storage_dir = os.environ["LLAMA_STACK_RECORDING_DIR"]
if "LLAMA_STACK_TEST_RECORDING_DIR" not in os.environ:
raise ValueError("LLAMA_STACK_TEST_RECORDING_DIR must be set for recording or replaying")
storage_dir = os.environ["LLAMA_STACK_TEST_RECORDING_DIR"]
return inference_recording(mode=mode, storage_dir=storage_dir)
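The live branch above returns a no-op context manager whose body falls between these hunks; a plausible minimal form, shown for orientation only:

from contextlib import contextmanager

@contextmanager
def live_mode():
    # live mode: nothing is patched, requests go to the real backends
    yield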
def _serialize_response(response: Any) -> Any:
if hasattr(response, "model_dump"):
return response.model_dump()
data = response.model_dump(mode="json")
return {
"__type__": f"{response.__class__.__module__}.{response.__class__.__qualname__}",
"__data__": data,
}
elif hasattr(response, "__dict__"):
return dict(response.__dict__)
else:
return response
def _deserialize_response(data: dict[str, Any]) -> dict[str, Any]:
def _deserialize_response(data: dict[str, Any]) -> Any:
# Check if this is a serialized Pydantic model with type information
if isinstance(data, dict) and "__type__" in data and "__data__" in data:
try:
# Import the original class and reconstruct the object
module_path, class_name = data["__type__"].rsplit(".", 1)
module = __import__(module_path, fromlist=[class_name])
cls = getattr(module, class_name)
if not hasattr(cls, "model_validate"):
raise ValueError(f"Pydantic class {cls} does not support model_validate?")
return cls.model_validate(data["__data__"])
except (ImportError, AttributeError, TypeError, ValueError) as e:
logger.warning(f"Failed to deserialize object of type {data['__type__']}: {e}")
return data["__data__"]
return data
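A hedged round-trip illustration of the "__type__"/"__data__" envelope handled above. The GenerateResponse values mirror the recordings later in this diff; the constructor call assumes the remaining fields are optional in the installed ollama version:

from ollama._types import GenerateResponse

resp = GenerateResponse(model="llama3.2:3b-instruct-fp16", done=True, response="Humans live on Earth.")
blob = _serialize_response(resp)
# blob == {"__type__": "ollama._types.GenerateResponse", "__data__": {...}}
restored = _deserialize_response(blob)
assert isinstance(restored, GenerateResponse)
assert restored.response == resp.response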
@@ -120,6 +150,7 @@ class ResponseStorage:
with open(response_path, "w") as f:
json.dump({"request": request, "response": serialized_response}, f, indent=2)
f.write("\n")
f.flush()
# Update SQLite index
with sqlite3.connect(self.db_path) as conn:
@@ -214,6 +245,8 @@ async def _patched_inference_method(original_method, self, client_type, method_n
endpoint = "/api/chat"
elif method_name == "embed":
endpoint = "/api/embeddings"
elif method_name == "list":
endpoint = "/api/tags"
else:
endpoint = f"/api/{method_name}"
else:
@@ -295,8 +328,6 @@ def patch_inference_clients():
"""Install monkey patches for OpenAI client methods and Ollama AsyncClient methods."""
global _original_methods
# Import here to avoid circular imports
# Also import Ollama AsyncClient
from ollama import AsyncClient as OllamaAsyncClient
from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
from openai.resources.completions import AsyncCompletions
@@ -310,6 +341,9 @@ def patch_inference_clients():
"ollama_generate": OllamaAsyncClient.generate,
"ollama_chat": OllamaAsyncClient.chat,
"ollama_embed": OllamaAsyncClient.embed,
"ollama_ps": OllamaAsyncClient.ps,
"ollama_pull": OllamaAsyncClient.pull,
"ollama_list": OllamaAsyncClient.list,
}
# Create patched methods for OpenAI client
@@ -339,10 +373,24 @@ def patch_inference_clients():
async def patched_ollama_embed(self, **kwargs):
return await _patched_inference_method(_original_methods["ollama_embed"], self, "ollama", "embed", **kwargs)
async def patched_ollama_ps(self, **kwargs):
logger.info("replay mode: ollama.ps() reporting success")
return []
async def patched_ollama_pull(self, *args, **kwargs):
logger.info("replay mode: ollama.pull() not actually pulling the model")
return None
async def patched_ollama_list(self, **kwargs):
return await _patched_inference_method(_original_methods["ollama_list"], self, "ollama", "list", **kwargs)
# Apply Ollama patches
OllamaAsyncClient.generate = patched_ollama_generate
OllamaAsyncClient.chat = patched_ollama_chat
OllamaAsyncClient.embed = patched_ollama_embed
OllamaAsyncClient.ps = patched_ollama_ps
OllamaAsyncClient.pull = patched_ollama_pull
OllamaAsyncClient.list = patched_ollama_list
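Under these patches the non-inference calls become cheap: ps() reports an empty process list, pull() skips the download, and list() round-trips through the /api/tags recording. A hedged illustration, assuming patch_inference_clients() has already run in replay mode:

import asyncio

from ollama import AsyncClient

async def demo() -> None:
    client = AsyncClient()
    print(await client.ps())                 # [] -- stubbed, no server query
    print(await client.pull("llama3.2:3b"))  # None -- nothing is downloaded
    print(await client.list())               # ListResponse replayed from /api/tags

asyncio.run(demo())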
def unpatch_inference_clients():
@@ -367,6 +415,8 @@ def unpatch_inference_clients():
OllamaAsyncClient.generate = _original_methods["ollama_generate"]
OllamaAsyncClient.chat = _original_methods["ollama_chat"]
OllamaAsyncClient.embed = _original_methods["ollama_embed"]
OllamaAsyncClient.ps = _original_methods["ollama_ps"]
OllamaAsyncClient.pull = _original_methods["ollama_pull"]
OllamaAsyncClient.list = _original_methods["ollama_list"]
_original_methods.clear()
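patch_inference_clients() and unpatch_inference_clients() are presumably paired by the inference_recording context manager referenced earlier; a minimal sketch of that pairing, assuming only the two function names above:

from contextlib import contextmanager

@contextmanager
def _recording_scope():
    patch_inference_clients()
    try:
        yield
    finally:
        unpatch_inference_clients()  # always restore the real client methods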


@@ -185,95 +185,70 @@ def llama_stack_client(request, provider_data):
if not config:
raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")
# Set up inference recording if enabled
inference_mode = os.environ.get("LLAMA_STACK_INFERENCE_MODE", "live").lower()
recording_context = None
# Handle server:<config_name> format or server:<config_name>:<port>
if config.startswith("server:"):
parts = config.split(":")
config_name = parts[1]
port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
base_url = f"http://localhost:{port}"
if inference_mode in ["record", "replay"]:
from llama_stack.testing.inference_recorder import setup_inference_recording
# Check if port is available
if is_port_available(port):
print(f"Starting llama stack server with config '{config_name}' on port {port}...")
recording_context = setup_inference_recording()
recording_context.__enter__()
print(f"Inference recording enabled: mode={inference_mode}")
# Start server
server_process = start_llama_stack_server(config_name)
try:
# Handle server:<config_name> format or server:<config_name>:<port>
if config.startswith("server:"):
parts = config.split(":")
config_name = parts[1]
port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
base_url = f"http://localhost:{port}"
# Check if port is available
if is_port_available(port):
print(f"Starting llama stack server with config '{config_name}' on port {port}...")
# Start server
server_process = start_llama_stack_server(config_name)
# Wait for server to be ready
if not wait_for_server_ready(base_url, timeout=120, process=server_process):
print("Server failed to start within timeout")
server_process.terminate()
raise RuntimeError(
f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
f"See server.log for details."
)
print(f"Server is ready at {base_url}")
# Store process for potential cleanup (pytest will handle termination at session end)
request.session._llama_stack_server_process = server_process
else:
print(f"Port {port} is already in use, assuming server is already running...")
client = LlamaStackClient(
base_url=base_url,
provider_data=provider_data,
timeout=int(os.environ.get("LLAMA_STACK_CLIENT_TIMEOUT", "30")),
)
else:
# check if this looks like a URL using proper URL parsing
try:
parsed_url = urlparse(config)
if parsed_url.scheme and parsed_url.netloc:
client = LlamaStackClient(
base_url=config,
provider_data=provider_data,
)
else:
raise ValueError("Not a URL")
except Exception as e:
# If URL parsing fails, treat as library config
if "=" in config:
run_config = run_config_from_adhoc_config_spec(config)
run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
with open(run_config_file.name, "w") as f:
yaml.dump(run_config.model_dump(), f)
config = run_config_file.name
client = LlamaStackAsLibraryClient(
config,
provider_data=provider_data,
skip_logger_removal=True,
# Wait for server to be ready
if not wait_for_server_ready(base_url, timeout=120, process=server_process):
print("Server failed to start within timeout")
server_process.terminate()
raise RuntimeError(
f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
f"See server.log for details."
)
if not client.initialize():
raise RuntimeError("Initialization failed") from e
# Store recording context for cleanup
if recording_context:
request.session._inference_recording_context = recording_context
print(f"Server is ready at {base_url}")
return client
# Store process for potential cleanup (pytest will handle termination at session end)
request.session._llama_stack_server_process = server_process
else:
print(f"Port {port} is already in use, assuming server is already running...")
return LlamaStackClient(
base_url=base_url,
provider_data=provider_data,
timeout=int(os.environ.get("LLAMA_STACK_CLIENT_TIMEOUT", "30")),
)
# check if this looks like a URL using proper URL parsing
try:
parsed_url = urlparse(config)
if parsed_url.scheme and parsed_url.netloc:
return LlamaStackClient(
base_url=config,
provider_data=provider_data,
)
except Exception:
# Clean up recording context on error
if recording_context:
try:
recording_context.__exit__(None, None, None)
except Exception as cleanup_error:
print(f"Warning: Error cleaning up recording context: {cleanup_error}")
raise
# If URL parsing fails, treat as non-URL config
pass
if "=" in config:
run_config = run_config_from_adhoc_config_spec(config)
run_config_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yaml")
with open(run_config_file.name, "w") as f:
yaml.dump(run_config.model_dump(), f)
config = run_config_file.name
client = LlamaStackAsLibraryClient(
config,
provider_data=provider_data,
skip_logger_removal=True,
)
if not client.initialize():
raise RuntimeError("Initialization failed")
return client
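For orientation, a hedged sketch of a test consuming this fixture; the models.list() call is an assumed client-SDK method, not something this diff shows:

def test_client_smoke(llama_stack_client):
    # The fixture yields either a LlamaStackClient (HTTP) or a
    # LlamaStackAsLibraryClient (in-process) with the same surface.
    models = llama_stack_client.models.list()  # assumed SDK method
    assert models is not None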
@pytest.fixture(scope="session")
@@ -289,20 +264,9 @@ def compat_client(request):
@pytest.fixture(scope="session", autouse=True)
def cleanup_server_process(request):
"""Cleanup server process and inference recording at the end of the test session."""
"""Cleanup server process at the end of the test session."""
yield # Run tests
# Clean up inference recording context
if hasattr(request.session, "_inference_recording_context"):
recording_context = request.session._inference_recording_context
if recording_context:
try:
print("Cleaning up inference recording context...")
recording_context.__exit__(None, None, None)
except Exception as e:
print(f"Error during inference recording cleanup: {e}")
# Clean up server process
if hasattr(request.session, "_llama_stack_server_process"):
server_process = request.session._llama_stack_server_process
if server_process:


@@ -19,19 +19,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.555099Z",
"done": true,
"done_reason": "stop",
"total_duration": 2124168875,
"load_duration": 58506875,
"prompt_eval_count": 18,
"prompt_eval_duration": 70072583,
"eval_count": 43,
"eval_duration": 1994446917,
"response": " _______.\n\nThe best answer is blue. The traditional nursery rhyme goes like this:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you! (Or something similar.)",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:01.657977Z",
"done": true,
"done_reason": "stop",
"total_duration": 1860746958,
"load_duration": 59632791,
"prompt_eval_count": 18,
"prompt_eval_duration": 66333291,
"eval_count": 43,
"eval_duration": 1734228875,
"response": " _______.\n\nThe best answer is blue. The traditional nursery rhyme goes like this:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you! (Or something similar.)",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -17,19 +17,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.535525Z",
"done": true,
"done_reason": "stop",
"total_duration": 358691334,
"load_duration": 76787334,
"prompt_eval_count": 23,
"prompt_eval_duration": 72235375,
"eval_count": 6,
"eval_duration": 208986666,
"response": "Humans live on Earth.",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.658757Z",
"done": true,
"done_reason": "stop",
"total_duration": 327636667,
"load_duration": 55751292,
"prompt_eval_count": 23,
"prompt_eval_duration": 65865625,
"eval_count": 6,
"eval_duration": 205366667,
"response": "Humans live on Earth.",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -18,169 +18,202 @@
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.691771Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.833193Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.732262Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Latin",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.874274Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Latin",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.77294Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " word",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.915195Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " word",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.814484Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.955964Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.854875Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " \"",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.996679Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " \"",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.895957Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "Sun",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.037268Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "Sun",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.937445Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\"",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.078124Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\"",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.978832Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.118771Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.019242Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Sol",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.159758Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Sol",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.059902Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.200659Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.100535Z",
"done": true,
"done_reason": "stop",
"total_duration": 528254250,
"load_duration": 50177125,
"prompt_eval_count": 26,
"prompt_eval_duration": 68018458,
"eval_count": 11,
"eval_duration": 409555959,
"response": "",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:06.241464Z",
"done": true,
"done_reason": "stop",
"total_duration": 542773583,
"load_duration": 68681541,
"prompt_eval_count": 26,
"prompt_eval_duration": 64427833,
"eval_count": 11,
"eval_duration": 409175667,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true


@@ -18,169 +18,202 @@
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.480955Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.590293Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.527418Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.638879Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.571522Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.69047Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.615027Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.738247Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.660598Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.782316Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.705052Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.827405Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.754386Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.872241Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.796942Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.91705Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.845807Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.962046Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.891254Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.00691Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.934197Z",
"done": true,
"done_reason": "stop",
"total_duration": 574307083,
"load_duration": 72062083,
"prompt_eval_count": 324,
"prompt_eval_duration": 47115625,
"eval_count": 11,
"eval_duration": 454426708,
"response": "",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.052731Z",
"done": true,
"done_reason": "stop",
"total_duration": 649504167,
"load_duration": 138022250,
"prompt_eval_count": 324,
"prompt_eval_duration": 45176916,
"eval_count": 11,
"eval_duration": 464930417,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true


@@ -64,19 +64,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:13.438182Z",
"done": true,
"done_reason": "stop",
"total_duration": 2975265833,
"load_duration": 95592083,
"prompt_eval_count": 259,
"prompt_eval_duration": 367103709,
"eval_count": 60,
"eval_duration": 2511576708,
"response": "{\n \"first_name\": \"Michael\",\n \"last_name\": \"Jordan\",\n \"year_of_birth\": 1963,\n \"nba_stats\": {\n \"year_for_draft\": 1984,\n \"num_seasons_in_nba\": 15\n }\n}",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:21.486763Z",
"done": true,
"done_reason": "stop",
"total_duration": 2935272125,
"load_duration": 51690583,
"prompt_eval_count": 259,
"prompt_eval_duration": 369929333,
"eval_count": 60,
"eval_duration": 2512928125,
"response": "{\n \"first_name\": \"Michael\",\n \"last_name\": \"Jordan\",\n \"year_of_birth\": 1963,\n \"nba_stats\": {\n \"year_for_draft\": 1984,\n \"num_seasons_in_nba\": 15\n }\n}",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -0,0 +1,132 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/tags",
"headers": {},
"body": {},
"endpoint": "/api/tags",
"model": ""
},
"response": {
"body": {
"__type__": "ollama._types.ListResponse",
"__data__": {
"models": [
{
"model": "nomic-embed-text:latest",
"modified_at": "2025-07-29T10:36:48.647829-07:00",
"digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
"size": 274302450,
"details": {
"parent_model": "",
"format": "gguf",
"family": "nomic-bert",
"families": [
"nomic-bert"
],
"parameter_size": "137M",
"quantization_level": "F16"
}
},
{
"model": "llama-guard3:1b",
"modified_at": "2025-07-25T14:39:44.978630-07:00",
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
"size": 1600181919,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.5B",
"quantization_level": "Q8_0"
}
},
{
"model": "all-minilm:l6-v2",
"modified_at": "2025-07-24T15:15:11.129290-07:00",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"size": 45960996,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:1b",
"modified_at": "2025-07-17T22:02:24.953208-07:00",
"digest": "baf6a787fdffd633537aa2eb51cfd54cb93ff08e28040095462bb63daf552878",
"size": 1321098329,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "1.2B",
"quantization_level": "Q8_0"
}
},
{
"model": "all-minilm:latest",
"modified_at": "2025-06-03T16:50:10.946583-07:00",
"digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
"size": 45960996,
"details": {
"parent_model": "",
"format": "gguf",
"family": "bert",
"families": [
"bert"
],
"parameter_size": "23M",
"quantization_level": "F16"
}
},
{
"model": "llama3.2:3b",
"modified_at": "2025-05-01T11:15:23.797447-07:00",
"digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
"size": 2019393189,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "Q4_K_M"
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"modified_at": "2025-04-30T15:33:48.939665-07:00",
"digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
"size": 6433703586,
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "3.2B",
"quantization_level": "F16"
}
}
]
}
},
"is_streaming": false
}
}
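This new recording backs the patched list() call; a hedged sketch of how its lookup key would be computed with normalize_request from earlier in this diff:

key = normalize_request(
    method="POST",
    url="http://localhost:11434/api/tags",
    headers={},
    body={},
)
# The resulting hash indexes this file in the SQLite index kept by ResponseStorage.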


@@ -18,304 +18,364 @@
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.227427Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:29.898943Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.275725Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " capital",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:29.940406Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " capital",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.316195Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:29.983928Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.356832Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " the",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.025678Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " the",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.397682Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " United",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.066554Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " United",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.438761Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " States",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.111853Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " States",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.480453Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.156263Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.523691Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Washington",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.197342Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Washington",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.565106Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.238939Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.606315Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " D",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.28041Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " D",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.647209Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".C",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.321152Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".C",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.687828Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.362571Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.728386Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " (",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.404107Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " (",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.769091Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "short",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.444632Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "short",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.809726Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.486331Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.850489Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " District",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.527309Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " District",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.89147Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.568556Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.932311Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Columbia",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.610745Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Columbia",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.973566Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ").",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.654172Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ").",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:22.014466Z",
"done": true,
"done_reason": "stop",
"total_duration": 1034011167,
"load_duration": 176591709,
"prompt_eval_count": 26,
"prompt_eval_duration": 68104583,
"eval_count": 20,
"eval_duration": 788670334,
"response": "",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:30.695146Z",
"done": true,
"done_reason": "stop",
"total_duration": 1078314875,
"load_duration": 203057166,
"prompt_eval_count": 26,
"prompt_eval_duration": 77142708,
"eval_count": 20,
"eval_duration": 797458917,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true


@@ -17,19 +17,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:20.924128Z",
"done": true,
"done_reason": "stop",
"total_duration": 3308469666,
"load_duration": 66702250,
"prompt_eval_count": 30,
"prompt_eval_duration": 391410334,
"eval_count": 70,
"eval_duration": 2849497291,
"response": "The answer is Saturn! Saturn's ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice particles, rock debris, and dust that orbit around the planet due to its gravitational pull.\n\nWould you like to know more about Saturn's rings or is there something else I can help you with?",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:29.59115Z",
"done": true,
"done_reason": "stop",
"total_duration": 3560608959,
"load_duration": 57756625,
"prompt_eval_count": 30,
"prompt_eval_duration": 370892334,
"eval_count": 70,
"eval_duration": 3131360625,
"response": "The answer is Saturn! Saturn's ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice particles, rock debris, and dust that orbit around the planet due to its gravitational pull.\n\nWould you like to know more about Saturn's rings or is there something else I can help you with?",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -42,19 +42,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.15491Z",
"done": true,
"done_reason": "stop",
"total_duration": 1570055875,
"load_duration": 87677125,
"prompt_eval_count": 119,
"prompt_eval_duration": 190281458,
"eval_count": 29,
"eval_duration": 1291217083,
"response": "{ \"name\": \"Michael Jordan\", \"year_born\": \"1963\", \"year_retired\": \"2003\"}\n ",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:05.293829Z",
"done": true,
"done_reason": "stop",
"total_duration": 1587647875,
"load_duration": 58102458,
"prompt_eval_count": 119,
"prompt_eval_duration": 199832792,
"eval_count": 29,
"eval_duration": 1328951417,
"response": "{ \"name\": \"Michael Jordan\", \"year_born\": \"1963\", \"year_retired\": \"2003\"}\n ",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -17,19 +17,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.104609Z",
"done": true,
"done_reason": "stop",
"total_duration": 948932208,
"load_duration": 68549542,
"prompt_eval_count": 324,
"prompt_eval_duration": 460136875,
"eval_count": 11,
"eval_duration": 419553208,
"response": "[get_weather(location=\"San Francisco, CA\")]",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:07.316473Z",
"done": true,
"done_reason": "stop",
"total_duration": 1001978333,
"load_duration": 125002875,
"prompt_eval_count": 324,
"prompt_eval_duration": 451915875,
"eval_count": 11,
"eval_duration": 424435375,
"response": "[get_weather(location=\"San Francisco, CA\")]",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -13,25 +13,28 @@
},
"response": {
"body": {
"id": "cmpl-68",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Blue.\n\nThe completed quote is a well-known poetic phrase often used as a tongue-in-cheek romantic gesture. However, it's worth noting that true violets are actually purple in color, not blue. This phrase is a playful variation of the traditional \"Roses are red, violets are blue,\" which typically goes like this:\n\n\"Roses are red, violets are blue,\nSugar is sweet, and so are you.\"\n\nThis original quote has been used for centuries to make a lighthearted, whimsical compliment in poetry, songs, and spoken words."
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-256",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Blue.\n\nMy response is a play on words of the classic nursery rhyme \"Roses are red, violets are blue.\" In traditional rhymes, violets are typically depicted as being purple or blue in color. I've taken this convention and completed the sentence with the word \"blue\" to create a punny adaptation of the original phrase."
}
],
"created": 1753810618,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 72,
"prompt_tokens": 50,
"total_tokens": 122,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
],
"created": 1753762608,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 120,
"prompt_tokens": 50,
"total_tokens": 170,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
},
"is_streaming": false


@@ -17,19 +17,22 @@
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:22.73932Z",
"done": true,
"done_reason": "stop",
"total_duration": 660872000,
"load_duration": 76282083,
"prompt_eval_count": 386,
"prompt_eval_duration": 541896167,
"eval_count": 2,
"eval_duration": 42127791,
"response": "[]",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:31.563015Z",
"done": true,
"done_reason": "stop",
"total_duration": 697289250,
"load_duration": 84472834,
"prompt_eval_count": 386,
"prompt_eval_duration": 564623708,
"eval_count": 2,
"eval_duration": 47500459,
"response": "[]",
"thinking": null,
"context": null
}
},
"is_streaming": false
}


@@ -18,169 +18,202 @@
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.217546Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.301403Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.267879Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.344225Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.315525Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.38649Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.362669Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.42879Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.406139Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.470562Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.450302Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.512144Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.496893Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.553706Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.540977Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.59536Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.586272Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.636886Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.631743Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.678935Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.676251Z",
"done": true,
"done_reason": "stop",
"total_duration": 682827167,
"load_duration": 111852875,
"prompt_eval_count": 339,
"prompt_eval_duration": 109521833,
"eval_count": 11,
"eval_duration": 460495042,
"response": "",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:08.719919Z",
"done": true,
"done_reason": "stop",
"total_duration": 609334000,
"load_duration": 85744542,
"prompt_eval_count": 339,
"prompt_eval_duration": 102984708,
"eval_count": 11,
"eval_duration": 420012834,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true

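Streaming recordings like the two above differ only in shape: "body" is a list of chunk envelopes and "is_streaming" is true, with "done" false on every chunk until the final one. A hedged sketch of how a replay layer could turn that list back into the async iterator callers expect; again, the function names are illustrative rather than the recorder's actual internals.

import importlib
from collections.abc import AsyncIterator
from typing import Any


def _decode(envelope: dict[str, Any]) -> Any:
    # Same envelope layout as the recordings above: rebuild the recorded
    # Pydantic type from its fully qualified class name and dumped fields.
    module_path, _, class_name = envelope["__type__"].rpartition(".")
    cls = getattr(importlib.import_module(module_path), class_name)
    return cls.model_validate(envelope["__data__"])


async def replay_stream(recording: dict[str, Any]) -> AsyncIterator[Any]:
    # Yield each recorded chunk in order; consumers see the same sequence
    # of partial responses the live server produced (done=false until last).
    for chunk in recording["body"]:
        yield _decode(chunk)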
View file

@@ -18,34 +18,40 @@
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:14.122273Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[]",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:22.214492Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[]",
"thinking": null,
"context": null
}
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:14.165968Z",
"done": true,
"done_reason": "stop",
"total_duration": 663520959,
"load_duration": 67474917,
"prompt_eval_count": 386,
"prompt_eval_duration": 545132042,
"eval_count": 2,
"eval_duration": 50234083,
"response": "",
"thinking": null,
"context": null
"__type__": "ollama._types.GenerateResponse",
"__data__": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T17:37:22.25685Z",
"done": true,
"done_reason": "stop",
"total_duration": 690090375,
"load_duration": 101933500,
"prompt_eval_count": 386,
"prompt_eval_duration": 544455708,
"eval_count": 2,
"eval_duration": 42919375,
"response": "",
"thinking": null,
"context": null
}
}
],
"is_streaming": true

View file

@@ -7,11 +7,20 @@
import sqlite3
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch
from unittest.mock import patch
import pytest
from openai import AsyncOpenAI
# Import the real Pydantic response types instead of using Mocks
from llama_stack.apis.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChoice,
OpenAIEmbeddingData,
OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage,
)
from llama_stack.testing.inference_recorder import (
ResponseStorage,
inference_recording,
@@ -27,44 +36,36 @@ def temp_storage_dir():
@pytest.fixture
def mock_openai_response():
"""Mock OpenAI response object."""
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message.content = "Hello! I'm doing well, thank you for asking."
mock_response.model_dump.return_value = {
"id": "chatcmpl-test123",
"object": "chat.completion",
"choices": [
{
"index": 0,
"message": {"role": "assistant", "content": "Hello! I'm doing well, thank you for asking."},
"finish_reason": "stop",
}
def real_openai_chat_response():
"""Real OpenAI chat completion response using proper Pydantic objects."""
return OpenAIChatCompletion(
id="chatcmpl-test123",
choices=[
OpenAIChoice(
index=0,
message=OpenAIAssistantMessageParam(
role="assistant", content="Hello! I'm doing well, thank you for asking."
),
finish_reason="stop",
)
],
"model": "llama3.2:3b",
"usage": {"prompt_tokens": 10, "completion_tokens": 15, "total_tokens": 25},
}
return mock_response
created=1234567890,
model="llama3.2:3b",
)
@pytest.fixture
def mock_embeddings_response():
"""Mock OpenAI embeddings response object."""
mock_response = Mock()
mock_response.data = [Mock(embedding=[0.1, 0.2, 0.3]), Mock(embedding=[0.4, 0.5, 0.6])]
mock_response.model_dump.return_value = {
"object": "list",
"data": [
{"object": "embedding", "embedding": [0.1, 0.2, 0.3], "index": 0},
{"object": "embedding", "embedding": [0.4, 0.5, 0.6], "index": 1},
def real_embeddings_response():
"""Real OpenAI embeddings response using proper Pydantic objects."""
return OpenAIEmbeddingsResponse(
object="list",
data=[
OpenAIEmbeddingData(object="embedding", embedding=[0.1, 0.2, 0.3], index=0),
OpenAIEmbeddingData(object="embedding", embedding=[0.4, 0.5, 0.6], index=1),
],
"model": "nomic-embed-text",
"usage": {"prompt_tokens": 6, "total_tokens": 6},
}
return mock_response
model="nomic-embed-text",
usage=OpenAIEmbeddingUsage(prompt_tokens=6, total_tokens=6),
)
class TestInferenceRecording:
@@ -160,11 +161,11 @@ class TestInferenceRecording:
assert retrieved["request"]["model"] == "llama3.2:3b"
assert retrieved["response"]["body"]["content"] == "test response"
async def test_recording_mode(self, temp_storage_dir, mock_openai_response):
async def test_recording_mode(self, temp_storage_dir, real_openai_chat_response):
"""Test that recording mode captures and stores responses."""
async def mock_create(*args, **kwargs):
return mock_openai_response
return real_openai_chat_response
temp_storage_dir = temp_storage_dir / "test_recording_mode"
with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
@@ -188,11 +189,11 @@ class TestInferenceRecording:
assert recordings == 1
async def test_replay_mode(self, temp_storage_dir, mock_openai_response):
async def test_replay_mode(self, temp_storage_dir, real_openai_chat_response):
"""Test that replay mode returns stored responses without making real calls."""
async def mock_create(*args, **kwargs):
return mock_openai_response
return real_openai_chat_response
temp_storage_dir = temp_storage_dir / "test_replay_mode"
# First, record a response
@@ -200,7 +201,7 @@ class TestInferenceRecording:
with inference_recording(mode="record", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
await client.chat.completions.create(
response = await client.chat.completions.create(
model="llama3.2:3b",
messages=[{"role": "user", "content": "Hello, how are you?"}],
temperature=0.7,
@@ -220,7 +221,7 @@
)
# Verify we got the recorded response
assert response["choices"][0]["message"]["content"] == "Hello! I'm doing well, thank you for asking."
assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
# Verify the original method was NOT called
mock_create_patch.assert_not_called()
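test_replay_mode above exercises the public flow end to end: record a chat completion once, then serve it back with no network call. Condensed outside the test harness, that flow looks roughly like the sketch below; the endpoint, model, and message values mirror the fixtures, and this is a sketch under those assumptions rather than a prescribed recipe.

import asyncio

from openai import AsyncOpenAI

from llama_stack.testing.inference_recorder import inference_recording


async def demo(storage_dir: str) -> None:
    # Pass 1: record. The real request goes out and the response is stored.
    with inference_recording(mode="record", storage_dir=storage_dir):
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        await client.chat.completions.create(
            model="llama3.2:3b",
            messages=[{"role": "user", "content": "Hello, how are you?"}],
            temperature=0.7,
        )

    # Pass 2: replay. Served from the recording; nothing reaches the server.
    with inference_recording(mode="replay", storage_dir=storage_dir):
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        response = await client.chat.completions.create(
            model="llama3.2:3b",
            messages=[{"role": "user", "content": "Hello, how are you?"}],
            temperature=0.7,
        )
        print(response.choices[0].message.content)


# asyncio.run(demo("./recordings"))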
@@ -237,11 +238,11 @@ class TestInferenceRecording:
model="llama3.2:3b", messages=[{"role": "user", "content": "This was never recorded"}]
)
async def test_embeddings_recording(self, temp_storage_dir, mock_embeddings_response):
async def test_embeddings_recording(self, temp_storage_dir, real_embeddings_response):
"""Test recording and replay of embeddings calls."""
async def mock_create(*args, **kwargs):
return mock_embeddings_response
return real_embeddings_response
temp_storage_dir = temp_storage_dir / "test_embeddings_recording"
# Record
@@ -265,17 +266,17 @@
)
# Verify we got the recorded response
assert len(response["data"]) == 2
assert response["data"][0]["embedding"] == [0.1, 0.2, 0.3]
assert len(response.data) == 2
assert response.data[0].embedding == [0.1, 0.2, 0.3]
# Verify original method was not called
mock_create_patch.assert_not_called()
async def test_live_mode(self, mock_openai_response):
async def test_live_mode(self, real_openai_chat_response):
"""Test that live mode passes through to original methods."""
async def mock_create(*args, **kwargs):
return mock_openai_response
return real_openai_chat_response
with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
with inference_recording(mode="live"):