much simpler

Ashwin Bharambe 2025-07-28 20:30:38 -07:00
parent e59c13f2b8
commit 481a893eb7
19 changed files with 6365 additions and 302 deletions

@@ -4,13 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from __future__ import annotations
from __future__ import annotations # for forward references
import hashlib
import json
import os
import sqlite3
import uuid
from collections.abc import Generator
from contextlib import contextmanager
from pathlib import Path
@@ -28,78 +27,18 @@ def normalize_request(method: str, url: str, headers: dict[str, Any], body: dict
from urllib.parse import urlparse
parsed = urlparse(url)
endpoint = parsed.path
normalized = {"method": method.upper(), "endpoint": parsed.path, "body": body}
# Create normalized request dict
normalized: dict[str, Any] = {
"method": method.upper(),
"endpoint": endpoint,
}
# Normalize body parameters
if body:
# Handle model parameter
if "model" in body:
normalized["model"] = body["model"]
# Handle messages (normalize whitespace)
if "messages" in body:
normalized_messages = []
for msg in body["messages"]:
normalized_msg = dict(msg)
if "content" in normalized_msg and isinstance(normalized_msg["content"], str):
# Normalize whitespace
normalized_msg["content"] = " ".join(normalized_msg["content"].split())
normalized_messages.append(normalized_msg)
normalized["messages"] = normalized_messages
# Handle other parameters (sort for consistency)
other_params = {}
for key, value in body.items():
if key not in ["model", "messages"]:
if isinstance(value, float):
# Round floats to 6 decimal places
other_params[key] = round(value, 6)
else:
other_params[key] = value
if other_params:
# Sort dictionary keys for consistent hashing
normalized["parameters"] = dict(sorted(other_params.items()))
# Create hash
# Create hash - sort_keys=True ensures deterministic ordering
normalized_json = json.dumps(normalized, sort_keys=True)
return hashlib.sha256(normalized_json.encode()).hexdigest()
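
A quick, self-contained sanity check of this hashing step (illustrative only, not part of the commit): because json.dumps is called with sort_keys=True, two normalized dicts that differ only in key insertion order hash identically.

import hashlib
import json

def digest(d):
    return hashlib.sha256(json.dumps(d, sort_keys=True).encode()).hexdigest()

a = {"model": "llama3.2:3b", "endpoint": "/api/generate", "method": "POST"}
b = {"method": "POST", "endpoint": "/api/generate", "model": "llama3.2:3b"}
assert digest(a) == digest(b)  # insertion order does not affect the hash
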
def get_current_test_id() -> str:
"""Extract test ID from pytest context or fall back to environment/generated ID."""
# Try to get from pytest context
try:
import _pytest.fixtures
if hasattr(_pytest.fixtures, "_current_request") and _pytest.fixtures._current_request:
request = _pytest.fixtures._current_request
if hasattr(request, "node"):
# Use the test node ID as our test identifier
node_id: str = request.node.nodeid
# Clean up the node ID to be filesystem-safe
test_id = node_id.replace("/", "_").replace("::", "_").replace(".py", "")
return test_id
except AttributeError:
pass
# Fall back to environment-based or generated ID
return os.environ.get("LLAMA_STACK_TEST_ID", f"test_{uuid.uuid4().hex[:8]}")
def get_inference_mode() -> str:
"""Get the inference recording mode from environment variables."""
return os.environ.get("LLAMA_STACK_INFERENCE_MODE", "live").lower()
def setup_inference_recording():
"""Convenience function to set up inference recording based on environment variables."""
mode = get_inference_mode()
if mode not in ["live", "record", "replay"]:
@@ -113,14 +52,14 @@ def setup_inference_recording():
return live_mode()
test_id = get_current_test_id()
storage_dir = os.environ.get("LLAMA_STACK_RECORDING_DIR", str(Path.home() / ".llama" / "recordings"))
if "LLAMA_STACK_RECORDING_DIR" not in os.environ:
raise ValueError("LLAMA_STACK_RECORDING_DIR must be set for recording or replaying")
storage_dir = os.environ["LLAMA_STACK_RECORDING_DIR"]
return inference_recording(mode=mode, test_id=test_id, storage_dir=storage_dir)
return inference_recording(mode=mode, storage_dir=storage_dir)
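
A minimal sketch of driving this from a test harness (the environment variable names come from the diff above; everything inside the with-block is hypothetical):

import os

os.environ["LLAMA_STACK_INFERENCE_MODE"] = "replay"
os.environ["LLAMA_STACK_RECORDING_DIR"] = "/tmp/recordings"

with setup_inference_recording():
    # any patched OpenAI/Ollama client call made here is answered from
    # the recordings under /tmp/recordings instead of the network
    ...
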
def _serialize_response(response: Any) -> Any:
"""Serialize OpenAI response objects to JSON-compatible format."""
if hasattr(response, "model_dump"):
return response.model_dump()
elif hasattr(response, "__dict__"):
@@ -130,19 +69,14 @@ def _serialize_response(response: Any) -> Any:
def _deserialize_response(data: dict[str, Any]) -> dict[str, Any]:
"""Deserialize response data back to a dict format."""
# For simplicity, just return the dict - this preserves all the data
# The original response structure is sufficient for replaying
return data
class ResponseStorage:
"""Handles SQLite index + JSON file storage/retrieval for inference recordings."""
def __init__(self, base_dir: Path, test_id: str):
self.base_dir = base_dir
self.test_id = test_id
self.test_dir = base_dir / test_id
def __init__(self, test_dir: Path):
self.test_dir = test_dir
self.responses_dir = self.test_dir / "responses"
self.db_path = self.test_dir / "index.sqlite"
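
For orientation, the per-test directory this class manages looks roughly like the following (the hash-named file is illustrative):

test_dir/
    index.sqlite                # SQLite index keyed by request hash
    responses/
        <request_hash>.json     # one JSON file per recorded request/response pair
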
@@ -234,37 +168,55 @@ class ResponseStorage:
return cast(dict[str, Any], data)
async def _patched_create_method(original_method, self, **kwargs):
"""Patched version of OpenAI client create methods."""
async def _patched_inference_method(original_method, self, client_type, method_name=None, **kwargs):
global _current_mode, _current_storage
if _current_mode == "live" or _current_storage is None:
# Normal operation
return await original_method(self, **kwargs)
# Get base URL from the client
base_url = str(self._client.base_url)
# Get base URL and endpoint based on client type
if client_type == "openai":
base_url = str(self._client.base_url)
# Determine endpoint based on the method's module/class path
method_str = str(original_method)
if "chat.completions" in method_str:
endpoint = "/v1/chat/completions"
elif "embeddings" in method_str:
endpoint = "/v1/embeddings"
elif "completions" in method_str:
endpoint = "/v1/completions"
else:
# Fallback - try to guess from the self object
if hasattr(self, "_resource") and hasattr(self._resource, "_resource"):
resource_name = getattr(self._resource._resource, "_resource", "unknown")
if "chat" in str(resource_name):
endpoint = "/v1/chat/completions"
elif "embeddings" in str(resource_name):
endpoint = "/v1/embeddings"
# Determine endpoint based on the method's module/class path
method_str = str(original_method)
if "chat.completions" in method_str:
endpoint = "/v1/chat/completions"
elif "embeddings" in method_str:
endpoint = "/v1/embeddings"
elif "completions" in method_str:
endpoint = "/v1/completions"
else:
# Fallback - try to guess from the self object
if hasattr(self, "_resource") and hasattr(self._resource, "_resource"):
resource_name = getattr(self._resource._resource, "_resource", "unknown")
if "chat" in str(resource_name):
endpoint = "/v1/chat/completions"
elif "embeddings" in str(resource_name):
endpoint = "/v1/embeddings"
else:
endpoint = "/v1/completions"
else:
endpoint = "/v1/completions"
elif client_type == "ollama":
# Get base URL from the client (Ollama client uses host attribute)
base_url = getattr(self, "host", "http://localhost:11434")
if not base_url.startswith("http"):
base_url = f"http://{base_url}"
# Determine endpoint based on method name
if method_name == "generate":
endpoint = "/api/generate"
elif method_name == "chat":
endpoint = "/api/chat"
elif method_name == "embed":
endpoint = "/api/embeddings"
else:
endpoint = "/v1/completions"
endpoint = f"/api/{method_name}"
else:
raise ValueError(f"Unknown client type: {client_type}")
url = base_url.rstrip("/") + endpoint
@@ -276,15 +228,12 @@ async def _patched_create_method(original_method, self, **kwargs):
request_hash = normalize_request(method, url, headers, body)
if _current_mode == "replay":
# Try to find recorded response
recording = _current_storage.find_recording(request_hash)
if recording:
# Return recorded response
response_body = recording["response"]["body"]
# Handle streaming responses
if recording["response"].get("is_streaming", False):
# For streaming, we need to return an async iterator
async def replay_stream():
for chunk in response_body:
yield chunk
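
Replaying a stored stream just wraps the recorded chunk list in a fresh async generator. A standalone sketch of that pattern (the chunk strings are illustrative):

import asyncio

async def replay_stream(chunks):
    for chunk in chunks:  # stored chunks come back in recorded order
        yield chunk

async def main():
    async for chunk in replay_stream(["The", " Latin", " word", " is", " Sol."]):
        print(chunk, end="")

asyncio.run(main())
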
@@ -301,110 +250,8 @@
)
elif _current_mode == "record":
# Make real request and record it
response = await original_method(self, **kwargs)
# Store the recording
request_data = {
"method": method,
"url": url,
"headers": headers,
"body": body,
"endpoint": endpoint,
"model": body.get("model", ""),
}
# Determine if this is a streaming request based on request parameters
is_streaming = body.get("stream", False)
if is_streaming:
# For streaming responses, we need to collect all chunks immediately before yielding
# This ensures the recording is saved even if the generator isn't fully consumed
chunks = []
async for chunk in response:
chunks.append(chunk)
# Store the recording immediately
response_data = {"body": chunks, "is_streaming": True}
_current_storage.store_recording(request_hash, request_data, response_data)
# Return a generator that replays the stored chunks
async def replay_recorded_stream():
for chunk in chunks:
yield chunk
return replay_recorded_stream()
else:
response_data = {"body": response, "is_streaming": False}
_current_storage.store_recording(request_hash, request_data, response_data)
return response
else:
return await original_method(self, **kwargs)
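
The record path drains the upstream stream eagerly so the recording is persisted even if the caller abandons the generator early, then hands the caller a replay of the buffered chunks. A self-contained sketch of that drain-then-replay pattern (the persistence step is elided):

import asyncio

async def fake_upstream():
    for tok in ["Hello", ",", " world"]:
        yield tok

async def record_stream(stream):
    chunks = [c async for c in stream]  # drain fully before yielding anything
    # ... a real implementation would persist `chunks` here ...
    async def replay():
        for c in chunks:
            yield c
    return replay()

async def main():
    replayed = await record_stream(fake_upstream())
    print([c async for c in replayed])  # ['Hello', ',', ' world']

asyncio.run(main())
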
async def _patched_ollama_method(original_method, self, method_name, **kwargs):
"""Patched version of Ollama AsyncClient methods."""
global _current_mode, _current_storage
if _current_mode == "live" or _current_storage is None:
# Normal operation
return await original_method(self, **kwargs)
# Get base URL from the client (Ollama client uses host attribute)
base_url = getattr(self, "host", "http://localhost:11434")
if not base_url.startswith("http"):
base_url = f"http://{base_url}"
# Determine endpoint based on method name
if method_name == "generate":
endpoint = "/api/generate"
elif method_name == "chat":
endpoint = "/api/chat"
elif method_name == "embed":
endpoint = "/api/embeddings"
else:
endpoint = f"/api/{method_name}"
url = base_url.rstrip("/") + endpoint
# Normalize request for matching
method = "POST"
headers = {}
body = kwargs
request_hash = normalize_request(method, url, headers, body)
if _current_mode == "replay":
# Try to find recorded response
recording = _current_storage.find_recording(request_hash)
if recording:
# Return recorded response
response_body = recording["response"]["body"]
# Handle streaming responses for Ollama
if recording["response"].get("is_streaming", False):
# For streaming, we need to return an async iterator
async def replay_ollama_stream():
for chunk in response_body:
yield chunk
return replay_ollama_stream()
else:
return response_body
else:
raise RuntimeError(
f"No recorded response found for request hash: {request_hash}\n"
f"Endpoint: {endpoint}\n"
f"Model: {body.get('model', 'unknown')}\n"
f"To record this response, run with LLAMA_STACK_INFERENCE_MODE=record"
)
elif _current_mode == "record":
# Make real request and record it
response = await original_method(self, **kwargs)
# Store the recording
request_data = {
"method": method,
"url": url,
@@ -448,45 +295,31 @@ def patch_inference_clients():
global _original_methods
# Import here to avoid circular imports
from openai import AsyncOpenAI
# Also import Ollama AsyncClient
from ollama import AsyncClient as OllamaAsyncClient
from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
from openai.resources.completions import AsyncCompletions
from openai.resources.embeddings import AsyncEmbeddings
# Also import Ollama AsyncClient
try:
from ollama import AsyncClient as OllamaAsyncClient
except ImportError:
ollama_async_client = None
else:
ollama_async_client = OllamaAsyncClient
# Store original methods for both OpenAI and Ollama clients
_original_methods = {
"chat_completions_create": AsyncChatCompletions.create,
"completions_create": AsyncCompletions.create,
"embeddings_create": AsyncEmbeddings.create,
"ollama_generate": OllamaAsyncClient.generate,
"ollama_chat": OllamaAsyncClient.chat,
"ollama_embed": OllamaAsyncClient.embed,
}
# Add Ollama client methods if available
if ollama_async_client:
_original_methods.update(
{
"ollama_generate": ollama_async_client.generate,
"ollama_chat": ollama_async_client.chat,
"ollama_embed": ollama_async_client.embed,
}
)
# Create patched methods for OpenAI client
async def patched_chat_completions_create(self, **kwargs):
return await _patched_create_method(_original_methods["chat_completions_create"], self, **kwargs)
return await _patched_inference_method(_original_methods["chat_completions_create"], self, "openai", **kwargs)
async def patched_completions_create(self, **kwargs):
return await _patched_create_method(_original_methods["completions_create"], self, **kwargs)
return await _patched_inference_method(_original_methods["completions_create"], self, "openai", **kwargs)
async def patched_embeddings_create(self, **kwargs):
return await _patched_create_method(_original_methods["embeddings_create"], self, **kwargs)
return await _patched_inference_method(_original_methods["embeddings_create"], self, "openai", **kwargs)
# Apply OpenAI patches
AsyncChatCompletions.create = patched_chat_completions_create
@@ -494,40 +327,21 @@ def patch_inference_clients():
AsyncEmbeddings.create = patched_embeddings_create
# Create patched methods for Ollama client
if ollama_async_client:
async def patched_ollama_generate(self, **kwargs):
return await _patched_inference_method(
_original_methods["ollama_generate"], self, "ollama", "generate", **kwargs
)
async def patched_ollama_generate(self, **kwargs):
return await _patched_ollama_method(_original_methods["ollama_generate"], self, "generate", **kwargs)
async def patched_ollama_chat(self, **kwargs):
return await _patched_inference_method(_original_methods["ollama_chat"], self, "ollama", "chat", **kwargs)
async def patched_ollama_chat(self, **kwargs):
return await _patched_ollama_method(_original_methods["ollama_chat"], self, "chat", **kwargs)
async def patched_ollama_embed(self, **kwargs):
return await _patched_inference_method(_original_methods["ollama_embed"], self, "ollama", "embed", **kwargs)
async def patched_ollama_embed(self, **kwargs):
return await _patched_ollama_method(_original_methods["ollama_embed"], self, "embed", **kwargs)
# Apply Ollama patches
ollama_async_client.generate = patched_ollama_generate
ollama_async_client.chat = patched_ollama_chat
ollama_async_client.embed = patched_ollama_embed
# Also try to patch the AsyncOpenAI __init__ to trace client creation
original_openai_init = AsyncOpenAI.__init__
def patched_openai_init(self, *args, **kwargs):
result = original_openai_init(self, *args, **kwargs)
# After client is created, try to re-patch its methods
if hasattr(self, "chat") and hasattr(self.chat, "completions"):
original_chat_create = self.chat.completions.create
async def instance_patched_chat_create(**kwargs):
return await _patched_create_method(original_chat_create, self.chat.completions, **kwargs)
self.chat.completions.create = instance_patched_chat_create
return result
AsyncOpenAI.__init__ = patched_openai_init
# Apply Ollama patches
OllamaAsyncClient.generate = patched_ollama_generate
OllamaAsyncClient.chat = patched_ollama_chat
OllamaAsyncClient.embed = patched_ollama_embed
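
The patching itself is plain class-attribute reassignment: stash the original function, install a wrapper that closes over it, and restore the stash to unpatch. A minimal sketch of the pattern, independent of the OpenAI/Ollama classes:

import asyncio

class Client:
    async def create(self, **kwargs):
        return {"echo": kwargs}

_original_create = Client.create

async def patched_create(self, **kwargs):
    # intercept here (record/replay), then delegate to the saved original
    return await _original_create(self, **kwargs)

Client.create = patched_create    # patches every instance at once
print(asyncio.run(Client().create(model="demo")))
Client.create = _original_create  # unpatch, mirroring unpatch_inference_clients()
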
def unpatch_inference_clients():
@@ -538,43 +352,26 @@ def unpatch_inference_clients():
return
# Import here to avoid circular imports
from ollama import AsyncClient as OllamaAsyncClient
from openai.resources.chat.completions import AsyncCompletions as AsyncChatCompletions
from openai.resources.completions import AsyncCompletions
from openai.resources.embeddings import AsyncEmbeddings
# Restore OpenAI client methods
if "chat_completions_create" in _original_methods:
AsyncChatCompletions.create = _original_methods["chat_completions_create"]
if "completions_create" in _original_methods:
AsyncCompletions.create = _original_methods["completions_create"]
if "embeddings_create" in _original_methods:
AsyncEmbeddings.create = _original_methods["embeddings_create"]
AsyncChatCompletions.create = _original_methods["chat_completions_create"]
AsyncCompletions.create = _original_methods["completions_create"]
AsyncEmbeddings.create = _original_methods["embeddings_create"]
# Restore Ollama client methods if they were patched
try:
from ollama import AsyncClient as OllamaAsyncClient
if "ollama_generate" in _original_methods:
OllamaAsyncClient.generate = _original_methods["ollama_generate"]
if "ollama_chat" in _original_methods:
OllamaAsyncClient.chat = _original_methods["ollama_chat"]
if "ollama_embed" in _original_methods:
OllamaAsyncClient.embed = _original_methods["ollama_embed"]
except ImportError:
pass
OllamaAsyncClient.generate = _original_methods["ollama_generate"]
OllamaAsyncClient.chat = _original_methods["ollama_chat"]
OllamaAsyncClient.embed = _original_methods["ollama_embed"]
_original_methods.clear()
@contextmanager
def inference_recording(
mode: str = "live", test_id: str | None = None, storage_dir: str | Path | None = None
) -> Generator[None, None, None]:
def inference_recording(mode: str = "live", storage_dir: str | Path | None = None) -> Generator[None, None, None]:
"""Context manager for inference recording/replaying."""
global _current_mode, _current_storage
@@ -584,9 +381,6 @@ def inference_recording(
else:
storage_dir_path = Path(storage_dir)
if test_id is None:
test_id = f"test_{uuid.uuid4().hex[:8]}"
# Store previous state
prev_mode = _current_mode
prev_storage = _current_storage
@@ -595,7 +389,7 @@
_current_mode = mode
if mode in ["record", "replay"]:
_current_storage = ResponseStorage(storage_dir_path, test_id)
_current_storage = ResponseStorage(storage_dir_path)
patch_inference_clients()
yield
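
Putting it together, a hypothetical replay session might look like this (the model name and base URL are taken from the recordings below; the request must hash to an existing recording or a RuntimeError is raised):

import asyncio
from openai import AsyncOpenAI

async def main():
    with inference_recording(mode="replay", storage_dir="/tmp/recordings"):
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="unused")
        resp = await client.chat.completions.create(
            model="llama3.2:3b-instruct-fp16",
            messages=[{"role": "user", "content": "Which planet do humans live on?"}],
        )
        print(resp)

asyncio.run(main())
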

Binary file not shown.

@@ -0,0 +1,38 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "<|begin_of_text|>Complete the sentence using one word: Roses are red, violets are ",
"raw": true,
"options": {
"temperature": 0.0,
"max_tokens": 50,
"num_predict": 50
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.555099Z",
"done": true,
"done_reason": "stop",
"total_duration": 2124168875,
"load_duration": 58506875,
"prompt_eval_count": 18,
"prompt_eval_duration": 70072583,
"eval_count": 43,
"eval_duration": 1994446917,
"response": " _______.\n\nThe best answer is blue. The traditional nursery rhyme goes like this:\n\nRoses are red,\nViolets are blue,\nSugar is sweet,\nAnd so are you! (Or something similar.)",
"thinking": null,
"context": null
},
"is_streaming": false
}
}
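
Each recording file pairs the normalized request with the serialized response; when "is_streaming" is false the body is a single object, otherwise it is a list of chunks. A sketch of reading one of these Ollama /api/generate recordings back (the file name is hypothetical):

import json

path = "responses/example.json"  # hypothetical hash-named recording file
with open(path) as f:
    rec = json.load(f)

print(rec["request"]["endpoint"])  # "/api/generate"
body = rec["response"]["body"]
if rec["response"]["is_streaming"]:
    # streamed generate chunks each carry a "response" token fragment
    body = "".join(chunk["response"] for chunk in body)
print(body)
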

@@ -0,0 +1,36 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhich planet do humans live on?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.535525Z",
"done": true,
"done_reason": "stop",
"total_duration": 358691334,
"load_duration": 76787334,
"prompt_eval_count": 23,
"prompt_eval_duration": 72235375,
"eval_count": 6,
"eval_duration": 208986666,
"response": "Humans live on Earth.",
"thinking": null,
"context": null
},
"is_streaming": false
}
}

@@ -0,0 +1,188 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the name of the Sun in latin?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.691771Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.732262Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Latin",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.77294Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " word",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.814484Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.854875Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " \"",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.895957Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "Sun",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.937445Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\"",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.978832Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.019242Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Sol",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.059902Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:58.100535Z",
"done": true,
"done_reason": "stop",
"total_duration": 528254250,
"load_duration": 50177125,
"prompt_eval_count": 26,
"prompt_eval_duration": 68018458,
"eval_count": 11,
"eval_duration": 409555959,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}

@@ -0,0 +1,188 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_weather\",\n \"description\": \"Get the current weather\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"location\"],\n \"properties\": {\n \"location\": {\n \"type\": \"string\",\n \"description\": \"The city and state (both required), e.g. San Francisco, CA.\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nPretend you are a weather assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the weather like in San Francisco?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.480955Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.527418Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.571522Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.615027Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.660598Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.705052Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.754386Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.796942Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.845807Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.891254Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.934197Z",
"done": true,
"done_reason": "stop",
"total_duration": 574307083,
"load_duration": 72062083,
"prompt_eval_count": 324,
"prompt_eval_duration": 47115625,
"eval_count": 11,
"eval_duration": 454426708,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}

@@ -0,0 +1,83 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nPlease give me information about Michael Jordan.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nPlease respond in JSON format with the schema: {\"$defs\": {\"NBAStats\": {\"properties\": {\"year_for_draft\": {\"title\": \"Year For Draft\", \"type\": \"integer\"}, \"num_seasons_in_nba\": {\"title\": \"Num Seasons In Nba\", \"type\": \"integer\"}}, \"required\": [\"year_for_draft\", \"num_seasons_in_nba\"], \"title\": \"NBAStats\", \"type\": \"object\"}}, \"properties\": {\"first_name\": {\"title\": \"First Name\", \"type\": \"string\"}, \"last_name\": {\"title\": \"Last Name\", \"type\": \"string\"}, \"year_of_birth\": {\"title\": \"Year Of Birth\", \"type\": \"integer\"}, \"nba_stats\": {\"$ref\": \"#/$defs/NBAStats\"}}, \"required\": [\"first_name\", \"last_name\", \"year_of_birth\", \"nba_stats\"], \"title\": \"AnswerFormat\", \"type\": \"object\"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"format": {
"$defs": {
"NBAStats": {
"properties": {
"year_for_draft": {
"title": "Year For Draft",
"type": "integer"
},
"num_seasons_in_nba": {
"title": "Num Seasons In Nba",
"type": "integer"
}
},
"required": [
"year_for_draft",
"num_seasons_in_nba"
],
"title": "NBAStats",
"type": "object"
}
},
"properties": {
"first_name": {
"title": "First Name",
"type": "string"
},
"last_name": {
"title": "Last Name",
"type": "string"
},
"year_of_birth": {
"title": "Year Of Birth",
"type": "integer"
},
"nba_stats": {
"$ref": "#/$defs/NBAStats"
}
},
"required": [
"first_name",
"last_name",
"year_of_birth",
"nba_stats"
],
"title": "AnswerFormat",
"type": "object"
},
"options": {
"temperature": 0.0
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:13.438182Z",
"done": true,
"done_reason": "stop",
"total_duration": 2975265833,
"load_duration": 95592083,
"prompt_eval_count": 259,
"prompt_eval_duration": 367103709,
"eval_count": 60,
"eval_duration": 2511576708,
"response": "{\n \"first_name\": \"Michael\",\n \"last_name\": \"Jordan\",\n \"year_of_birth\": 1963,\n \"nba_stats\": {\n \"year_for_draft\": 1984,\n \"num_seasons_in_nba\": 15\n }\n}",
"thinking": null,
"context": null
},
"is_streaming": false
}
}

@@ -0,0 +1,323 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the name of the US captial?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.227427Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.275725Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " capital",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.316195Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.356832Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " the",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.397682Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " United",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.438761Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " States",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.480453Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.523691Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Washington",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.565106Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.606315Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " D",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.647209Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".C",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.687828Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.728386Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " (",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.769091Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "short",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.809726Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " for",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.850489Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " District",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.89147Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " of",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.932311Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Columbia",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:21.973566Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ").",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:22.014466Z",
"done": true,
"done_reason": "stop",
"total_duration": 1034011167,
"load_duration": 176591709,
"prompt_eval_count": 26,
"prompt_eval_duration": 68104583,
"eval_count": 20,
"eval_duration": 788670334,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}

@@ -0,0 +1,36 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhich planet has rings around it with a name starting with letter S?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:20.924128Z",
"done": true,
"done_reason": "stop",
"total_duration": 3308469666,
"load_duration": 66702250,
"prompt_eval_count": 30,
"prompt_eval_duration": 391410334,
"eval_count": 70,
"eval_duration": 2849497291,
"response": "The answer is Saturn! Saturn's ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice particles, rock debris, and dust that orbit around the planet due to its gravitational pull.\n\nWould you like to know more about Saturn's rings or is there something else I can help you with?",
"thinking": null,
"context": null
},
"is_streaming": false
}
}

@@ -0,0 +1,61 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "<|begin_of_text|>Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003.Please respond in JSON format with the schema: {\"properties\": {\"name\": {\"title\": \"Name\", \"type\": \"string\"}, \"year_born\": {\"title\": \"Year Born\", \"type\": \"string\"}, \"year_retired\": {\"title\": \"Year Retired\", \"type\": \"string\"}}, \"required\": [\"name\", \"year_born\", \"year_retired\"], \"title\": \"AnswerFormat\", \"type\": \"object\"}",
"raw": true,
"format": {
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"year_born": {
"title": "Year Born",
"type": "string"
},
"year_retired": {
"title": "Year Retired",
"type": "string"
}
},
"required": [
"name",
"year_born",
"year_retired"
],
"title": "AnswerFormat",
"type": "object"
},
"options": {
"temperature": 0.0,
"max_tokens": 50,
"num_predict": 50
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:57.15491Z",
"done": true,
"done_reason": "stop",
"total_duration": 1570055875,
"load_duration": 87677125,
"prompt_eval_count": 119,
"prompt_eval_duration": 190281458,
"eval_count": 29,
"eval_duration": 1291217083,
"response": "{ \"name\": \"Michael Jordan\", \"year_born\": \"1963\", \"year_retired\": \"2003\"}\n ",
"thinking": null,
"context": null
},
"is_streaming": false
}
}

@@ -0,0 +1,836 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"max_tokens": 50,
"stream": true
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "blue"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n\n"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "This"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " is"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " a"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " reference"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " to"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " traditional"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " English"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " nursery"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " rhyme"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ","
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " \""
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "R"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "oses"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Are"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Red"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ","
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " V"
}
],
"created": 1753762609,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Are"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Blue"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\""
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " The"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " completed"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " version"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " rhyme"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " typically"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " goes"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " like"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " this"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ":\n\n"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "R"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "oses"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " red"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ",\n"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "V"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1753762610,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " blue"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "Sugar"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " is"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " sweet"
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
},
{
"id": "cmpl-808",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": ""
}
],
"created": 1753762611,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": null
}
],
"is_streaming": true
}
}

@@ -0,0 +1,36 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_weather\",\n \"description\": \"Get the current weather\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"location\"],\n \"properties\": {\n \"location\": {\n \"type\": \"string\",\n \"description\": \"The city and state (both required), e.g. San Francisco, CA.\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nPretend you are a weather assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the weather like in San Francisco?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:59.104609Z",
"done": true,
"done_reason": "stop",
"total_duration": 948932208,
"load_duration": 68549542,
"prompt_eval_count": 324,
"prompt_eval_duration": 460136875,
"eval_count": 11,
"eval_duration": 419553208,
"response": "[get_weather(location=\"San Francisco, CA\")]",
"thinking": null,
"context": null
},
"is_streaming": false
}
}
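Each recording above pairs the normalized request (method, URL, endpoint, body, model) with the captured response and an is_streaming flag. A minimal replay helper for the non-streaming case could look like the sketch below; load_recording and replay_body are illustrative names, not part of the library:

import json
from pathlib import Path
from typing import Any


def load_recording(path: Path) -> dict[str, Any]:
    # Each recording file holds exactly one request/response pair.
    with open(path) as f:
        return json.load(f)


def replay_body(recording: dict[str, Any]) -> dict[str, Any]:
    # Non-streaming recordings store the complete response object under "body".
    response = recording["response"]
    if response["is_streaming"]:
        raise ValueError("streamed recordings must be replayed chunk by chunk")
    return response["body"]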


@@ -0,0 +1,39 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"stream": false
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"id": "cmpl-68",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Blue.\n\nThe completed quote is a well-known poetic phrase often used as a tongue-in-cheek romantic gesture. However, it's worth noting that true violets are actually purple in color, not blue. This phrase is a playful variation of the traditional \"Roses are red, violets are blue,\" which typically goes like this:\n\n\"Roses are red, violets are blue,\nSugar is sweet, and so are you.\"\n\nThis original quote has been used for centuries to make a lighthearted, whimsical compliment in poetry, songs, and spoken words."
}
],
"created": 1753762608,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 120,
"prompt_tokens": 50,
"total_tokens": 170,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
},
"is_streaming": false
}
}


@@ -0,0 +1,670 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "<|begin_of_text|>Complete the sentence using one word: Roses are red, violets are ",
"raw": true,
"options": {
"temperature": 0.0,
"max_tokens": 50,
"num_predict": 50
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.717175Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " ______",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.759811Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.802135Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".\n\n",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.843818Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "The",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.8848Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " best",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.926824Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " answer",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:53.96764Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.008868Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " blue",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.049584Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.090467Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " The",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.131216Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " traditional",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.171811Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " nursery",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.212449Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " rhyme",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.254055Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " goes",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.296182Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " like",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.339421Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " this",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.380632Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ":\n\n",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.423681Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "R",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.466032Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "oses",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.508317Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " are",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.551009Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " red",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.595853Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",\n",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.638044Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "V",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.679396Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "io",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.72096Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "lets",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.763977Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " are",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.80598Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " blue",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.847977Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",\n",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.890637Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "Sugar",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.931597Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " is",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:54.972266Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " sweet",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.01467Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",\n",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.055561Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "And",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.097823Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " so",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.139389Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " are",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.181536Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " you",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.224644Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "!",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.267976Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " (",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.311629Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "Or",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.355343Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " something",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.396541Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " similar",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.437899Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ".)",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:16:55.479038Z",
"done": true,
"done_reason": "stop",
"total_duration": 1881416167,
"load_duration": 69754000,
"prompt_eval_count": 18,
"prompt_eval_duration": 46361125,
"eval_count": 43,
"eval_duration": 1762259458,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}
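Streamed recordings like the one above store the response body as the full ordered list of chunks, so replay reduces to yielding them back in order. A sketch under that assumption (replay_stream is a hypothetical helper):

from collections.abc import Iterator
from typing import Any


def replay_stream(recording: dict[str, Any]) -> Iterator[Any]:
    # Yield the recorded chunks in their original order; only the final
    # done=True chunk carries the timing and token-count fields.
    response = recording["response"]
    assert response["is_streaming"], "expected a streamed recording"
    yield from response["body"]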


@@ -0,0 +1,36 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_object_namespace_list\",\n \"description\": \"Get the list of objects in a namespace\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"kind\", \"namespace\"],\n \"properties\": {\n \"kind\": {\n \"type\": \"string\",\n \"description\": \"the type of object\"\n },\n \"namespace\": {\n \"type\": \"string\",\n \"description\": \"the name of the namespace\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat pods are in the namespace openshift-lightspeed?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_object_namespace_list(kind=\"pod\", namespace=\"openshift-lightspeed\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nthe objects are pod1, pod2, pod3<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": false
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:22.73932Z",
"done": true,
"done_reason": "stop",
"total_duration": 660872000,
"load_duration": 76282083,
"prompt_eval_count": 386,
"prompt_eval_duration": 541896167,
"eval_count": 2,
"eval_duration": 42127791,
"response": "[]",
"thinking": null,
"context": null
},
"is_streaming": false
}
}


@@ -0,0 +1,188 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_weather\",\n \"description\": \"Get the current weather\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"location\"],\n \"properties\": {\n \"location\": {\n \"type\": \"string\",\n \"description\": \"The city and state (both required), e.g. San Francisco, CA.\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nPretend you are a weather assistant.\nYou MUST use one of the provided functions/tools to answer the user query.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat's the weather like in San Francisco?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.217546Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.267879Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "get",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.315525Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "_weather",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.362669Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "(location",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.406139Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "=\"",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.450302Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "San",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.496893Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " Francisco",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.540977Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": ",",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.586272Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": " CA",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.631743Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "\")]",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:00.676251Z",
"done": true,
"done_reason": "stop",
"total_duration": 682827167,
"load_duration": 111852875,
"prompt_eval_count": 339,
"prompt_eval_duration": 109521833,
"eval_count": 11,
"eval_duration": 460495042,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}
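Concatenating the incremental response fields of the chunks above reproduces exactly the tool call captured by the earlier non-streaming recording, [get_weather(location="San Francisco, CA")]. A quick way to verify that (assemble_response is a hypothetical helper):

def assemble_response(chunks: list[dict]) -> str:
    # Ollama streams text incrementally in the "response" field; the final
    # done=True chunk contributes an empty string.
    return "".join(chunk["response"] for chunk in chunks)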

File diff suppressed because it is too large


@@ -0,0 +1,53 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/api/generate",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"raw": true,
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant. You have access to functions, but you should only use them if they are required.\nYou are an expert in composing functions. You are given a question and a set of possible functions.\nBased on the question, you may or may not need to make one function/tool call to achieve the purpose.\n\nIf you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\nIf you decide to invoke a function, you SHOULD NOT include any other text in the response. besides the function call in the above format.\nFor a boolean parameter, be sure to use `True` or `False` (capitalized) for the value.\n\n\nHere is a list of functions in JSON format that you can invoke.\n\n[\n {\n \"name\": \"get_object_namespace_list\",\n \"description\": \"Get the list of objects in a namespace\",\n \"parameters\": {\n \"type\": \"dict\",\n \"required\": [\"kind\", \"namespace\"],\n \"properties\": {\n \"kind\": {\n \"type\": \"string\",\n \"description\": \"the type of object\"\n },\n \"namespace\": {\n \"type\": \"string\",\n \"description\": \"the name of the namespace\"\n }\n }\n }\n }\n]\n\nYou can answer general questions or invoke tools when necessary.\nIn addition to tool calls, you should also augment your responses by using the tool outputs.\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat pods are in the namespace openshift-lightspeed?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_object_namespace_list(kind=\"pod\", namespace=\"openshift-lightspeed\")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nthe objects are pod1, pod2, pod3<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
"options": {
"temperature": 0.0
},
"stream": true
},
"endpoint": "/api/generate",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:14.122273Z",
"done": false,
"done_reason": null,
"total_duration": null,
"load_duration": null,
"prompt_eval_count": null,
"prompt_eval_duration": null,
"eval_count": null,
"eval_duration": null,
"response": "[]",
"thinking": null,
"context": null
},
{
"model": "llama3.2:3b-instruct-fp16",
"created_at": "2025-07-29T04:17:14.165968Z",
"done": true,
"done_reason": "stop",
"total_duration": 663520959,
"load_duration": 67474917,
"prompt_eval_count": 386,
"prompt_eval_duration": 545132042,
"eval_count": 2,
"eval_duration": 50234083,
"response": "",
"thinking": null,
"context": null
}
],
"is_streaming": true
}
}


@@ -105,8 +105,8 @@ class TestInferenceRecording:
assert hash1 != hash3
def test_request_normalization_edge_cases(self):
"""Test request normalization handles edge cases correctly."""
# Test whitespace normalization
"""Test request normalization is precise about request content."""
# Test that different whitespace produces different hashes (no normalization)
hash1 = normalize_request(
"POST",
"http://test/v1/chat/completions",
@@ -116,16 +116,17 @@ class TestInferenceRecording:
hash2 = normalize_request(
"POST", "http://test/v1/chat/completions", {}, {"messages": [{"role": "user", "content": "Hello world"}]}
)
assert hash1 == hash2
assert hash1 != hash2 # Different whitespace should produce different hashes
# Test float precision normalization
# Test that different float precision produces different hashes (no rounding)
hash3 = normalize_request("POST", "http://test/v1/chat/completions", {}, {"temperature": 0.7000001})
hash4 = normalize_request("POST", "http://test/v1/chat/completions", {}, {"temperature": 0.7})
assert hash3 == hash4
assert hash3 != hash4 # Different precision should produce different hashes
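Both assertions follow from hashing the serialized body verbatim: json.dumps renders 0.7 and 0.7000001 differently, so their SHA-256 digests differ. A standalone stdlib check of the behavior these tests pin down:

import hashlib
import json

a = json.dumps({"temperature": 0.7}, sort_keys=True)
b = json.dumps({"temperature": 0.7000001}, sort_keys=True)
# Different serialized bodies yield different digests, so there are no
# accidental replay cache hits across near-identical requests.
assert hashlib.sha256(a.encode()).hexdigest() != hashlib.sha256(b.encode()).hexdigest()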
def test_response_storage(self, temp_storage_dir):
"""Test the ResponseStorage class."""
storage = ResponseStorage(temp_storage_dir, "test_storage")
temp_storage_dir = temp_storage_dir / "test_response_storage"
storage = ResponseStorage(temp_storage_dir)
# Test directory creation
assert storage.test_dir.exists()
@@ -161,13 +162,13 @@ class TestInferenceRecording:
async def test_recording_mode(self, temp_storage_dir, mock_openai_response):
"""Test that recording mode captures and stores responses."""
test_id = "test_recording_mode"
async def mock_create(*args, **kwargs):
return mock_openai_response
temp_storage_dir = temp_storage_dir / "test_recording_mode"
with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
with inference_recording(mode="record", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="record", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
response = await client.chat.completions.create(
@@ -181,7 +182,7 @@ class TestInferenceRecording:
assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
# Verify recording was stored
storage = ResponseStorage(temp_storage_dir, test_id)
storage = ResponseStorage(temp_storage_dir)
with sqlite3.connect(storage.db_path) as conn:
recordings = conn.execute("SELECT COUNT(*) FROM recordings").fetchone()[0]
@@ -189,14 +190,14 @@ class TestInferenceRecording:
async def test_replay_mode(self, temp_storage_dir, mock_openai_response):
"""Test that replay mode returns stored responses without making real calls."""
test_id = "test_replay_mode"
async def mock_create(*args, **kwargs):
return mock_openai_response
temp_storage_dir = temp_storage_dir / "test_replay_mode"
# First, record a response
with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
with inference_recording(mode="record", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="record", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
await client.chat.completions.create(
@@ -208,7 +209,7 @@ class TestInferenceRecording:
# Now test replay mode - should not call the original method
with patch("openai.resources.chat.completions.AsyncCompletions.create") as mock_create_patch:
with inference_recording(mode="replay", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="replay", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
response = await client.chat.completions.create(
@@ -226,10 +227,9 @@ class TestInferenceRecording:
async def test_replay_missing_recording(self, temp_storage_dir):
"""Test that replay mode fails when no recording is found."""
test_id = "test_missing_recording"
temp_storage_dir = temp_storage_dir / "test_replay_missing_recording"
with patch("openai.resources.chat.completions.AsyncCompletions.create"):
with inference_recording(mode="replay", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="replay", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
with pytest.raises(RuntimeError, match="No recorded response found"):
@@ -239,14 +239,14 @@ class TestInferenceRecording:
async def test_embeddings_recording(self, temp_storage_dir, mock_embeddings_response):
"""Test recording and replay of embeddings calls."""
test_id = "test_embeddings"
async def mock_create(*args, **kwargs):
return mock_embeddings_response
temp_storage_dir = temp_storage_dir / "test_embeddings_recording"
# Record
with patch("openai.resources.embeddings.AsyncEmbeddings.create", side_effect=mock_create):
with inference_recording(mode="record", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="record", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
response = await client.embeddings.create(
@@ -257,7 +257,7 @@ class TestInferenceRecording:
# Replay
with patch("openai.resources.embeddings.AsyncEmbeddings.create") as mock_create_patch:
with inference_recording(mode="replay", test_id=test_id, storage_dir=str(temp_storage_dir)):
with inference_recording(mode="replay", storage_dir=str(temp_storage_dir)):
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
response = await client.embeddings.create(