mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-24 16:57:21 +00:00 
			
		
		
		
	IDs are now deterministic hashes based on request content, and timestamps are normalized to constants, eliminating spurious changes when re-recording tests. ## Changes - Updated `inference_recorder.py` to normalize IDs and timestamps during recording - Added `scripts/normalize_recordings.py` utility to re-normalize existing recordings - Created documentation in `tests/integration/recordings/README.md` - Normalized 350 existing recording files
		
			
				
	
	
		
			58 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
	
			
		
		
	
	
			58 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
	
| {
 | |
|   "request": {
 | |
|     "method": "POST",
 | |
|     "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
 | |
|     "headers": {},
 | |
|     "body": {
 | |
|       "model": "llama3.2:3b-instruct-fp16",
 | |
|       "messages": [
 | |
|         {
 | |
|           "role": "user",
 | |
|           "content": "Test trace openai with temperature 0"
 | |
|         }
 | |
|       ],
 | |
|       "max_tokens": 100,
 | |
|       "stream": false,
 | |
|       "temperature": 0.7
 | |
|     },
 | |
|     "endpoint": "/v1/chat/completions",
 | |
|     "model": "llama3.2:3b-instruct-fp16"
 | |
|   },
 | |
|   "response": {
 | |
|     "body": {
 | |
|       "__type__": "openai.types.chat.chat_completion.ChatCompletion",
 | |
|       "__data__": {
 | |
|         "id": "rec-382c2f22274c",
 | |
|         "choices": [
 | |
|           {
 | |
|             "finish_reason": "length",
 | |
|             "index": 0,
 | |
|             "logprobs": null,
 | |
|             "message": {
 | |
|               "content": "I can guide you on how to use the `test-temperature` parameter with OpenAI's API, but please note that using a temperature of 0 may not produce meaningful results. Temperature is a hyperparameter that controls the level of randomness in the model's output.\n\nOpenAI's API uses a variant of the GPT-3 model, which is trained on a large corpus of text data. The `test-temperature` parameter allows you to adjust the level of randomness in the model's output",
 | |
|               "refusal": null,
 | |
|               "role": "assistant",
 | |
|               "annotations": null,
 | |
|               "audio": null,
 | |
|               "function_call": null,
 | |
|               "tool_calls": null
 | |
|             }
 | |
|           }
 | |
|         ],
 | |
|         "created": 0,
 | |
|         "model": "llama3.2:3b-instruct-fp16",
 | |
|         "object": "chat.completion",
 | |
|         "service_tier": null,
 | |
|         "system_fingerprint": "fp_ollama",
 | |
|         "usage": {
 | |
|           "completion_tokens": 100,
 | |
|           "prompt_tokens": 33,
 | |
|           "total_tokens": 133,
 | |
|           "completion_tokens_details": null,
 | |
|           "prompt_tokens_details": null
 | |
|         }
 | |
|       }
 | |
|     },
 | |
|     "is_streaming": false
 | |
|   }
 | |
| }
 |