mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-25 09:05:37 +00:00 
			
		
		
		
	IDs are now deterministic hashes based on request content, and timestamps are normalized to constants, eliminating spurious changes when re-recording tests. ## Changes - Updated `inference_recorder.py` to normalize IDs and timestamps during recording - Added `scripts/normalize_recordings.py` utility to re-normalize existing recordings - Created documentation in `tests/integration/recordings/README.md` - Normalized 350 existing recording files
		
			
				
	
	
		
			61 lines
		
	
	
	
		
			2.1 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
		
			Generated
		
	
	
			
		
		
	
	
			61 lines
		
	
	
	
		
			2.1 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
		
			Generated
		
	
	
| {
 | |
|   "request": {
 | |
|     "method": "POST",
 | |
|     "url": "https://api.together.xyz/v1/v1/chat/completions",
 | |
|     "headers": {},
 | |
|     "body": {
 | |
|       "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
 | |
|       "messages": [
 | |
|         {
 | |
|           "role": "user",
 | |
|           "content": "Test trace openai with temperature 0"
 | |
|         }
 | |
|       ],
 | |
|       "max_tokens": 100,
 | |
|       "stream": false,
 | |
|       "temperature": 0.7
 | |
|     },
 | |
|     "endpoint": "/v1/chat/completions",
 | |
|     "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
 | |
|   },
 | |
|   "response": {
 | |
|     "body": {
 | |
|       "__type__": "openai.types.chat.chat_completion.ChatCompletion",
 | |
|       "__data__": {
 | |
|         "id": "rec-6841bb14fa8d",
 | |
|         "choices": [
 | |
|           {
 | |
|             "finish_reason": "length",
 | |
|             "index": 0,
 | |
|             "logprobs": null,
 | |
|             "message": {
 | |
|               "content": "When using a language model like OpenAI with a temperature of 0, the model is essentially forced to produce the most likely next word in a sequence, given the context. This means that the output will be very deterministic and less diverse, as the model is not allowed to explore less likely options.\n\nHere's an example of how this could work in practice:\n\n**Prompt:** Write a short story about a character who discovers a hidden world.\n\n**Temperature 0 Response:**\nIn a small village nestled",
 | |
|               "refusal": null,
 | |
|               "role": "assistant",
 | |
|               "annotations": null,
 | |
|               "audio": null,
 | |
|               "function_call": null,
 | |
|               "tool_calls": []
 | |
|             },
 | |
|             "seed": 9269366008132817000
 | |
|           }
 | |
|         ],
 | |
|         "created": 0,
 | |
|         "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
 | |
|         "object": "chat.completion",
 | |
|         "service_tier": null,
 | |
|         "system_fingerprint": null,
 | |
|         "usage": {
 | |
|           "completion_tokens": 100,
 | |
|           "prompt_tokens": 43,
 | |
|           "total_tokens": 143,
 | |
|           "completion_tokens_details": null,
 | |
|           "prompt_tokens_details": null,
 | |
|           "cached_tokens": 0
 | |
|         },
 | |
|         "prompt": []
 | |
|       }
 | |
|     },
 | |
|     "is_streaming": false
 | |
|   }
 | |
| }
 |