mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-26 09:15:40 +00:00 
			
		
		
		
	Continue to build on top of https://github.com/meta-llama/llama-stack/pull/2941 ## Test Plan Run server with `LLAMA_STACK_TEST_INFERENCE_MODE=record` and then run the integration tests with `--stack-config=server:starter`. Then restart the server with `LLAMA_STACK_TEST_INFERENCE_MODE=replay` and re-run the tests. Verify that no request hit Ollama at any point.
		
			
				
	
	
		
			204 lines
		
	
	
	
		
			6.1 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
	
			
		
		
	
	
			204 lines
		
	
	
	
		
			6.1 KiB
		
	
	
	
		
			JSON
		
	
	
	
	
	
| {
 | |
|   "request": {
 | |
|     "method": "POST",
 | |
|     "url": "http://localhost:11434/api/generate",
 | |
|     "headers": {},
 | |
|     "body": {
 | |
|       "model": "llama3.2:3b-instruct-fp16",
 | |
|       "raw": true,
 | |
|       "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nGive me a sentence that contains the word: hello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
 | |
|       "options": {
 | |
|         "temperature": 0.0001,
 | |
|         "top_p": 0.9
 | |
|       },
 | |
|       "stream": true
 | |
|     },
 | |
|     "endpoint": "/api/generate",
 | |
|     "model": "llama3.2:3b-instruct-fp16"
 | |
|   },
 | |
|   "response": {
 | |
|     "body": [
 | |
|       {
 | |
|         "__type__": "ollama._types.GenerateResponse",
 | |
|         "__data__": {
 | |
|           "model": "llama3.2:3b-instruct-fp16",
 | |
|           "created_at": "2025-07-29T20:04:05.720409Z",
 | |
|           "done": false,
 | |
|           "done_reason": null,
 | |
|           "total_duration": null,
 | |
|           "load_duration": null,
 | |
|           "prompt_eval_count": null,
 | |
|           "prompt_eval_duration": null,
 | |
|           "eval_count": null,
 | |
|           "eval_duration": null,
 | |
|           "response": "Hello",
 | |
|           "thinking": null,
 | |
|           "context": null
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "ollama._types.GenerateResponse",
 | |
|         "__data__": {
 | |
|           "model": "llama3.2:3b-instruct-fp16",
 | |
|           "created_at": "2025-07-29T20:04:05.76184Z",
 | |
|           "done": false,
 | |
|           "done_reason": null,
 | |
|           "total_duration": null,
 | |
|           "load_duration": null,
 | |
|           "prompt_eval_count": null,
 | |
|           "prompt_eval_duration": null,
 | |
|           "eval_count": null,
 | |
|           "eval_duration": null,
 | |
|           "response": ",",
 | |
|           "thinking": null,
 | |
|           "context": null
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "ollama._types.GenerateResponse",
 | |
|         "__data__": {
 | |
|           "model": "llama3.2:3b-instruct-fp16",
 | |
|           "created_at": "2025-07-29T20:04:05.803087Z",
 | |
|           "done": false,
 | |
|           "done_reason": null,
 | |
|           "total_duration": null,
 | |
|           "load_duration": null,
 | |
|           "prompt_eval_count": null,
 | |
|           "prompt_eval_duration": null,
 | |
|           "eval_count": null,
 | |
|           "eval_duration": null,
 | |
|           "response": " how",
 | |
|           "thinking": null,
 | |
|           "context": null
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "ollama._types.GenerateResponse",
 | |
|         "__data__": {
 | |
|           "model": "llama3.2:3b-instruct-fp16",
 | |
|           "created_at": "2025-07-29T20:04:05.844065Z",
 | |
|           "done": false,
 | |
|           "done_reason": null,
 | |
|           "total_duration": null,
 | |
|           "load_duration": null,
 | |
|           "prompt_eval_count": null,
 | |
|           "prompt_eval_duration": null,
 | |
|           "eval_count": null,
 | |
|           "eval_duration": null,
 | |
|           "response": " can",
 | |
|           "thinking": null,
 | |
|           "context": null
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "ollama._types.GenerateResponse",
 | |
|         "__data__": {
 | |
|           "model": "llama3.2:3b-instruct-fp16",
 | |
|           "created_at": "2025-07-29T20:04:05.885088Z",
 | |
|           "done": false,
 | |
|           "done_reason": null,
 | |
|           "total_duration": null,
 | |
|           "load_duration": null,
 | |
|           "prompt_eval_count": null,
 | |
|           "prompt_eval_duration": null,
 | |
|           "eval_count": null,
 | |
|           "eval_duration": null,
 | |
|           "response": " I",
 | |
|           "thinking": null,
 | |
|           "context": null
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "ollama._types.GenerateResponse",
 | |
|         "__data__": {
 | |
|           "model": "llama3.2:3b-instruct-fp16",
 | |
|           "created_at": "2025-07-29T20:04:05.926149Z",
 | |
|           "done": false,
 | |
|           "done_reason": null,
 | |
|           "total_duration": null,
 | |
|           "load_duration": null,
 | |
|           "prompt_eval_count": null,
 | |
|           "prompt_eval_duration": null,
 | |
|           "eval_count": null,
 | |
|           "eval_duration": null,
 | |
|           "response": " assist",
 | |
|           "thinking": null,
 | |
|           "context": null
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "ollama._types.GenerateResponse",
 | |
|         "__data__": {
 | |
|           "model": "llama3.2:3b-instruct-fp16",
 | |
|           "created_at": "2025-07-29T20:04:05.967374Z",
 | |
|           "done": false,
 | |
|           "done_reason": null,
 | |
|           "total_duration": null,
 | |
|           "load_duration": null,
 | |
|           "prompt_eval_count": null,
 | |
|           "prompt_eval_duration": null,
 | |
|           "eval_count": null,
 | |
|           "eval_duration": null,
 | |
|           "response": " you",
 | |
|           "thinking": null,
 | |
|           "context": null
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "ollama._types.GenerateResponse",
 | |
|         "__data__": {
 | |
|           "model": "llama3.2:3b-instruct-fp16",
 | |
|           "created_at": "2025-07-29T20:04:06.008267Z",
 | |
|           "done": false,
 | |
|           "done_reason": null,
 | |
|           "total_duration": null,
 | |
|           "load_duration": null,
 | |
|           "prompt_eval_count": null,
 | |
|           "prompt_eval_duration": null,
 | |
|           "eval_count": null,
 | |
|           "eval_duration": null,
 | |
|           "response": " today",
 | |
|           "thinking": null,
 | |
|           "context": null
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "ollama._types.GenerateResponse",
 | |
|         "__data__": {
 | |
|           "model": "llama3.2:3b-instruct-fp16",
 | |
|           "created_at": "2025-07-29T20:04:06.049424Z",
 | |
|           "done": false,
 | |
|           "done_reason": null,
 | |
|           "total_duration": null,
 | |
|           "load_duration": null,
 | |
|           "prompt_eval_count": null,
 | |
|           "prompt_eval_duration": null,
 | |
|           "eval_count": null,
 | |
|           "eval_duration": null,
 | |
|           "response": "?",
 | |
|           "thinking": null,
 | |
|           "context": null
 | |
|         }
 | |
|       },
 | |
|       {
 | |
|         "__type__": "ollama._types.GenerateResponse",
 | |
|         "__data__": {
 | |
|           "model": "llama3.2:3b-instruct-fp16",
 | |
|           "created_at": "2025-07-29T20:04:06.090397Z",
 | |
|           "done": true,
 | |
|           "done_reason": "stop",
 | |
|           "total_duration": 1022446958,
 | |
|           "load_duration": 63964125,
 | |
|           "prompt_eval_count": 31,
 | |
|           "prompt_eval_duration": 586685542,
 | |
|           "eval_count": 10,
 | |
|           "eval_duration": 371051666,
 | |
|           "response": "",
 | |
|           "thinking": null,
 | |
|           "context": null
 | |
|         }
 | |
|       }
 | |
|     ],
 | |
|     "is_streaming": true
 | |
|   }
 | |
| }
 |