mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-24 16:57:21 +00:00 
			
		
		
		
	
		
			Some checks failed
		
		
	
	Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 2s
				
			Integration Tests / test-matrix (http, 3.12, datasets) (push) Failing after 5s
				
			Integration Tests / test-matrix (http, 3.12, scoring) (push) Failing after 5s
				
			Integration Tests / test-matrix (http, 3.13, datasets) (push) Failing after 7s
				
			Integration Tests / test-matrix (http, 3.13, providers) (push) Failing after 7s
				
			Integration Tests / test-matrix (http, 3.12, inspect) (push) Failing after 11s
				
			Integration Tests / test-matrix (http, 3.12, inference) (push) Failing after 14s
				
			Integration Tests / test-matrix (http, 3.12, tool_runtime) (push) Failing after 12s
				
			Integration Tests / test-matrix (http, 3.12, vector_io) (push) Failing after 9s
				
			Integration Tests / test-matrix (http, 3.13, inference) (push) Failing after 12s
				
			Integration Tests / test-matrix (http, 3.12, post_training) (push) Failing after 10s
				
			Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 8s
				
			Integration Tests / test-matrix (http, 3.13, inspect) (push) Failing after 9s
				
			Integration Tests / test-matrix (http, 3.13, tool_runtime) (push) Failing after 9s
				
			Integration Tests / test-matrix (http, 3.13, scoring) (push) Failing after 10s
				
			Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 8s
				
			Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 8s
				
			Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 6s
				
			Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 10s
				
			Integration Tests / test-matrix (http, 3.12, agents) (push) Failing after 21s
				
			Integration Tests / test-matrix (http, 3.12, providers) (push) Failing after 19s
				
			Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 7s
				
			Integration Tests / test-matrix (http, 3.13, post_training) (push) Failing after 16s
				
			Integration Tests / test-matrix (http, 3.13, agents) (push) Failing after 19s
				
			Integration Tests / test-matrix (http, 3.13, vector_io) (push) Failing after 15s
				
			Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 8s
				
			Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 9s
				
			Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 9s
				
			Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 9s
				
			Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 7s
				
			Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 10s
				
			Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 10s
				
			Integration Tests / test-matrix (library, 3.13, inference) (push) Failing after 8s
				
			Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 10s
				
			Integration Tests / test-matrix (library, 3.13, tool_runtime) (push) Failing after 7s
				
			Integration Tests / test-matrix (library, 3.13, agents) (push) Failing after 11s
				
			Integration Tests / test-matrix (library, 3.13, vector_io) (push) Failing after 10s
				
			Integration Tests / test-matrix (library, 3.13, providers) (push) Failing after 8s
				
			Integration Tests / test-matrix (library, 3.13, datasets) (push) Failing after 9s
				
			Integration Tests / test-matrix (library, 3.13, inspect) (push) Failing after 13s
				
			Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 10s
				
			Integration Tests / test-matrix (library, 3.13, scoring) (push) Failing after 11s
				
			Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 9s
				
			Integration Tests / test-matrix (library, 3.13, post_training) (push) Failing after 8s
				
			Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 46s
				
			Python Package Build Test / build (3.12) (push) Failing after 43s
				
			Test External Providers / test-external-providers (venv) (push) Failing after 40s
				
			Python Package Build Test / build (3.13) (push) Failing after 42s
				
			Unit Tests / unit-tests (3.13) (push) Failing after 22s
				
			Unit Tests / unit-tests (3.12) (push) Failing after 25s
				
			Update ReadTheDocs / update-readthedocs (push) Failing after 20s
				
			Pre-commit / pre-commit (push) Successful in 2m13s
				
			## What does this PR do? Ollama does not support remote images. Only local file paths OR base64 inputs are supported. This PR ensures that the Stack downloads remote images and passes the base64 down to the inference engine. ## Test Plan Added test cases for Responses and ran them for both `fireworks` and `ollama` providers.
		
			
				
	
	
		
			166 lines
		
	
	
	
		
			5.8 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			166 lines
		
	
	
	
		
			5.8 KiB
		
	
	
	
		
			YAML
		
	
	
	
	
	
| test_response_basic:
 | |
|   test_name: test_response_basic
 | |
|   test_params:
 | |
|     case:
 | |
|     - case_id: "earth"
 | |
|       input: "Which planet do humans live on?"
 | |
|       output: "earth"
 | |
|     - case_id: "saturn"
 | |
|       input: "Which planet has rings around it with a name starting with letter S?"
 | |
|       output: "saturn"
 | |
|     - case_id: "image_input"
 | |
|       input:
 | |
|       - role: user
 | |
|         content:
 | |
|         - type: input_text
 | |
|           text: "what teams are playing in this image?"
 | |
|       - role: user
 | |
|         content:
 | |
|         - type: input_image
 | |
|           image_url: "https://upload.wikimedia.org/wikipedia/commons/3/3b/LeBron_James_Layup_%28Cleveland_vs_Brooklyn_2018%29.jpg"
 | |
|       output: "brooklyn nets"
 | |
| 
 | |
| test_response_multi_turn:
 | |
|   test_name: test_response_multi_turn
 | |
|   test_params:
 | |
|     case:
 | |
|     - case_id: "earth"
 | |
|       turns:
 | |
|       - input: "Which planet do humans live on?"
 | |
|         output: "earth"
 | |
|       - input: "What is the name of the planet from your previous response?"
 | |
|         output: "earth"
 | |
| 
 | |
| test_response_web_search:
 | |
|   test_name: test_response_web_search
 | |
|   test_params:
 | |
|     case:
 | |
|     - case_id: "llama_experts"
 | |
|       input: "How many experts does the Llama 4 Maverick model have?"
 | |
|       tools:
 | |
|       - type: web_search
 | |
|         search_context_size: "low"
 | |
|       output: "128"
 | |
| 
 | |
| test_response_file_search:
 | |
|   test_name: test_response_file_search
 | |
|   test_params:
 | |
|     case:
 | |
|     - case_id: "llama_experts"
 | |
|       input: "How many experts does the Llama 4 Maverick model have?"
 | |
|       tools:
 | |
|       - type: file_search
 | |
|         # vector_store_ids param for file_search tool gets added by the test runner
 | |
|       file_content: "Llama 4 Maverick has 128 experts"
 | |
|       output: "128"
 | |
|     - case_id: "llama_experts_pdf"
 | |
|       input: "How many experts does the Llama 4 Maverick model have?"
 | |
|       tools:
 | |
|       - type: file_search
 | |
|         # vector_store_ids param for file_search tool gets added by the test runner
 | |
|       file_path: "pdfs/llama_stack_and_models.pdf"
 | |
|       output: "128"
 | |
| 
 | |
| test_response_mcp_tool:
 | |
|   test_name: test_response_mcp_tool
 | |
|   test_params:
 | |
|     case:
 | |
|     - case_id: "boiling_point_tool"
 | |
|       input: "What is the boiling point of myawesomeliquid in Celsius?"
 | |
|       tools:
 | |
|       - type: mcp
 | |
|         server_label: "localmcp"
 | |
|         server_url: "<FILLED_BY_TEST_RUNNER>"
 | |
|       output: "Hello, world!"
 | |
| 
 | |
| test_response_custom_tool:
 | |
|   test_name: test_response_custom_tool
 | |
|   test_params:
 | |
|     case:
 | |
|     - case_id: "sf_weather"
 | |
|       input: "What's the weather like in San Francisco?"
 | |
|       tools:
 | |
|       - type: function
 | |
|         name: get_weather
 | |
|         description: Get current temperature for a given location.
 | |
|         parameters:
 | |
|           additionalProperties: false
 | |
|           properties:
 | |
|             location:
 | |
|               description: "City and country e.g. Bogot\xE1, Colombia"
 | |
|               type: string
 | |
|           required:
 | |
|           - location
 | |
|           type: object
 | |
| 
 | |
| test_response_image:
 | |
|   test_name: test_response_image
 | |
|   test_params:
 | |
|     case:
 | |
|     - case_id: "llama_image"
 | |
|       input:
 | |
|       - role: user
 | |
|         content:
 | |
|         - type: input_text
 | |
|           text: "Identify the type of animal in this image."
 | |
|         - type: input_image
 | |
|           image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
 | |
|       output: "llama"
 | |
| 
 | |
| # the models are really poor at tool calling after seeing images :/
 | |
| test_response_multi_turn_image:
 | |
|   test_name: test_response_multi_turn_image
 | |
|   test_params:
 | |
|     case:
 | |
|     - case_id: "llama_image_understanding"
 | |
|       turns:
 | |
|       - input:
 | |
|         - role: user
 | |
|           content:
 | |
|           - type: input_text
 | |
|             text: "What type of animal is in this image? Please respond with a single word that starts with the letter 'L'."
 | |
|           - type: input_image
 | |
|             image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
 | |
|         output: "llama"
 | |
|       - input: "What country do you find this animal primarily in? What continent?"
 | |
|         output: "peru"
 | |
| 
 | |
| test_response_multi_turn_tool_execution:
 | |
|   test_name: test_response_multi_turn_tool_execution
 | |
|   test_params:
 | |
|     case:
 | |
|     - case_id: "user_file_access_check"
 | |
|       input: "I need to check if user 'alice' can access the file 'document.txt'. First, get alice's user ID, then check if that user ID can access the file 'document.txt'. Do this as a series of steps, where each step is a separate message. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response."
 | |
|       tools:
 | |
|       - type: mcp
 | |
|         server_label: "localmcp"
 | |
|         server_url: "<FILLED_BY_TEST_RUNNER>"
 | |
|       output: "yes"
 | |
|     - case_id: "experiment_results_lookup"
 | |
|       input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me what you found."
 | |
|       tools:
 | |
|       - type: mcp
 | |
|         server_label: "localmcp"
 | |
|         server_url: "<FILLED_BY_TEST_RUNNER>"
 | |
|       output: "100°C"
 | |
| 
 | |
| test_response_multi_turn_tool_execution_streaming:
 | |
|   test_name: test_response_multi_turn_tool_execution_streaming
 | |
|   test_params:
 | |
|     case:
 | |
|     - case_id: "user_permissions_workflow"
 | |
|       input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step."
 | |
|       tools:
 | |
|       - type: mcp
 | |
|         server_label: "localmcp"
 | |
|         server_url: "<FILLED_BY_TEST_RUNNER>"
 | |
|       stream: true
 | |
|       output: "no"
 | |
|     - case_id: "experiment_analysis_streaming"
 | |
|       input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Please stream your analysis process."
 | |
|       tools:
 | |
|       - type: mcp
 | |
|         server_label: "localmcp"
 | |
|         server_url: "<FILLED_BY_TEST_RUNNER>"
 | |
|       stream: true
 | |
|       output: "85%"
 |