# llama-stack-mirror/tests/verifications/openai_api/fixtures/test_cases/responses.yaml

# Single-turn cases: each entry pairs one prompt (`input`) with the expected
# answer (`output`). How `output` is matched against the reply is up to the
# test runner and is not defined in this file.
test_response_basic:
  test_name: test_response_basic
  test_params:
    case:
    - case_id: 'earth'
      input: 'Which planet do humans live on?'
      output: 'earth'
    - case_id: 'saturn'
      input: 'Which planet has rings around it with a name starting with letter S?'
      output: 'saturn'

# Multi-turn cases: `turns` is an ordered list of prompt/expected-answer pairs.
# The second prompt refers back to the "previous response", so the order of
# the turns matters.
test_response_multi_turn:
  test_name: test_response_multi_turn
  test_params:
    case:
    - case_id: 'earth'
      turns:
      - input: 'Which planet do humans live on?'
        output: 'earth'
      - input: 'What is the name of the planet from your previous response?'
        output: 'earth'

# Single prompt issued with a `web_search` tool attached.
test_response_web_search:
  test_name: test_response_web_search
  test_params:
    case:
    - case_id: 'llama_experts'
      input: 'How many experts does the Llama 4 Maverick model have?'
      tools:
      - type: web_search
        search_context_size: 'low'
      # Quoted on purpose: the expected answer is the string "128", not an int.
      output: '128'

# Single prompt issued with an MCP tool server attached.
test_response_mcp_tool:
  test_name: test_response_mcp_tool
  test_params:
    case:
    - case_id: 'boiling_point_tool'
      input: 'What is the boiling point of polyjuice?'
      tools:
      - type: mcp
        server_label: 'localmcp'
        # Placeholder value; presumably swapped for the real server URL by the
        # test runner before the request is sent -- confirm against the runner.
        server_url: '<FILLED_BY_TEST_RUNNER>'
      # NOTE(review): this expected output does not read like an answer to the
      # prompt; presumably it is what the local MCP test tool returns -- confirm.
      output: 'Hello, world!'

# Single prompt issued with a user-defined `function` tool. This case declares
# no `output` key; only the tool definition is supplied.
test_response_custom_tool:
  test_name: test_response_custom_tool
  test_params:
    case:
    - case_id: 'sf_weather'
      # Double quotes kept: the text contains an apostrophe.
      input: "What's the weather like in San Francisco?"
      tools:
      - type: function
        name: get_weather
        description: 'Get current temperature for a given location.'
        # Object schema with a single required string property, `location`;
        # extra properties are disallowed.
        parameters:
          additionalProperties: false
          properties:
            location:
              # Double quotes kept: `\xE1` is an escape sequence, which only
              # double-quoted scalars interpret.
              description: "City and country e.g. Bogot\xE1, Colombia"
              type: string
          required:
          - location
          type: object

# Single-turn case whose `input` is a structured message list: one user
# message carrying a text part and an image part (referenced by URL).
test_response_image:
  test_name: test_response_image
  test_params:
    case:
    - case_id: 'llama_image'
      input:
      - role: user
        content:
        - type: input_text
          text: 'Identify the type of animal in this image.'
        - type: input_image
          image_url: 'https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg'
      output: 'llama'

# NOTE: the models are really poor at tool calling after seeing images.
test_response_multi_turn_image:
  test_name: test_response_multi_turn_image
  test_params:
    case:
    - case_id: 'llama_image_understanding'
      turns:
      # Turn 1: structured user message with a text part and an image part.
      - input:
        - role: user
          content:
          - type: input_text
            # Folded scalar: the line breaks below become single spaces and
            # the trailing newline is stripped, giving a one-line prompt.
            text: >-
              What type of animal is in this image? Please respond with a
              single word that starts with the letter 'L'.
          - type: input_image
            image_url: 'https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg'
        output: 'llama'
      # Turn 2: plain-text follow-up that refers to "this animal" from turn 1.
      - input: 'What country do you find this animal primarily in? What continent?'
        output: 'peru'

# Prompts that each describe a two-step lookup (fetch an ID, then use it),
# issued with an MCP tool server attached.
# `server_url` holds a placeholder; presumably the test runner substitutes the
# real URL -- confirm against the runner.
# The `input` values use folded scalars (`>-`): line breaks become single
# spaces and the trailing newline is stripped, giving a one-line prompt.
test_response_multi_turn_tool_execution:
  test_name: test_response_multi_turn_tool_execution
  test_params:
    case:
    - case_id: 'user_file_access_check'
      input: >-
        I need to check if user 'alice' can access the file 'document.txt'.
        First, get alice's user ID, then check if that user ID can access the
        file 'document.txt'. Tell me the final result.
      tools:
      - type: mcp
        server_label: 'localmcp'
        server_url: '<FILLED_BY_TEST_RUNNER>'
      # Must stay quoted: a bare yes would be read as a boolean by YAML 1.1
      # parsers.
      output: 'yes'
    - case_id: 'experiment_results_lookup'
      input: >-
        I need to get the results for the 'boiling_point' experiment. First,
        get the experiment ID for 'boiling_point', then use that ID to get the
        experiment results. Tell me what you found.
      tools:
      - type: mcp
        server_label: 'localmcp'
        server_url: '<FILLED_BY_TEST_RUNNER>'
      output: '100°C'

# Multi-step lookup prompts issued with an MCP tool server attached; every
# case here also sets `stream: true`.
# `server_url` holds a placeholder; presumably the test runner substitutes the
# real URL -- confirm against the runner.
# The `input` values use folded scalars (`>-`): line breaks become single
# spaces and the trailing newline is stripped, giving a one-line prompt.
test_response_multi_turn_tool_execution_streaming:
  test_name: test_response_multi_turn_tool_execution_streaming
  test_params:
    case:
    - case_id: 'user_permissions_workflow'
      input: >-
        Help me with this security check: First, get the user ID for
        'charlie', then get the permissions for that user ID, and finally
        check if that user can access 'secret_file.txt'. Stream your progress
        as you work through each step.
      tools:
      - type: mcp
        server_label: 'localmcp'
        server_url: '<FILLED_BY_TEST_RUNNER>'
      stream: true
      # Must stay quoted: a bare no would be read as a boolean by YAML 1.1
      # parsers.
      output: 'no'
    - case_id: 'experiment_analysis_streaming'
      input: >-
        I need a complete analysis: First, get the experiment ID for
        'chemical_reaction', then get the results for that experiment, and
        tell me if the yield was above 80%. Please stream your analysis
        process.
      tools:
      - type: mcp
        server_label: 'localmcp'
        server_url: '<FILLED_BY_TEST_RUNNER>'
      stream: true
      output: '85%'