# llama-stack-mirror/tests/verifications/openai_api/fixtures/test_cases/responses.yaml

# Single-prompt cases: each entry pairs one `input` question with an `output`
# string.
# NOTE(review): presumably the runner looks for `output` in the reply - confirm.
test_response_basic:
  test_name: test_response_basic
  test_params:
    case:
    - case_id: 'earth'
      input: 'Which planet do humans live on?'
      output: 'earth'
    - case_id: 'saturn'
      input: 'Which planet has rings around it with a name starting with letter S?'
      output: 'saturn'

# Multi-turn case: `turns` is an ordered list of input/output pairs; the second
# prompt refers back to the answer given for the first.
test_response_multi_turn:
  test_name: test_response_multi_turn
  test_params:
    case:
    - case_id: 'earth'
      turns:
      - input: 'Which planet do humans live on?'
        output: 'earth'
      - input: 'What is the name of the planet from your previous response?'
        output: 'earth'

# Single case that attaches a `web_search` tool (with a `low` search context
# size) to the prompt.
test_response_web_search:
  test_name: test_response_web_search
  test_params:
    case:
    - case_id: 'llama_experts'
      input: 'How many experts does the Llama 4 Maverick model have?'
      tools:
      - type: web_search
        search_context_size: 'low'
      # Quoted so the value stays a string rather than the integer 128.
      output: '128'

# Two `file_search` cases asking the same question: the first supplies the
# document inline via `file_content`, the second points at a PDF via
# `file_path`.
test_response_file_search:
  test_name: test_response_file_search
  test_params:
    case:
    - case_id: 'llama_experts'
      input: 'How many experts does the Llama 4 Maverick model have?'
      tools:
      - type: file_search
        # The test runner adds the vector_store_ids param for the file_search
        # tool.
      file_content: 'Llama 4 Maverick has 128 experts'
      output: '128'
    - case_id: 'llama_experts_pdf'
      input: 'How many experts does the Llama 4 Maverick model have?'
      tools:
      - type: file_search
        # The test runner adds the vector_store_ids param for the file_search
        # tool.
      file_path: 'pdfs/llama_stack_and_models.pdf'
      output: '128'

# Single case that attaches an `mcp` tool labelled `localmcp` to the prompt.
test_response_mcp_tool:
  test_name: test_response_mcp_tool
  test_params:
    case:
    - case_id: 'boiling_point_tool'
      input: 'What is the boiling point of myawesomeliquid in Celsius?'
      tools:
      - type: mcp
        server_label: 'localmcp'
        # Placeholder value; going by its text, the test runner is expected to
        # substitute the real URL - confirm against the runner.
        server_url: '<FILLED_BY_TEST_RUNNER>'
      output: 'Hello, world!'

# Single case that declares a `function` tool named `get_weather` taking one
# required string parameter, `location`. This case defines no `output` key.
test_response_custom_tool:
  test_name: test_response_custom_tool
  test_params:
    case:
    - case_id: 'sf_weather'
      # Double-quoted because the prompt contains an apostrophe.
      input: "What's the weather like in San Francisco?"
      tools:
      - type: function
        name: get_weather
        description: Get current temperature for a given location.
        parameters:
          additionalProperties: false
          properties:
            location:
              # \xE1 is the double-quoted YAML escape for U+00E1 (a-acute).
              description: "City and country e.g. Bogot\xE1, Colombia"
              type: string
          required:
          - location
          type: object

# Single case whose `input` is a structured user message combining a text part
# (`input_text`) with an image part (`input_image`, given by URL).
test_response_image:
  test_name: test_response_image
  test_params:
    case:
    - case_id: 'llama_image'
      input:
      - role: user
        content:
        - type: input_text
          text: 'Identify the type of animal in this image.'
        - type: input_image
          image_url: 'https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg'
      output: 'llama'

# the models are really poor at tool calling after seeing images :/
# Two-turn case: the first turn sends a text + image message, the second turn
# is a plain-text follow-up about the animal named in the first.
test_response_multi_turn_image:
  test_name: test_response_multi_turn_image
  test_params:
    case:
    - case_id: 'llama_image_understanding'
      turns:
      - input:
        - role: user
          content:
          - type: input_text
            # Folded scalar: the two lines below join into one line with a
            # single space and no trailing newline.
            text: >-
              What type of animal is in this image? Please respond with a
              single word that starts with the letter 'L'.
          - type: input_image
            image_url: 'https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg'
        output: 'llama'
      - input: 'What country do you find this animal primarily in? What continent?'
        output: 'peru'

# Cases whose prompts ask for a chain of dependent lookups, each with the
# `localmcp` MCP tool attached. Prompts use folded scalars (`>-`): the wrapped
# lines join with single spaces and carry no trailing newline.
test_response_multi_turn_tool_execution:
  test_name: test_response_multi_turn_tool_execution
  test_params:
    case:
    - case_id: 'user_file_access_check'
      input: >-
        I need to check if user 'alice' can access the file 'document.txt'.
        First, get alice's user ID, then check if that user ID can access the
        file 'document.txt'. Do this as a series of steps, where each step is a
        separate message. Return only one tool call per step. Summarize the
        final result with a single 'yes' or 'no' response.
      tools:
      - type: mcp
        server_label: 'localmcp'
        server_url: '<FILLED_BY_TEST_RUNNER>'
      # Quoted so the value stays the string "yes" rather than a boolean.
      output: 'yes'
    - case_id: 'experiment_results_lookup'
      input: >-
        I need to get the results for the 'boiling_point' experiment. First,
        get the experiment ID for 'boiling_point', then use that ID to get the
        experiment results. Tell me what you found.
      tools:
      - type: mcp
        server_label: 'localmcp'
        server_url: '<FILLED_BY_TEST_RUNNER>'
      output: '100°C'

# Streaming counterparts of the chained-lookup cases: same `localmcp` MCP tool,
# with `stream: true` set on every case. Prompts use folded scalars (`>-`): the
# wrapped lines join with single spaces and carry no trailing newline.
test_response_multi_turn_tool_execution_streaming:
  test_name: test_response_multi_turn_tool_execution_streaming
  test_params:
    case:
    - case_id: 'user_permissions_workflow'
      input: >-
        Help me with this security check: First, get the user ID for 'charlie',
        then get the permissions for that user ID, and finally check if that
        user can access 'secret_file.txt'. Stream your progress as you work
        through each step.
      tools:
      - type: mcp
        server_label: 'localmcp'
        server_url: '<FILLED_BY_TEST_RUNNER>'
      stream: true
      # Quoted so the value stays the string "no" rather than a boolean.
      output: 'no'
    - case_id: 'experiment_analysis_streaming'
      input: >-
        I need a complete analysis: First, get the experiment ID for
        'chemical_reaction', then get the results for that experiment, and tell
        me if the yield was above 80%. Please stream your analysis process.
      tools:
      - type: mcp
        server_label: 'localmcp'
        server_url: '<FILLED_BY_TEST_RUNNER>'
      stream: true
      output: '85%'