Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-14 17:16:09 +00:00)
feat(responses): add output_text delta events to responses (#2265)
This adds initial streaming support to the Responses API. This PR makes sure that the _first_ inference call made to chat completions streams out. There's more to be done:

- tool call output tokens need to stream out when possible
- we need to loop through multiple rounds of inference and they all need to stream out.

## Test Plan

Added a test. Executed as:

```
FIREWORKS_API_KEY=... \
  pytest -s -v 'tests/verifications/openai_api/test_responses.py' \
  --provider=stack:fireworks --model meta-llama/Llama-4-Scout-17B-16E-Instruct
```

Then, started a llama stack fireworks distro and tested against it like this:

```
OPENAI_API_KEY=blah \
  pytest -s -v 'tests/verifications/openai_api/test_responses.py' \
  --base-url http://localhost:8321/v1/openai/v1 \
  --model meta-llama/Llama-4-Scout-17B-16E-Instruct
```
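For context (not part of this commit), here is a minimal sketch of what consuming the new output-text delta events could look like from a client. It assumes the llama-stack distro from the test plan is running at `http://localhost:8321/v1/openai/v1`, that the OpenAI Python SDK is installed, and that the model name and API key placeholders match the test plan; event field names follow the OpenAI Responses streaming event shape rather than anything defined in this PR.

```python
# Minimal sketch, assuming a llama-stack distro is serving the
# OpenAI-compatible Responses endpoint locally (see test plan above).
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8321/v1/openai/v1",  # llama-stack OpenAI-compatible endpoint
    api_key="blah",  # placeholder, as in the test plan
)

# stream=True returns an iterator of typed events instead of one final response.
stream = client.responses.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    input="Say hello in one short sentence.",
    stream=True,
)

for event in stream:
    # The delta events carry incremental chunks of output text.
    if event.type == "response.output_text.delta":
        print(event.delta, end="", flush=True)
    elif event.type == "response.completed":
        print()  # newline once the response finishes
```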
Parent: 6ee319ae08
Commit: 5cdb29758a
8 changed files with 493 additions and 160 deletions
```diff
@@ -77,11 +77,12 @@ test_response_image:
                 image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
         output: "llama"
 
+# the models are really poor at tool calling after seeing images :/
 test_response_multi_turn_image:
   test_name: test_response_multi_turn_image
   test_params:
     case:
-      - case_id: "llama_image_search"
+      - case_id: "llama_image_understanding"
         turns:
           - input:
               - role: user
@@ -91,7 +92,5 @@ test_response_multi_turn_image:
                   - type: input_image
                     image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg"
             output: "llama"
-          - input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick', 'scout' and 'llm'"
-            tools:
-              - type: web_search
-            output: "model"
+          - input: "What country do you find this animal primarily in? What continent?"
+            output: "peru"
```
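The reshaped multi-turn image case drops the web-search turn and instead asks a plain follow-up question. A hypothetical sketch of how such a case could be driven through the OpenAI-compatible Responses endpoint is below; the actual verification test lives in `tests/verifications/openai_api/test_responses.py`, and the first-turn prompt text and the use of `previous_response_id` chaining here are illustrative assumptions, not taken from this diff.

```python
# Hypothetical sketch of the multi-turn image case above; the real test
# harness may structure these turns differently.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="blah")
model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"

# Turn 1: multimodal input; the expected output contains "llama".
first = client.responses.create(
    model=model,
    input=[
        {
            "role": "user",
            "content": [
                # Prompt text is an assumption; only the image URL appears in the hunk.
                {"type": "input_text", "text": "What animal is in this image?"},
                {
                    "type": "input_image",
                    "image_url": "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg",
                },
            ],
        }
    ],
)
assert "llama" in first.output_text.lower()

# Turn 2: follow-up question chained to turn 1; the expected output contains "peru".
second = client.responses.create(
    model=model,
    input="What country do you find this animal primarily in? What continent?",
    previous_response_id=first.id,
)
assert "peru" in second.output_text.lower()
```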
|
Loading…
Add table
Add a link
Reference in a new issue