From b40e100a1726dc809fac4630d98ddc41c90fc719 Mon Sep 17 00:00:00 2001
From: Eric Huang <erichuang@meta.com>
Date: Thu, 4 Sep 2025 14:18:10 -0700
Subject: [PATCH] chore: unbreak inference store test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?
The inference store writes were moved to asyncio.create_task and not await anymore

## Test Plan

❯ OLLAMA_URL=http://localhost:11434  LLAMA_STACK_CONFIG=server:starter uv run --with pytest-repeat pytest tests/integration/inference --text-model="ollama/llama3.2:3b-instruct-fp16" -vvs -k "test_inference_store_tool_calls and 3b-instruct-fp16-True" --count=10
Uninstalled 2 packages in 102ms
Installed 2 packages in 138ms
INFO     2025-09-04 14:10:17,775 tests.integration.conftest:66 tests: Setting DISABLE_CODE_SANDBOX=1 for macOS
========================================================================================================== test session starts ===========================================================================================================
platform darwin -- Python 3.12.3, pytest-8.4.1, pluggy-1.6.0 -- /Users/erichuang/.cache/uv/builds-v0/.tmpSGMlgt/bin/python
cachedir: .pytest_cache
metadata: {'Python': '3.12.3', 'Platform': 'macOS-15.6.1-arm64-arm-64bit', 'Packages': {'pytest': '8.4.1', 'pluggy': '1.6.0'}, 'Plugins': {'repeat': '0.9.4', 'anyio': '4.9.0', 'html': '4.1.1', 'socket': '0.7.0', 'asyncio': '1.1.0', 'json-report': '1.5.0', 'timeout': '2.4.0', 'metadata': '3.1.1', 'cov': '6.2.1', 'nbval': '0.11.0'}}
rootdir: /Users/erichuang/projects/llama-stack-git
configfile: pyproject.toml
plugins: repeat-0.9.4, anyio-4.9.0, html-4.1.1, socket-0.7.0, asyncio-1.1.0, json-report-1.5.0, timeout-2.4.0, metadata-3.1.1, cov-6.2.1, nbval-0.11.0
asyncio: mode=Mode.AUTO, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function
collected 970 items / 950 deselected / 20 selected

tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=ollama/llama3.2:3b-instruct-fp16-True-1-10]
instantiating llama_stack_client
Starting llama stack server with config 'starter' on port 8321...
Waiting for server at http://localhost:8321... (0.0s elapsed)
Waiting for server at http://localhost:8321... (0.5s elapsed)
Waiting for server at http://localhost:8321... (5.1s elapsed)
Waiting for server at http://localhost:8321... (5.6s elapsed)
Waiting for server at http://localhost:8321... (10.1s elapsed)
Waiting for server at http://localhost:8321... (10.6s elapsed)
Waiting for server at http://localhost:8321... (15.2s elapsed)
Waiting for server at http://localhost:8321... (15.7s elapsed)
Server is ready at http://localhost:8321
llama_stack_client instantiated in 20.583s
PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=ollama/llama3.2:3b-instruct-fp16-True-2-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=ollama/llama3.2:3b-instruct-fp16-True-3-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=ollama/llama3.2:3b-instruct-fp16-True-4-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=ollama/llama3.2:3b-instruct-fp16-True-5-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=ollama/llama3.2:3b-instruct-fp16-True-6-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=ollama/llama3.2:3b-instruct-fp16-True-7-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=ollama/llama3.2:3b-instruct-fp16-True-8-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=ollama/llama3.2:3b-instruct-fp16-True-9-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[openai_client-txt=ollama/llama3.2:3b-instruct-fp16-True-10-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True-1-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True-2-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True-3-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True-4-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True-5-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True-6-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True-7-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True-8-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True-9-10] PASSED
tests/integration/inference/test_openai_completion.py::test_inference_store_tool_calls[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-True-10-10] PASSEDTerminating llama stack server process...
Terminating process 53307 and its group...
Server process and children terminated gracefully
---
 .../inference/test_openai_completion.py       | 25 +++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py
index 72137662d..62185e470 100644
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@@ -5,6 +5,8 @@
 # the root directory of this source tree.
 
 
+import time
+
 import pytest
 
 from ..test_cases.test_case import TestCase
@@ -323,8 +325,15 @@ def test_inference_store(compat_client, client_with_models, text_model_id, strea
         response_id = response.id
         content = response.choices[0].message.content
 
-    responses = client.chat.completions.list(limit=1000)
-    assert response_id in [r.id for r in responses.data]
+    tries = 0
+    while tries < 10:
+        responses = client.chat.completions.list(limit=1000)
+        if response_id in [r.id for r in responses.data]:
+            break
+        else:
+            tries += 1
+            time.sleep(0.1)
+    assert tries < 10, f"Response {response_id} not found after 1 second"
 
     retrieved_response = client.chat.completions.retrieve(response_id)
     assert retrieved_response.id == response_id
@@ -388,6 +397,18 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
         response_id = response.id
         content = response.choices[0].message.content
 
+    # wait for the response to be stored
+    tries = 0
+    while tries < 10:
+        responses = client.chat.completions.list(limit=1000)
+        if response_id in [r.id for r in responses.data]:
+            break
+        else:
+            tries += 1
+            time.sleep(0.1)
+
+    assert tries < 10, f"Response {response_id} not found after 1 second"
+
     responses = client.chat.completions.list(limit=1000)
     assert response_id in [r.id for r in responses.data]