diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index 049f06fdb..8beffe8e2 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -1287,6 +1287,7 @@ class OpenAICompletionToLlamaStackMixin:
         user: str | None = None,
         guided_choice: list[str] | None = None,
         prompt_logprobs: int | None = None,
+        suffix: str | None = None,
     ) -> OpenAICompletion:
         if stream:
             raise ValueError(f"{self.__class__.__name__} doesn't support streaming openai completions")
diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py
index 7c33efd8e..3e43af272 100644
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@@ -237,34 +237,6 @@ def test_openai_chat_completion_non_streaming(compat_client, client_with_models,
     assert expected.lower() in message_content


-@pytest.mark.parametrize(
-    "test_case",
-    [
-        "inference:chat_completion:non_streaming_suffix_01",
-        "inference:chat_completion:non_streaming_suffix_02",
-    ],
-)
-def test_openai_chat_completion_non_streaming_suffix(compat_client, client_with_models, text_model_id, test_case):
-    skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
-    tc = TestCase(test_case)
-    question = tc["question"]
-    expected = tc["expected"]
-
-    response = compat_client.chat.completions.create(
-        model=text_model_id,
-        messages=[
-            {
-                "role": "user",
-                "content": question,
-            }
-        ],
-        stream=False,
-    )
-    message_content = response.choices[0].message.content.lower().strip()
-    assert len(message_content) > 0
-    assert expected.lower() in message_content
-
-
 @pytest.mark.parametrize(
     "test_case",
     [
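
The first hunk threads a `suffix` keyword argument through the `OpenAICompletionToLlamaStackMixin.openai_completion` signature; the second hunk removes the suffix-parametrized chat-completion test, since `suffix` belongs to the legacy completions API rather than chat completions. Below is a minimal usage sketch (not part of this diff) showing how `suffix` would be exercised through an OpenAI-compatible client; the base URL, API key, and model id are placeholder assumptions, not values taken from the patch.

```python
from openai import OpenAI

# Assumed local Llama Stack deployment exposing an OpenAI-compatible endpoint;
# adjust base_url/api_key/model to match your setup.
client = OpenAI(
    base_url="http://localhost:8321/v1/openai/v1",
    api_key="none",
)

response = client.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    prompt="def add(a, b):",
    suffix="\n\nprint(add(1, 2))",  # the parameter this patch threads through
    stream=False,
)
print(response.choices[0].text)
```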