diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index 049f06fdb..8beffe8e2 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -1287,6 +1287,7 @@ class OpenAICompletionToLlamaStackMixin:
         user: str | None = None,
         guided_choice: list[str] | None = None,
         prompt_logprobs: int | None = None,
+        suffix: str | None = None,
     ) -> OpenAICompletion:
         if stream:
             raise ValueError(f"{self.__class__.__name__} doesn't support streaming openai completions")
diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py
index 7c33efd8e..3e43af272 100644
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@@ -237,34 +237,6 @@ def test_openai_chat_completion_non_streaming(compat_client, client_with_models,
     assert expected.lower() in message_content


-@pytest.mark.parametrize(
-    "test_case",
-    [
-        "inference:chat_completion:non_streaming_suffix_01",
-        "inference:chat_completion:non_streaming_suffix_02",
-    ],
-)
-def test_openai_chat_completion_non_streaming_suffix(compat_client, client_with_models, text_model_id, test_case):
-    skip_if_model_doesnt_support_openai_chat_completion(client_with_models, text_model_id)
-    tc = TestCase(test_case)
-    question = tc["question"]
-    expected = tc["expected"]
-
-    response = compat_client.chat.completions.create(
-        model=text_model_id,
-        messages=[
-            {
-                "role": "user",
-                "content": question,
-            }
-        ],
-        stream=False,
-    )
-    message_content = response.choices[0].message.content.lower().strip()
-    assert len(message_content) > 0
-    assert expected.lower() in message_content
-
-
 @pytest.mark.parametrize(
     "test_case",
     [
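
The first hunk threads a `suffix` keyword argument through the `OpenAICompletionToLlamaStackMixin.openai_completion` signature; the second hunk removes the suffix-parametrized chat-completion test, since `suffix` belongs to the legacy completions API rather than chat completions. Below is a minimal usage sketch (not part of this diff) showing how `suffix` would be exercised through an OpenAI-compatible client; the base URL, API key, and model id are placeholder assumptions, not values taken from the patch.

```python
from openai import OpenAI

# Assumed local Llama Stack deployment exposing an OpenAI-compatible endpoint;
# adjust base_url/api_key/model to match your setup.
client = OpenAI(
    base_url="http://localhost:8321/v1/openai/v1",
    api_key="none",
)

response = client.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    prompt="def add(a, b):",
    suffix="\n\nprint(add(1, 2))",  # the parameter this patch threads through
    stream=False,
)
print(response.choices[0].text)
```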