fix(utils.py): ensure streaming output parsing is only applied to hf / sagemaker models

Selectively applies the <s> / </s> checking, so it only runs for those providers.
2025-04-27 19:54:13 +00:00 · 2024-04-17 17:43:41 -07:00 · 2024-04-17 17:43:41 -07:00 · 1b4462ee70
commit 1b4462ee70
parent 2a2b97f093
2 changed files with 23 additions and 0 deletions
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -220,6 +220,20 @@ tools_schema = [
 # test_completion_cohere_stream()


+def test_completion_azure_stream_special_char():
+    """Streamed Azure reply to a '<'-only prompt must yield non-empty content."""
+    messages = [
+        {"role": "user", "content": "Respond with the '<' sign and nothing else."}
+    ]
+    response = completion(model="azure/chatgpt-v-2", messages=messages, stream=True)
+    response_str = ""
+    for part in response:
+        response_str += part.choices[0].delta.content or ""
+
+    print(f"response_str: {response_str}")
+    assert len(response_str) > 0
+
+
 def test_completion_cohere_stream_bad_key():
    try:
        litellm.cache = None