diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index a18da18192..aa2a91b9f3 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -220,6 +220,19 @@ tools_schema = [
 # test_completion_cohere_stream()
 
 
+def test_completion_azure_stream_special_char():
+    messages = [
+        {"role": "user", "content": "Respond with the '<' sign and nothing else."}
+    ]
+    response = completion(model="azure/chatgpt-v-2", messages=messages, stream=True)
+    response_str = ""
+    for part in response:
+        response_str += part.choices[0].delta.content or ""
+
+    print(f"response_str: {response_str}")
+    assert len(response_str) > 0
+
+
 def test_completion_cohere_stream_bad_key():
     try:
         litellm.cache = None
diff --git a/litellm/utils.py b/litellm/utils.py
index 2ae1467d07..bea24c02fe 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8856,7 +8856,16 @@ class CustomStreamWrapper:
             raise e
 
     def check_special_tokens(self, chunk: str, finish_reason: Optional[str]):
+        """
+        Output parse / special tokens for sagemaker + hf streaming.
+        """
         hold = False
+        if (
+            self.custom_llm_provider != "huggingface"
+            and self.custom_llm_provider != "sagemaker"
+        ):
+            return hold, chunk
+
         if finish_reason:
             for token in self.special_tokens:
                 if token in chunk: