feat(utils.py): enable returning complete response when stream=true

2025-04-26 19:24:27 +00:00 · 2023-11-09 09:17:43 -08:00 · 2023-11-09 09:17:43 -08:00 · 8ee4b1f603
commit 8ee4b1f603
parent 3dae4e9cda
4 changed files with 22 additions and 7 deletions
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -949,16 +949,18 @@ def client(original_function):
            end_time = datetime.datetime.now()
            if "stream" in kwargs and kwargs["stream"] == True:
                # TODO: Add to cache for streaming
-                return result
+                if "complete_response" in kwargs and kwargs["complete_response"] == True: 
+                    chunks = []
+                    for idx, chunk in enumerate(result):
+                        chunks.append(chunk)
+                    return litellm.stream_chunk_builder(chunks)
+                else: 
+                    return result
        

            # [OPTIONAL] ADD TO CACHE
            if litellm.caching or litellm.caching_with_models or litellm.cache != None: # user init a cache object
                litellm.cache.add_cache(result, *args, **kwargs)
-            
-            # [OPTIONAL] Return LiteLLM call_id
-            if litellm.use_client == True:
-                result['litellm_call_id'] = litellm_call_id

            # LOG SUCCESS - handle streaming success logging in the _next_ object, remove `handle_success` once it's deprecated
            logging_obj.success_handler(result, start_time, end_time)