fix(sagemaker.py): debug streaming
parent a251a52717
commit bdf29ca71f

3 changed files with 4 additions and 6 deletions
@@ -791,11 +791,6 @@ def data_generator(response):
 async def async_data_generator(response, user_api_key_dict):
     print_verbose("inside generator")
     async for chunk in response:
-        # try:
-        #     await proxy_logging_obj.pre_call_hook(user_api_key_dict=user_api_key_dict, data=None, call_type="completion")
-        # except Exception as e:
-        #     print(f"An exception occurred - {str(e)}")
-
         print_verbose(f"returned chunk: {chunk}")
         try:
             yield f"data: {json.dumps(chunk.dict())}\n\n"
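
Note: the hunk above removes commented-out pre_call_hook scaffolding from the proxy's async streaming generator; the surviving logic just re-emits each provider chunk as a Server-Sent Events frame. A minimal sketch of that pattern, assuming the chunk objects expose a .dict() method and using a stand-in print_verbose (names follow the diff, the fallback branch is illustrative):

import json

def print_verbose(msg):
    # Illustrative stand-in for the proxy's verbose logger.
    print(msg)

async def async_data_generator(response, user_api_key_dict):
    print_verbose("inside generator")
    async for chunk in response:
        print_verbose(f"returned chunk: {chunk}")
        try:
            # Each chunk becomes one SSE "data:" frame for the HTTP client.
            yield f"data: {json.dumps(chunk.dict())}\n\n"
        except Exception:
            # Assumption: fall back to the raw string form if the chunk
            # is not a pydantic-style object with .dict().
            yield f"data: {str(chunk)}\n\n"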
@@ -1084,7 +1084,7 @@ def test_completion_chat_sagemaker():
         assert len(complete_response) > 0
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-# test_completion_chat_sagemaker()
+test_completion_chat_sagemaker()
 
 def test_completion_chat_sagemaker_mistral():
     try:
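
Note: the only change in this hunk is un-commenting the module-level call, so test_completion_chat_sagemaker() now runs when the test file is executed directly. A hedged sketch of what such a SageMaker streaming test typically looks like with litellm (the endpoint name and prompt are placeholders, not necessarily the repo's exact test body):

import pytest
import litellm

def test_completion_chat_sagemaker():
    try:
        response = litellm.completion(
            model="sagemaker/my-chat-endpoint",  # placeholder endpoint name
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
            stream=True,
        )
        complete_response = ""
        for chunk in response:
            # Streamed chunks carry partial text in the delta; None is skipped.
            complete_response += chunk.choices[0].delta.content or ""
        assert len(complete_response) > 0
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

test_completion_chat_sagemaker()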
@@ -5554,6 +5554,7 @@ class CustomStreamWrapper:
                     model_response.choices[0].finish_reason = response_obj["finish_reason"]
                     self.sent_last_chunk = True
             elif self.custom_llm_provider == "sagemaker":
+                print_verbose(f"ENTERS SAGEMAKER STREAMING")
                 if len(self.completion_stream)==0:
                     if self.sent_last_chunk:
                         raise StopIteration
@@ -5561,6 +5562,7 @@ class CustomStreamWrapper:
                         model_response.choices[0].finish_reason = "stop"
                         self.sent_last_chunk = True
                 new_chunk = self.completion_stream
+                print_verbose(f"sagemaker chunk: {new_chunk}")
                 completion_obj["content"] = new_chunk
                 self.completion_stream = self.completion_stream[len(self.completion_stream):]
             elif self.custom_llm_provider == "petals":
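
Note: the two hunks above only add print_verbose debug lines around the SageMaker branch of CustomStreamWrapper. As the surrounding context shows, SageMaker does not stream token by token here: the whole response string sits in self.completion_stream, is emitted as a single chunk, and the buffer is then drained so the next iteration takes the stop/StopIteration path. A simplified standalone sketch of that consume-once pattern (field names mirror the diff; the class and return shape are illustrative):

class SageMakerStreamSketch:
    def __init__(self, completion_stream: str):
        self.completion_stream = completion_stream
        self.sent_last_chunk = False

    def __iter__(self):
        return self

    def __next__(self):
        finish_reason = None
        if len(self.completion_stream) == 0:
            if self.sent_last_chunk:
                # The empty "stop" chunk was already emitted on the previous call.
                raise StopIteration
            finish_reason = "stop"
            self.sent_last_chunk = True
        new_chunk = self.completion_stream
        # Drain the buffer so the next call takes the stop path above.
        self.completion_stream = self.completion_stream[len(self.completion_stream):]
        return {"content": new_chunk, "finish_reason": finish_reason}

Iterating SageMakerStreamSketch("Hello from SageMaker") therefore yields the full text once, then an empty chunk with finish_reason "stop", and then stops.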
@@ -5723,6 +5725,7 @@ class CustomStreamWrapper:
                 # example - boto3 bedrock llms
                 processed_chunk = next(self)
                 asyncio.create_task(self.logging_obj.async_success_handler(processed_chunk,))
+                print_verbose(f"PROCESSED CHUNK IN __ANEXT__: {processed_chunk}")
                 return processed_chunk
         except StopAsyncIteration:
             raise
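
Note: the last hunk adds a debug print inside the __anext__ fallback, where providers without a native async stream (the in-code comment cites boto3 bedrock llms as the example) are advanced by calling the synchronous next(self), with success logging dispatched as a background task. A minimal sketch of that sync-to-async delegation, under the assumption that the wrapper implements both iteration protocols (illustrative, not the full CustomStreamWrapper):

import asyncio

class SyncBackedAsyncStream:
    def __init__(self, chunks):
        self._chunks = iter(chunks)

    def __next__(self):
        # Raises StopIteration once the underlying iterator is exhausted.
        return next(self._chunks)

    def __aiter__(self):
        return self

    async def __anext__(self):
        try:
            # No real async source: drive the synchronous iterator instead.
            processed_chunk = next(self)
            return processed_chunk
        except StopIteration:
            # Translate the sync sentinel into its async counterpart.
            raise StopAsyncIteration

async def main():
    async for chunk in SyncBackedAsyncStream(["a", "b", "c"]):
        print(chunk)

asyncio.run(main())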