diff --git a/dist/litellm-1.12.6.dev3-py3-none-any.whl b/dist/litellm-1.12.6.dev3-py3-none-any.whl
new file mode 100644
index 000000000..81290067d
Binary files /dev/null and b/dist/litellm-1.12.6.dev3-py3-none-any.whl differ
diff --git a/dist/litellm-1.12.6.dev3.tar.gz b/dist/litellm-1.12.6.dev3.tar.gz
new file mode 100644
index 000000000..2dbe390eb
Binary files /dev/null and b/dist/litellm-1.12.6.dev3.tar.gz differ
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 5c63e00a6..7b9993286 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1084,7 +1084,52 @@ def test_completion_chat_sagemaker():
         assert len(complete_response) > 0
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-test_completion_chat_sagemaker()
+# test_completion_chat_sagemaker()
+
+
+def test_completion_chat_sagemaker_stream():
+    try:
+        messages = [{"role": "user", "content": "Hey, how's it going?"}]
+        litellm.set_verbose = True
+        response = completion(
+            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
+            messages=messages,
+            max_tokens=100,
+            temperature=0.7,
+            stream=True,
+        )
+        # Add any assertions here to check the response
+        complete_response = ""
+        for chunk in response:
+            complete_response += chunk.choices[0].delta.content or ""
+        print(f"complete_response: {complete_response}")
+        assert len(complete_response) > 0
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+import asyncio
+@pytest.mark.asyncio
+async def test_acompletion_chat_sagemaker_stream():
+    try:
+        messages = [{"role": "user", "content": "Hey, how's it going?"}]
+        litellm.set_verbose = True
+        response = await litellm.acompletion(
+            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
+            messages=messages,
+            max_tokens=100,
+            temperature=0.7,
+            stream=True,
+        )
+        # Add any assertions here to check the response
+        complete_response = ""
+        async for chunk in response:
+            complete_response += chunk.choices[0].delta.content or ""
+        print(f"complete_response: {complete_response}")
+        assert len(complete_response) > 0
+    except:
+        pass
+
+asyncio.run(test_acompletion_chat_sagemaker_stream())
 
 def test_completion_chat_sagemaker_mistral():
     try:
diff --git a/litellm/utils.py b/litellm/utils.py
index 8f4f76db8..d25bf310a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5692,6 +5692,7 @@ class CustomStreamWrapper:
                 if chunk is not None and chunk != b'':
                     print_verbose(f"PROCESSED CHUNK PRE CHUNK CREATOR: {chunk}")
                     response = self.chunk_creator(chunk=chunk)
+                    print_verbose(f"PROCESSED CHUNK POST CHUNK CREATOR: {chunk}")
                     if response is None:
                         continue
                     ## LOGGING
@@ -5700,6 +5701,7 @@ class CustomStreamWrapper:
         except StopIteration:
             raise  # Re-raise StopIteration
         except Exception as e:
+            print_verbose(f"HITS AN ERROR: {str(e)}")
             traceback_exception = traceback.format_exc()
             # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated
             threading.Thread(target=self.logging_obj.failure_handler, args=(e, traceback_exception)).start()
@@ -5731,8 +5733,8 @@ class CustomStreamWrapper:
                 # example - boto3 bedrock llms
                 print_verbose(f"ENTERS __NEXT__ LOOP")
                 processed_chunk = next(self)
-                asyncio.create_task(self.logging_obj.async_success_handler(processed_chunk,))
                 print_verbose(f"PROCESSED CHUNK IN __ANEXT__: {processed_chunk}")
+                asyncio.create_task(self.logging_obj.async_success_handler(processed_chunk,))
                 return processed_chunk
             except StopAsyncIteration:
                 raise
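
The litellm/utils.py hunks add verbose logging around chunk_creator and, in __anext__, schedule the async success-handler task only after the processed chunk has been logged. Below is a minimal sketch of that pattern: an async iterator that delegates to a synchronous __next__ (the path used for providers that only expose sync streams, e.g. boto3 bedrock or sagemaker) and fires a background logging task per chunk. The names here (SyncBackedAsyncStream, log_chunk) are illustrative only, not litellm's actual API.

import asyncio

# Illustrative sketch, not litellm's CustomStreamWrapper: an async iterator that
# reuses a synchronous __next__ and schedules a fire-and-forget logging task
# after each chunk is produced, mirroring the ordering in the hunk above
# (log the processed chunk, then create the success-handler task, then return it).
class SyncBackedAsyncStream:
    def __init__(self, sync_iterable, on_chunk):
        self._it = iter(sync_iterable)
        self._on_chunk = on_chunk  # async callback standing in for a success handler

    def __aiter__(self):
        return self

    def __next__(self):
        return next(self._it)  # sync path, e.g. a boto3 event stream

    async def __anext__(self):
        try:
            processed_chunk = next(self)  # delegate to the sync iterator
        except StopIteration:
            raise StopAsyncIteration
        print(f"processed chunk: {processed_chunk}")
        # schedule logging only after the chunk exists, without blocking the stream
        asyncio.create_task(self._on_chunk(processed_chunk))
        return processed_chunk


async def log_chunk(chunk):
    # stand-in for an async success handler
    print(f"logged: {chunk}")


async def main():
    async for chunk in SyncBackedAsyncStream(["Hey", ",", " there"], log_chunk):
        print(f"got: {chunk}")
    await asyncio.sleep(0)  # let pending logging tasks finish before the loop closes


if __name__ == "__main__":
    asyncio.run(main())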