diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 8fcdc9f255..2493524ea6 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -136,35 +136,35 @@ def streaming_format_tests(idx, chunk):
     print(f"extracted chunk: {extracted_chunk}")
     return extracted_chunk, finished
 
-def test_completion_cohere_stream():
-# this is a flaky test due to the cohere API endpoint being unstable
-    try:
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
-            },
-        ]
-        response = completion(
-            model="command-nightly", messages=messages, stream=True, max_tokens=50,
-        )
-        complete_response = ""
-        # Add any assertions here to check the response
-        has_finish_reason = False
-        for idx, chunk in enumerate(response):
-            chunk, finished = streaming_format_tests(idx, chunk)
-            has_finish_reason = finished
-            if finished:
-                break
-            complete_response += chunk
-        if has_finish_reason is False:
-            raise Exception("Finish reason not in final chunk")
-        if complete_response.strip() == "":
-            raise Exception("Empty response received")
-        print(f"completion_response: {complete_response}")
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
+# def test_completion_cohere_stream():
+# # this is a flaky test due to the cohere API endpoint being unstable
+#     try:
+#         messages = [
+#             {"role": "system", "content": "You are a helpful assistant."},
+#             {
+#                 "role": "user",
+#                 "content": "how does a court case get to the Supreme Court?",
+#             },
+#         ]
+#         response = completion(
+#             model="command-nightly", messages=messages, stream=True, max_tokens=50,
+#         )
+#         complete_response = ""
+#         # Add any assertions here to check the response
+#         has_finish_reason = False
+#         for idx, chunk in enumerate(response):
+#             chunk, finished = streaming_format_tests(idx, chunk)
+#             has_finish_reason = finished
+#             if finished:
+#                 break
+#             complete_response += chunk
+#         if has_finish_reason is False:
+#             raise Exception("Finish reason not in final chunk")
+#         if complete_response.strip() == "":
+#             raise Exception("Empty response received")
+#         print(f"completion_response: {complete_response}")
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
 
 # test_completion_cohere_stream()
 
@@ -493,7 +493,7 @@ def test_completion_claude_stream_bad_key():
         pytest.fail(f"Error occurred: {e}")
 
 
-test_completion_claude_stream_bad_key()
+# test_completion_claude_stream_bad_key()
 # test_completion_replicate_stream()
 
 # def test_completion_vertexai_stream():
@@ -767,8 +767,6 @@ def ai21_completion_call_bad_key():
         if complete_response.strip() == "":
             raise Exception("Empty response received")
         print(f"completion_response: {complete_response}")
-    except Bad as e:
-        pass
     except:
         pytest.fail(f"error occurred: {traceback.format_exc()}")
 
@@ -848,7 +846,7 @@ def test_openai_chat_completion_call():
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
-test_openai_chat_completion_call()
+# test_openai_chat_completion_call()
 
 def test_openai_chat_completion_complete_response_call():
     try:
diff --git a/litellm/utils.py b/litellm/utils.py
index a5928d9576..630cb3b9f5 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1108,7 +1108,7 @@ def client(original_function):
             if cached_result != None:
                 print_verbose(f"Cache Hit!")
                 call_type = original_function.__name__
-                if call_type == CallTypes.completion.value:
+                if call_type == CallTypes.completion.value and isinstance(cached_result, dict):
                     return convert_to_model_response_object(response_object=cached_result, model_response_object=ModelResponse())
                 else:
                     return cached_result
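
Note on the litellm/utils.py hunk: the cache-hit path now rehydrates a cached result through convert_to_model_response_object only when the cached value is a plain dict; any other cached shape is returned as-is. A minimal sketch of that branching is below, with hypothetical stand-ins (ModelResponse, convert_to_model_response_object, and return_cached here are simplified placeholders, not litellm's real implementations; only the isinstance guard mirrors the diff above):

    # Sketch only: mirrors the isinstance(cached_result, dict) guard from the diff.
    from typing import Any


    class ModelResponse(dict):
        """Placeholder for litellm's ModelResponse container."""


    def convert_to_model_response_object(response_object: dict) -> ModelResponse:
        # Rebuild a response object from a dict that was serialized into the cache.
        return ModelResponse(response_object)


    def return_cached(cached_result: Any, call_type: str) -> Any:
        # Only completion calls whose cached payload is a dict need conversion;
        # other cached shapes are passed through unchanged.
        if call_type == "completion" and isinstance(cached_result, dict):
            return convert_to_model_response_object(response_object=cached_result)
        return cached_result


    # A dict pulled from the cache becomes a ModelResponse; a non-dict does not.
    print(type(return_cached({"choices": []}, "completion")))  # <class '__main__.ModelResponse'>
    print(type(return_cached("hello", "text_completion")))     # <class 'str'>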