diff --git a/litellm/tests/test_async_fn.py b/litellm/tests/test_async_fn.py
index cd459dc1d..920b7524e 100644
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@@ -47,6 +47,7 @@ def test_async_response_openai():
         try:
             response = await acompletion(model="gpt-3.5-turbo", messages=messages, timeout=5)
             print(f"response: {response}")
+            print(f"response ms: {response._response_ms}")
         except litellm.Timeout as e:
             pass
         except Exception as e:
@@ -55,7 +56,7 @@ def test_async_response_openai():
 
     asyncio.run(test_get_response())
 
-# test_async_response_openai()
+test_async_response_openai()
 
 def test_async_response_azure():
     import asyncio
@@ -160,4 +161,4 @@ def test_get_response_non_openai_streaming():
         return response
     asyncio.run(test_async_call())
 
-test_get_response_non_openai_streaming()
\ No newline at end of file
+# test_get_response_non_openai_streaming()
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index ac8c9d8d2..10601b653 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1370,6 +1370,8 @@ def client(original_function):
             # LOG SUCCESS - handle streaming success logging in the _next_ object, remove `handle_success` once it's deprecated
             threading.Thread(target=logging_obj.success_handler, args=(result, start_time, end_time)).start()
             # RETURN RESULT
+            if isinstance(result, ModelResponse):
+                result._response_ms = (end_time - start_time).total_seconds() * 1000 # return response latency in ms like openai
             return result
         except Exception as e:
             call_type = original_function.__name__
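
Below is a minimal usage sketch (not part of the patch) showing how the `_response_ms` attribute populated by the `client()` wrapper in `litellm/utils.py` could be read by a caller. It assumes `litellm` is importable and an OpenAI API key is configured; the model name and message are illustrative.

    import asyncio
    from litellm import acompletion

    async def main():
        # acompletion goes through the client() wrapper, which stamps
        # _response_ms = (end_time - start_time) in milliseconds on the ModelResponse
        response = await acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
        )
        print(f"latency: {response._response_ms} ms")

    asyncio.run(main())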