fix(utils.py): add response ms for async calls

2025-04-24 18:24:20 +00:00 · 2023-11-21 19:58:52 -08:00 · 2023-11-21 19:58:52 -08:00 · 381fdcd37b
commit 381fdcd37b
parent 898f15af2b
2 changed files with 5 additions and 2 deletions
--- a/litellm/tests/test_async_fn.py
+++ b/litellm/tests/test_async_fn.py
@@ -47,6 +47,7 @@ def test_async_response_openai():
        try:
            response = await acompletion(model="gpt-3.5-turbo", messages=messages, timeout=5)
            print(f"response: {response}")
+            print(f"response ms: {response._response_ms}")
        except litellm.Timeout as e: 
            pass
        except Exception as e:
@@ -55,7 +56,7 @@ def test_async_response_openai():

    asyncio.run(test_get_response())

-# test_async_response_openai()
+test_async_response_openai()

 def test_async_response_azure():
    import asyncio
@@ -160,4 +161,4 @@ def test_get_response_non_openai_streaming():
        return response
    asyncio.run(test_async_call())

-test_get_response_non_openai_streaming()
+# test_get_response_non_openai_streaming()
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1370,6 +1370,8 @@ def client(original_function):
            # LOG SUCCESS - handle streaming success logging in the _next_ object, remove `handle_success` once it's deprecated
            threading.Thread(target=logging_obj.success_handler, args=(result, start_time, end_time)).start()
            # RETURN RESULT
+            if isinstance(result, ModelResponse):
+                result._response_ms = (end_time - start_time).total_seconds() * 1000 # return response latency in ms like openai
            return result
        except Exception as e: 
            call_type = original_function.__name__