mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
fix(utils.py): add response ms for async calls
This commit is contained in:
parent
898f15af2b
commit
381fdcd37b
2 changed files with 5 additions and 2 deletions
|
@ -47,6 +47,7 @@ def test_async_response_openai():
|
||||||
try:
|
try:
|
||||||
response = await acompletion(model="gpt-3.5-turbo", messages=messages, timeout=5)
|
response = await acompletion(model="gpt-3.5-turbo", messages=messages, timeout=5)
|
||||||
print(f"response: {response}")
|
print(f"response: {response}")
|
||||||
|
print(f"response ms: {response._response_ms}")
|
||||||
except litellm.Timeout as e:
|
except litellm.Timeout as e:
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -55,7 +56,7 @@ def test_async_response_openai():
|
||||||
|
|
||||||
asyncio.run(test_get_response())
|
asyncio.run(test_get_response())
|
||||||
|
|
||||||
# test_async_response_openai()
|
test_async_response_openai()
|
||||||
|
|
||||||
def test_async_response_azure():
|
def test_async_response_azure():
|
||||||
import asyncio
|
import asyncio
|
||||||
|
@ -160,4 +161,4 @@ def test_get_response_non_openai_streaming():
|
||||||
return response
|
return response
|
||||||
asyncio.run(test_async_call())
|
asyncio.run(test_async_call())
|
||||||
|
|
||||||
test_get_response_non_openai_streaming()
|
# test_get_response_non_openai_streaming()
|
|
@ -1370,6 +1370,8 @@ def client(original_function):
|
||||||
# LOG SUCCESS - handle streaming success logging in the _next_ object, remove `handle_success` once it's deprecated
|
# LOG SUCCESS - handle streaming success logging in the _next_ object, remove `handle_success` once it's deprecated
|
||||||
threading.Thread(target=logging_obj.success_handler, args=(result, start_time, end_time)).start()
|
threading.Thread(target=logging_obj.success_handler, args=(result, start_time, end_time)).start()
|
||||||
# RETURN RESULT
|
# RETURN RESULT
|
||||||
|
if isinstance(result, ModelResponse):
|
||||||
|
result._response_ms = (end_time - start_time).total_seconds() * 1000 # return response latency in ms like openai
|
||||||
return result
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
call_type = original_function.__name__
|
call_type = original_function.__name__
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue