Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 19:24:27 +00:00)
feat(utils.py): emit response cost as part of logs
parent 32a62ac0e9
commit e4fda7c840
3 changed files with 15 additions and 34 deletions
@@ -584,13 +584,8 @@ async def track_cost_callback(
         litellm_params = kwargs.get("litellm_params", {}) or {}
         proxy_server_request = litellm_params.get("proxy_server_request") or {}
         user_id = proxy_server_request.get("body", {}).get("user", None)
-        if "complete_streaming_response" in kwargs:
-            # for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost
-            completion_response = kwargs["complete_streaming_response"]
-            response_cost = litellm.completion_cost(
-                completion_response=completion_response
-            )
-
+        if "response_cost" in kwargs:
+            response_cost = kwargs["response_cost"]
             user_api_key = kwargs["litellm_params"]["metadata"].get(
                 "user_api_key", None
             )
@@ -599,31 +594,6 @@ async def track_cost_callback(
                 "user_api_key_user_id", None
             )

-            verbose_proxy_logger.info(
-                f"streaming response_cost {response_cost}, for user_id {user_id}"
-            )
-            if user_api_key and (
-                prisma_client is not None or custom_db_client is not None
-            ):
-                await update_database(
-                    token=user_api_key,
-                    response_cost=response_cost,
-                    user_id=user_id,
-                    kwargs=kwargs,
-                    completion_response=completion_response,
-                    start_time=start_time,
-                    end_time=end_time,
-                )
-        elif kwargs["stream"] == False:  # for non streaming responses
-            response_cost = litellm.completion_cost(
-                completion_response=completion_response
-            )
-            user_api_key = kwargs["litellm_params"]["metadata"].get(
-                "user_api_key", None
-            )
-            user_id = user_id or kwargs["litellm_params"]["metadata"].get(
-                "user_api_key_user_id", None
-            )
             verbose_proxy_logger.info(
                 f"response_cost {response_cost}, for user_id {user_id}"
             )
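With this change, track_cost_callback no longer recomputes cost from complete_streaming_response; it reads the value litellm now attaches to the callback kwargs. A minimal, self-contained sketch of that pattern (the logger setup is illustrative; the kwargs keys are the ones shown in the hunks above):

import logging

logger = logging.getLogger("cost_tracking")


async def track_cost_callback(kwargs, completion_response, start_time=None, end_time=None):
    # Single code path for streaming and non-streaming calls: litellm attaches the
    # pre-computed cost to kwargs, so the callback no longer calls
    # litellm.completion_cost() itself.
    if "response_cost" in kwargs:
        response_cost = kwargs["response_cost"]
        metadata = (kwargs.get("litellm_params") or {}).get("metadata") or {}
        user_api_key = metadata.get("user_api_key")
        user_id = metadata.get("user_api_key_user_id")
        logger.info("response_cost %s, for user_id %s", response_cost, user_id)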
@@ -170,6 +170,7 @@ class CompletionCustomHandler(
             )
             assert isinstance(kwargs["additional_args"], (dict, type(None)))
             assert isinstance(kwargs["log_event_type"], str)
+            assert isinstance(kwargs["response_cost"], (float, type(None)))
         except:
             print(f"Assertion Error: {traceback.format_exc()}")
             self.errors.append(traceback.format_exc())
@@ -262,6 +263,7 @@ class CompletionCustomHandler(
             assert isinstance(kwargs["additional_args"], (dict, type(None)))
             assert isinstance(kwargs["log_event_type"], str)
             assert kwargs["cache_hit"] is None or isinstance(kwargs["cache_hit"], bool)
+            assert isinstance(kwargs["response_cost"], (float, type(None)))
         except:
             print(f"Assertion Error: {traceback.format_exc()}")
             self.errors.append(traceback.format_exc())
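Both test handlers now assert that response_cost is either a float or None. A hedged sketch of the same check in a user-defined handler, assuming litellm's CustomLogger integration and its log_success_event hook (the handler name is illustrative):

from litellm.integrations.custom_logger import CustomLogger


class CostAssertingHandler(CustomLogger):
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        # response_cost should be a float when litellm can price the call,
        # and None when no cost mapping exists for the model.
        assert isinstance(kwargs["response_cost"], (float, type(None)))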
@@ -545,8 +547,9 @@ async def test_async_chat_bedrock_stream():
 
 # asyncio.run(test_async_chat_bedrock_stream())
 
 # Text Completion
+
 
 ## Test OpenAI text completion + Async
 @pytest.mark.asyncio
 async def test_async_text_completion_openai_stream():
@@ -585,6 +588,7 @@ async def test_async_text_completion_openai_stream():
     except Exception as e:
         pytest.fail(f"An exception occurred: {str(e)}")
 
+
 # EMBEDDING
 ## Test OpenAI + Async
 @pytest.mark.asyncio
@@ -1064,6 +1064,13 @@ class Logging:
             self.model_call_details["log_event_type"] = "successful_api_call"
             self.model_call_details["end_time"] = end_time
             self.model_call_details["cache_hit"] = cache_hit
+            if result is not None and (
+                isinstance(result, ModelResponse)
+                or isinstance(result, EmbeddingResponse)
+            ):
+                self.model_call_details["response_cost"] = litellm.completion_cost(
+                    completion_response=result,
+                )
 
             if litellm.max_budget and self.stream:
                 time_diff = (end_time - start_time).total_seconds()
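With the cost computed in utils.py's Logging and stored on model_call_details, any success callback can read it straight from kwargs. A minimal usage sketch (model name and message are placeholders, an OpenAI key is assumed to be configured, and the value may be None for models litellm cannot price):

import litellm


def log_cost(kwargs, completion_response, start_time, end_time):
    # Populated from litellm.completion_cost() for ModelResponse / EmbeddingResponse results.
    print("response_cost:", kwargs.get("response_cost"))


litellm.success_callback = [log_cost]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)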