feat(utils.py): emit response cost as part of logs

This commit is contained in:
Krrish Dholakia 2024-01-22 15:53:04 -08:00 committed by ishaan-jaff
parent 32a62ac0e9
commit e4fda7c840
3 changed files with 15 additions and 34 deletions

View file

@@ -584,13 +584,8 @@ async def track_cost_callback(
         litellm_params = kwargs.get("litellm_params", {}) or {}
         proxy_server_request = litellm_params.get("proxy_server_request") or {}
         user_id = proxy_server_request.get("body", {}).get("user", None)
-        if "complete_streaming_response" in kwargs:
-            # for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost
-            completion_response = kwargs["complete_streaming_response"]
-            response_cost = litellm.completion_cost(
-                completion_response=completion_response
-            )
+        if "response_cost" in kwargs:
+            response_cost = kwargs["response_cost"]
             user_api_key = kwargs["litellm_params"]["metadata"].get(
                 "user_api_key", None
             )
@@ -599,31 +594,6 @@ async def track_cost_callback(
                 "user_api_key_user_id", None
             )
-            verbose_proxy_logger.info(
-                f"streaming response_cost {response_cost}, for user_id {user_id}"
-            )
-            if user_api_key and (
-                prisma_client is not None or custom_db_client is not None
-            ):
-                await update_database(
-                    token=user_api_key,
-                    response_cost=response_cost,
-                    user_id=user_id,
-                    kwargs=kwargs,
-                    completion_response=completion_response,
-                    start_time=start_time,
-                    end_time=end_time,
-                )
-        elif kwargs["stream"] == False:  # for non streaming responses
-            response_cost = litellm.completion_cost(
-                completion_response=completion_response
-            )
-            user_api_key = kwargs["litellm_params"]["metadata"].get(
-                "user_api_key", None
-            )
-            user_id = user_id or kwargs["litellm_params"]["metadata"].get(
-                "user_api_key_user_id", None
-            )
             verbose_proxy_logger.info(
                 f"response_cost {response_cost}, for user_id {user_id}"
             )

View file

@@ -170,6 +170,7 @@ class CompletionCustomHandler(
             )
             assert isinstance(kwargs["additional_args"], (dict, type(None)))
             assert isinstance(kwargs["log_event_type"], str)
+            assert isinstance(kwargs["response_cost"], (float, type(None)))
         except:
             print(f"Assertion Error: {traceback.format_exc()}")
             self.errors.append(traceback.format_exc())
@@ -262,6 +263,7 @@ class CompletionCustomHandler(
             assert isinstance(kwargs["additional_args"], (dict, type(None)))
             assert isinstance(kwargs["log_event_type"], str)
             assert kwargs["cache_hit"] is None or isinstance(kwargs["cache_hit"], bool)
+            assert isinstance(kwargs["response_cost"], (float, type(None)))
         except:
             print(f"Assertion Error: {traceback.format_exc()}")
             self.errors.append(traceback.format_exc())
@@ -545,8 +547,9 @@ async def test_async_chat_bedrock_stream():
 # asyncio.run(test_async_chat_bedrock_stream())

 # Text Completion
 ## Test OpenAI text completion + Async
 @pytest.mark.asyncio
 async def test_async_text_completion_openai_stream():
@@ -585,6 +588,7 @@ async def test_async_text_completion_openai_stream():
     except Exception as e:
         pytest.fail(f"An exception occurred: {str(e)}")

 # EMBEDDING
 ## Test OpenAI + Async
 @pytest.mark.asyncio

View file

@@ -1064,6 +1064,13 @@ class Logging:
                 self.model_call_details["log_event_type"] = "successful_api_call"
                 self.model_call_details["end_time"] = end_time
                 self.model_call_details["cache_hit"] = cache_hit
+                if result is not None and (
+                    isinstance(result, ModelResponse)
+                    or isinstance(result, EmbeddingResponse)
+                ):
+                    self.model_call_details["response_cost"] = litellm.completion_cost(
+                        completion_response=result,
+                    )
                 if litellm.max_budget and self.stream:
                     time_diff = (end_time - start_time).total_seconds()