mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
Merge branch 'main' into litellm_reset_key_budget
This commit is contained in:
commit
9784d03d65
13 changed files with 492 additions and 78 deletions
|
@ -1067,7 +1067,6 @@ class Logging:
|
|||
## if model in model cost map - log the response cost
|
||||
## else set cost to None
|
||||
verbose_logger.debug(f"Model={self.model}; result={result}")
|
||||
verbose_logger.debug(f"self.stream: {self.stream}")
|
||||
if (
|
||||
result is not None
|
||||
and (
|
||||
|
@ -1109,6 +1108,12 @@ class Logging:
|
|||
self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs
|
||||
):
|
||||
verbose_logger.debug(f"Logging Details LiteLLM-Success Call")
|
||||
start_time, end_time, result = self._success_handler_helper_fn(
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
result=result,
|
||||
cache_hit=cache_hit,
|
||||
)
|
||||
# print(f"original response in success handler: {self.model_call_details['original_response']}")
|
||||
try:
|
||||
verbose_logger.debug(f"success callbacks: {litellm.success_callback}")
|
||||
|
@ -1124,6 +1129,8 @@ class Logging:
|
|||
complete_streaming_response = litellm.stream_chunk_builder(
|
||||
self.sync_streaming_chunks,
|
||||
messages=self.model_call_details.get("messages", None),
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
)
|
||||
except:
|
||||
complete_streaming_response = None
|
||||
|
@ -1137,13 +1144,19 @@ class Logging:
|
|||
self.model_call_details[
|
||||
"complete_streaming_response"
|
||||
] = complete_streaming_response
|
||||
try:
|
||||
self.model_call_details["response_cost"] = litellm.completion_cost(
|
||||
completion_response=complete_streaming_response,
|
||||
)
|
||||
verbose_logger.debug(
|
||||
f"Model={self.model}; cost={self.model_call_details['response_cost']}"
|
||||
)
|
||||
except litellm.NotFoundError as e:
|
||||
verbose_logger.debug(
|
||||
f"Model={self.model} not found in completion cost map."
|
||||
)
|
||||
self.model_call_details["response_cost"] = None
|
||||
|
||||
start_time, end_time, result = self._success_handler_helper_fn(
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
result=result,
|
||||
cache_hit=cache_hit,
|
||||
)
|
||||
for callback in litellm.success_callback:
|
||||
try:
|
||||
if callback == "lite_debugger":
|
||||
|
@ -1487,14 +1500,27 @@ class Logging:
|
|||
end_time=end_time,
|
||||
)
|
||||
if callable(callback): # custom logger functions
|
||||
await customLogger.async_log_event(
|
||||
kwargs=self.model_call_details,
|
||||
response_obj=result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
print_verbose=print_verbose,
|
||||
callback_func=callback,
|
||||
)
|
||||
if self.stream:
|
||||
if "complete_streaming_response" in self.model_call_details:
|
||||
await customLogger.async_log_event(
|
||||
kwargs=self.model_call_details,
|
||||
response_obj=self.model_call_details[
|
||||
"complete_streaming_response"
|
||||
],
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
print_verbose=print_verbose,
|
||||
callback_func=callback,
|
||||
)
|
||||
else:
|
||||
await customLogger.async_log_event(
|
||||
kwargs=self.model_call_details,
|
||||
response_obj=result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
print_verbose=print_verbose,
|
||||
callback_func=callback,
|
||||
)
|
||||
if callback == "dynamodb":
|
||||
global dynamoLogger
|
||||
if dynamoLogger is None:
|
||||
|
@ -2915,17 +2941,25 @@ def cost_per_token(
|
|||
)
|
||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||
elif model_with_provider in model_cost_ref:
|
||||
print_verbose(f"Looking up model={model_with_provider} in model_cost_map")
|
||||
verbose_logger.debug(
|
||||
f"Looking up model={model_with_provider} in model_cost_map"
|
||||
)
|
||||
verbose_logger.debug(
|
||||
f"applying cost={model_cost_ref[model_with_provider]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
|
||||
)
|
||||
prompt_tokens_cost_usd_dollar = (
|
||||
model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens
|
||||
)
|
||||
verbose_logger.debug(
|
||||
f"applying cost={model_cost_ref[model_with_provider]['output_cost_per_token']} for completion_tokens={completion_tokens}"
|
||||
)
|
||||
completion_tokens_cost_usd_dollar = (
|
||||
model_cost_ref[model_with_provider]["output_cost_per_token"]
|
||||
* completion_tokens
|
||||
)
|
||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||
elif "ft:gpt-3.5-turbo" in model:
|
||||
print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
||||
verbose_logger.debug(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
|
||||
# fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
|
||||
prompt_tokens_cost_usd_dollar = (
|
||||
model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
|
||||
|
@ -2936,17 +2970,23 @@ def cost_per_token(
|
|||
)
|
||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||
elif model in litellm.azure_llms:
|
||||
print_verbose(f"Cost Tracking: {model} is an Azure LLM")
|
||||
verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM")
|
||||
model = litellm.azure_llms[model]
|
||||
verbose_logger.debug(
|
||||
f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
|
||||
)
|
||||
prompt_tokens_cost_usd_dollar = (
|
||||
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
||||
)
|
||||
verbose_logger.debug(
|
||||
f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
|
||||
)
|
||||
completion_tokens_cost_usd_dollar = (
|
||||
model_cost_ref[model]["output_cost_per_token"] * completion_tokens
|
||||
)
|
||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||
elif model in litellm.azure_embedding_models:
|
||||
print_verbose(f"Cost Tracking: {model} is an Azure Embedding Model")
|
||||
verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
|
||||
model = litellm.azure_embedding_models[model]
|
||||
prompt_tokens_cost_usd_dollar = (
|
||||
model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
|
||||
|
@ -7061,6 +7101,7 @@ class CustomStreamWrapper:
|
|||
self._hidden_params = {
|
||||
"model_id": (_model_info.get("id", None))
|
||||
} # returned as x-litellm-model-id response header in proxy
|
||||
self.response_id = None
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
@ -7633,6 +7674,10 @@ class CustomStreamWrapper:
|
|||
|
||||
def chunk_creator(self, chunk):
|
||||
model_response = ModelResponse(stream=True, model=self.model)
|
||||
if self.response_id is not None:
|
||||
model_response.id = self.response_id
|
||||
else:
|
||||
self.response_id = model_response.id
|
||||
model_response._hidden_params["custom_llm_provider"] = self.custom_llm_provider
|
||||
model_response.choices = [StreamingChoices()]
|
||||
model_response.choices[0].finish_reason = None
|
||||
|
@ -7752,10 +7797,8 @@ class CustomStreamWrapper:
|
|||
]
|
||||
self.sent_last_chunk = True
|
||||
elif self.custom_llm_provider == "sagemaker":
|
||||
print_verbose(f"ENTERS SAGEMAKER STREAMING")
|
||||
new_chunk = next(self.completion_stream)
|
||||
|
||||
completion_obj["content"] = new_chunk
|
||||
print_verbose(f"ENTERS SAGEMAKER STREAMING for chunk {chunk}")
|
||||
completion_obj["content"] = chunk
|
||||
elif self.custom_llm_provider == "petals":
|
||||
if len(self.completion_stream) == 0:
|
||||
if self.sent_last_chunk:
|
||||
|
@ -7874,7 +7917,7 @@ class CustomStreamWrapper:
|
|||
completion_obj["role"] = "assistant"
|
||||
self.sent_first_chunk = True
|
||||
model_response.choices[0].delta = Delta(**completion_obj)
|
||||
print_verbose(f"model_response: {model_response}")
|
||||
print_verbose(f"returning model_response: {model_response}")
|
||||
return model_response
|
||||
else:
|
||||
return
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue