forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (10/09/2024) (#6139)
* fix(utils.py): don't return 'none' response headers. Fixes https://github.com/BerriAI/litellm/issues/6123
* fix(vertex_and_google_ai_studio_gemini.py): support parsing out additionalProperties and the strict value for tool calls (a hedged request sketch follows this list). Fixes https://github.com/BerriAI/litellm/issues/6136
* fix(cost_calculator.py): set default character value to none. Fixes https://github.com/BerriAI/litellm/issues/6133#issuecomment-2403290196
* fix(google.py): fix cost-per-token / cost-per-character conversion. Fixes https://github.com/BerriAI/litellm/issues/6133#issuecomment-2403370287
* build(model_prices_and_context_window.json): update gemini pricing. Fixes https://github.com/BerriAI/litellm/issues/6133
* build(model_prices_and_context_window.json): update gemini pricing
* fix(litellm_logging.py): fix streaming caching logging when 'turn_off_message_logging' is enabled (stores the unredacted response in the cache)
* build(model_prices_and_context_window.json): update gemini-1.5-flash pricing
* fix(cost_calculator.py): fix default prompt_character count logic. Fixes an error in gemini cost calculation
* fix(cost_calculator.py): fix cost calculation for tts models
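The tool-call fix above concerns OpenAI-style tool definitions whose JSON schema carries strict and additionalProperties. Below is a minimal sketch of a request that would exercise that path, assuming the standard litellm.completion interface; the model name and the get_weather tool are illustrative, not taken from this commit.

    import litellm

    # Illustrative tool definition (not from the commit): a strict tool whose
    # JSON schema sets additionalProperties, the two fields the Gemini fix parses out.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",  # hypothetical tool name
                "strict": True,
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                    "additionalProperties": False,
                },
            },
        }
    ]

    # Assumes GEMINI_API_KEY (or Vertex credentials) is configured in the environment.
    response = litellm.completion(
        model="gemini/gemini-1.5-flash",
        messages=[{"role": "user", "content": "What's the weather in Paris?"}],
        tools=tools,
    )
    print(response.choices[0].message.tool_calls)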
This commit is contained in:
parent 60baa65e0e, commit 6005450c8f
16 changed files with 788 additions and 534 deletions
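The hunks below appear to belong to litellm_logging.py (the Logging class named in the hunk headers) and implement the streaming-caching fix: cache the full streamed response even when message logging is redacted. Here is a minimal sketch of that scenario, assuming litellm's documented Cache object and turn_off_message_logging flag; every setting is an assumption for illustration, not configuration taken from the diff.

    import litellm
    from litellm.caching import Cache

    # Assumed settings for illustration: redact message content from logging
    # callbacks, but keep an in-memory cache of completed responses.
    litellm.turn_off_message_logging = True
    litellm.cache = Cache()  # in-memory cache by default

    # Assumes OPENAI_API_KEY is set; any streamable model would do here.
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Say hello."}],
        stream=True,
    )
    full_text = "".join(chunk.choices[0].delta.content or "" for chunk in response)

    # With this commit, the assembled (unredacted) streaming response is what ends
    # up in litellm.cache once the stream finishes, even though logging callbacks
    # only see redacted messages.
    print(full_text)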
@@ -901,7 +901,9 @@ class Logging:
                    complete_streaming_response = None
            else:
                self.sync_streaming_chunks.append(result)

        _caching_complete_streaming_response: Optional[
            Union[ModelResponse, TextCompletionResponse]
        ] = None
        if complete_streaming_response is not None:
            verbose_logger.debug(
                "Logging Details LiteLLM-Success Call streaming complete"
@@ -909,6 +911,9 @@ class Logging:
            self.model_call_details["complete_streaming_response"] = (
                complete_streaming_response
            )
            _caching_complete_streaming_response = copy.deepcopy(
                complete_streaming_response
            )
            self.model_call_details["response_cost"] = (
                self._response_cost_calculator(result=complete_streaming_response)
            )
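The hunk above appears to take a copy.deepcopy of the assembled streaming response into _caching_complete_streaming_response before any later redaction step can touch model_call_details. A standalone sketch of why that ordering matters, using a hypothetical redact helper in place of litellm's redact_message_input_output_from_logging:

    import copy

    def redact(call_details: dict) -> dict:
        """Hypothetical stand-in for message redaction: blanks out response text in place."""
        for choice in call_details["complete_streaming_response"]["choices"]:
            choice["message"]["content"] = "redacted-by-litellm"
        return call_details

    call_details = {
        "complete_streaming_response": {
            "choices": [{"message": {"content": "the real answer"}}]
        }
    }

    # Take the copy *before* redaction, the ordering the hunk above introduces.
    unredacted_for_cache = copy.deepcopy(call_details["complete_streaming_response"])
    redact(call_details)

    print(call_details["complete_streaming_response"]["choices"][0]["message"]["content"])
    # -> redacted-by-litellm
    print(unredacted_for_cache["choices"][0]["message"]["content"])
    # -> the real answer (this is what should land in the cache)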
@@ -937,6 +942,20 @@ class Logging:
            else:
                callbacks = litellm.success_callback

        ## STREAMING CACHING ##
        if "cache" in callbacks and litellm.cache is not None:
            # this only logs streaming once, complete_streaming_response exists i.e when stream ends
            print_verbose("success_callback: reaches cache for logging!")
            kwargs = self.model_call_details
            if self.stream and _caching_complete_streaming_response is not None:
                print_verbose(
                    "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
                )
                result = _caching_complete_streaming_response
                # only add to cache once we have a complete streaming response
                litellm.cache.add_cache(result, **kwargs)

        ## REDACT MESSAGES ##
        result = redact_message_input_output_from_logging(
            model_call_details=(
                self.model_call_details
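This hunk appears to run the cache write under ## STREAMING CACHING ## before the ## REDACT MESSAGES ## block, handing litellm.cache.add_cache the deep copy so the cached entry keeps the real content. The call shape add_cache(result, **kwargs) suggests a cache keyed off the request kwargs; below is a toy stand-in with that interface, an assumed shape rather than litellm's actual Cache class.

    import json

    class ToyCache:
        """Toy cache with the add_cache(result, **kwargs) call shape seen in the diff."""

        def __init__(self):
            self._store = {}

        def _key(self, **kwargs):
            # Assumed keying scheme for illustration: serialize identifying request params.
            return json.dumps(
                {"model": kwargs.get("model"), "messages": kwargs.get("messages")},
                sort_keys=True,
                default=str,
            )

        def add_cache(self, result, **kwargs):
            self._store[self._key(**kwargs)] = result

        def get_cache(self, **kwargs):
            return self._store.get(self._key(**kwargs))

    cache = ToyCache()
    call_kwargs = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Say hello."}],
    }
    cache.add_cache({"choices": [{"message": {"content": "the real answer"}}]}, **call_kwargs)
    print(cache.get_cache(**call_kwargs))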
@@ -1302,23 +1321,6 @@ class Logging:
                        end_time=end_time,
                        print_verbose=print_verbose,
                    )
                if callback == "cache" and litellm.cache is not None:
                    # this only logs streaming once, complete_streaming_response exists i.e when stream ends
                    print_verbose("success_callback: reaches cache for logging!")
                    kwargs = self.model_call_details
                    if self.stream:
                        if "complete_streaming_response" not in kwargs:
                            print_verbose(
                                f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
                            )
                            pass
                        else:
                            print_verbose(
                                "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
                            )
                            result = kwargs["complete_streaming_response"]
                            # only add to cache once we have a complete streaming response
                            litellm.cache.add_cache(result, **kwargs)
                if callback == "athina" and athinaLogger is not None:
                    deep_copy = {}
                    for k, v in self.model_call_details.items():