Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
LiteLLM Minor Fixes + Improvements (#5474)
* feat(proxy/_types.py): add lago billing to callbacks ui
  Closes https://github.com/BerriAI/litellm/issues/5472
* fix(anthropic.py): return anthropic prompt caching information
  Fixes https://github.com/BerriAI/litellm/issues/5364
* feat(bedrock/chat.py): support 'json_schema' for bedrock models
  Closes https://github.com/BerriAI/litellm/issues/5434
* fix(bedrock/embed/embeddings.py): support async embeddings for amazon titan models
* fix: linting fixes
* fix: handle key errors
* fix(bedrock/chat.py): fix bedrock ai21 streaming object
* feat(bedrock/embed): support bedrock embedding optional params
* fix(databricks.py): fix usage chunk
* fix(internal_user_endpoints.py): apply internal user defaults, if user role updated
  Fixes issue where user update wouldn't apply defaults
* feat(slack_alerting.py): provide multiple slack channels for a given alert type
  Multiple channels might be interested in receiving an alert for a given type
* docs(alerting.md): add multiple channel alerting to docs
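Of the changes above, the Bedrock 'json_schema' support is the one most visible to callers. As a rough sketch only, assuming the OpenAI-style response_format shape is accepted for a Bedrock model; the model ID and schema below are illustrative and not taken from this commit:

import json
import litellm

# Ask a Bedrock model for output constrained to a JSON schema. The
# response_format shape follows the OpenAI json_schema convention; the model
# ID and schema here are illustrative assumptions, not from the commit.
resp = litellm.completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",  # illustrative
    messages=[{"role": "user", "content": "Return a profile for a user named Alice, age 30."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "user_profile",
            "schema": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"},
                },
                "required": ["name", "age"],
            },
        },
    },
)

print(json.loads(resp.choices[0].message.content))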
parent 02f288a8a3
commit f9e6507cd1
22 changed files with 720 additions and 209 deletions
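The diff below is the prompt caching piece: stream_chunk_builder now carries Anthropic's cache_creation_input_tokens and cache_read_input_tokens through to the rebuilt usage object. A minimal sketch of how a caller might observe those fields, assuming a valid ANTHROPIC_API_KEY and a request that actually hits the prompt cache (the model name and prompt are illustrative):

import litellm

messages = [{"role": "user", "content": "Summarize the litellm proxy config options."}]

# Stream a completion and collect the raw chunks.
chunks = []
for chunk in litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",  # illustrative model name
    messages=messages,
    stream=True,
):
    chunks.append(chunk)

# Rebuild a single response object from the stream; this is the code path
# touched by the diff below.
rebuilt = litellm.stream_chunk_builder(chunks, messages=messages)

usage = rebuilt.usage
print(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens)

# The cache counters are only attached when the provider reported them,
# so read them defensively.
print(getattr(usage, "cache_creation_input_tokens", None))
print(getattr(usage, "cache_read_input_tokens", None))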
@@ -5431,6 +5431,9 @@ def stream_chunk_builder(
     # # Update usage information if needed
     prompt_tokens = 0
     completion_tokens = 0
+    ## anthropic prompt caching information ##
+    cache_creation_input_tokens: Optional[int] = None
+    cache_read_input_tokens: Optional[int] = None
     for chunk in chunks:
         usage_chunk: Optional[Usage] = None
         if "usage" in chunk:
@@ -5442,6 +5445,13 @@ def stream_chunk_builder(
                 prompt_tokens = usage_chunk.get("prompt_tokens", 0) or 0
             if "completion_tokens" in usage_chunk:
                 completion_tokens = usage_chunk.get("completion_tokens", 0) or 0
+            if "cache_creation_input_tokens" in usage_chunk:
+                cache_creation_input_tokens = usage_chunk.get(
+                    "cache_creation_input_tokens"
+                )
+            if "cache_read_input_tokens" in usage_chunk:
+                cache_read_input_tokens = usage_chunk.get("cache_read_input_tokens")
+
     try:
         response["usage"]["prompt_tokens"] = prompt_tokens or token_counter(
             model=model, messages=messages
@@ -5460,6 +5470,13 @@ def stream_chunk_builder(
         response["usage"]["prompt_tokens"] + response["usage"]["completion_tokens"]
     )
 
+    if cache_creation_input_tokens is not None:
+        response["usage"][
+            "cache_creation_input_tokens"
+        ] = cache_creation_input_tokens
+    if cache_read_input_tokens is not None:
+        response["usage"]["cache_read_input_tokens"] = cache_read_input_tokens
+
     return convert_to_model_response_object(
         response_object=response,
         model_response_object=model_response,
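Read together, the three hunks follow one pattern: the cache counters start as None rather than 0, so the keys are only attached to the final usage when at least one chunk actually reported them. A condensed, self-contained sketch of that pattern, with plain dicts standing in for litellm's chunk and Usage objects (the function and variable names are illustrative, not litellm APIs):

from typing import Optional

def aggregate_usage(chunks: list) -> dict:
    # Running totals always exist; cache counters stay None until reported.
    prompt_tokens = 0
    completion_tokens = 0
    cache_creation_input_tokens: Optional[int] = None
    cache_read_input_tokens: Optional[int] = None

    for chunk in chunks:
        usage = chunk.get("usage") or {}
        if "prompt_tokens" in usage:
            prompt_tokens = usage.get("prompt_tokens", 0) or 0
        if "completion_tokens" in usage:
            completion_tokens = usage.get("completion_tokens", 0) or 0
        if "cache_creation_input_tokens" in usage:
            cache_creation_input_tokens = usage.get("cache_creation_input_tokens")
        if "cache_read_input_tokens" in usage:
            cache_read_input_tokens = usage.get("cache_read_input_tokens")

    result = {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }
    # Only expose the cache fields when the provider actually sent them.
    if cache_creation_input_tokens is not None:
        result["cache_creation_input_tokens"] = cache_creation_input_tokens
    if cache_read_input_tokens is not None:
        result["cache_read_input_tokens"] = cache_read_input_tokens
    return result

# Example: only the final chunk carries usage, as Anthropic streams do.
print(aggregate_usage([
    {"choices": [{"delta": {"content": "Hello"}}]},
    {"usage": {"prompt_tokens": 12, "completion_tokens": 5,
               "cache_read_input_tokens": 10}},
]))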