Mirror of https://github.com/BerriAI/litellm.git
(fix) vertex ai - use usage from response
parent e6a7212d10
commit 739d9e7a78

1 changed file with 37 additions and 10 deletions
@@ -487,12 +487,25 @@ def completion(
                 total_tokens=response_obj.usage_metadata.total_token_count,
             )
         else:
-            prompt_tokens = len(encoding.encode(prompt))
-            completion_tokens = len(
-                encoding.encode(
-                    model_response["choices"][0]["message"].get("content", "")
+            # init prompt tokens
+            # this block attempts to get usage from response_obj if it exists, if not it uses the litellm token counter
+            prompt_tokens, completion_tokens, total_tokens = 0, 0, 0
+            if response_obj is not None:
+                if hasattr(response_obj, "usage_metadata") and hasattr(
+                    response_obj.usage_metadata, "prompt_token_count"
+                ):
+                    prompt_tokens = response_obj.usage_metadata.prompt_token_count
+                    completion_tokens = (
+                        response_obj.usage_metadata.candidates_token_count
+                    )
+            else:
+                prompt_tokens = len(encoding.encode(prompt))
+                completion_tokens = len(
+                    encoding.encode(
+                        model_response["choices"][0]["message"].get("content", "")
+                    )
                 )
-            )
+
             usage = Usage(
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
@@ -630,12 +643,26 @@ async def async_completion(
                 total_tokens=response_obj.usage_metadata.total_token_count,
             )
         else:
-            prompt_tokens = len(encoding.encode(prompt))
-            completion_tokens = len(
-                encoding.encode(
-                    model_response["choices"][0]["message"].get("content", "")
+            # init prompt tokens
+            # this block attempts to get usage from response_obj if it exists, if not it uses the litellm token counter
+            prompt_tokens, completion_tokens, total_tokens = 0, 0, 0
+            if response_obj is not None:
+                if hasattr(response_obj, "usage_metadata") and hasattr(
+                    response_obj.usage_metadata, "prompt_token_count"
+                ):
+                    prompt_tokens = response_obj.usage_metadata.prompt_token_count
+                    completion_tokens = (
+                        response_obj.usage_metadata.candidates_token_count
+                    )
+            else:
+                prompt_tokens = len(encoding.encode(prompt))
+                completion_tokens = len(
+                    encoding.encode(
+                        model_response["choices"][0]["message"].get("content", "")
+                    )
                 )
-            )
+
+            # set usage
             usage = Usage(
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
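Both hunks make the same change: instead of unconditionally re-counting tokens with litellm's tokenizer, completion() and async_completion() now read prompt and completion token counts from the Vertex AI response's usage_metadata when it is present, and fall back to local counting only when there is no response object at all. Below is a minimal standalone sketch of that pattern, not litellm code: count_tokens() is a hypothetical stand-in for litellm's encoding.encode() fallback, and SimpleNamespace stands in for a real Vertex AI response object.

    from types import SimpleNamespace


    def count_tokens(text: str) -> int:
        # Hypothetical fallback counter; litellm itself uses encoding.encode().
        return len(text.split())


    def extract_usage(response_obj, prompt: str, completion_text: str):
        # Initialize counts, then prefer the usage the response reports.
        prompt_tokens, completion_tokens = 0, 0
        if response_obj is not None:
            if hasattr(response_obj, "usage_metadata") and hasattr(
                response_obj.usage_metadata, "prompt_token_count"
            ):
                # Trust the token counts carried by the Vertex AI response.
                prompt_tokens = response_obj.usage_metadata.prompt_token_count
                completion_tokens = response_obj.usage_metadata.candidates_token_count
        else:
            # No response object at all: count tokens locally.
            prompt_tokens = count_tokens(prompt)
            completion_tokens = count_tokens(completion_text)
        return prompt_tokens, completion_tokens, prompt_tokens + completion_tokens


    # A stub response carrying usage metadata is trusted as-is ...
    resp = SimpleNamespace(
        usage_metadata=SimpleNamespace(prompt_token_count=12, candidates_token_count=34)
    )
    print(extract_usage(resp, "hi", "hello"))              # (12, 34, 46)
    # ... while a missing response falls back to the local counter.
    print(extract_usage(None, "hi there", "hello world"))  # (2, 2, 4)

Note that, as in the commit, a response object that exists but carries no usage_metadata leaves both counts at their initialized value of 0; only a missing response object triggers the local fallback counter.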