mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
(fix) hf calculating usage non blocking
This commit is contained in:
parent
4e1885734a
commit
df57e9247a
1 changed file with 16 additions and 6 deletions
|
@@ -333,15 +333,25 @@ def completion(
|
||||||
"content"
|
"content"
|
||||||
] = completion_response[0]["generated_text"]
|
] = completion_response[0]["generated_text"]
|
||||||
## CALCULATING USAGE
|
## CALCULATING USAGE
|
||||||
prompt_tokens = len(
|
prompt_tokens = 0
|
||||||
encoding.encode(input_text)
|
try:
|
||||||
) ##[TODO] use the llama2 tokenizer here
|
prompt_tokens = len(
|
||||||
|
encoding.encode(input_text)
|
||||||
|
) ##[TODO] use the llama2 tokenizer here
|
||||||
|
except:
|
||||||
|
# this should remain non blocking we should not block a response returning if calculating usage fails
|
||||||
|
pass
|
||||||
print_verbose(f'output: {model_response["choices"][0]["message"]}')
|
print_verbose(f'output: {model_response["choices"][0]["message"]}')
|
||||||
output_text = model_response["choices"][0]["message"].get("content", "")
|
output_text = model_response["choices"][0]["message"].get("content", "")
|
||||||
if output_text is not None and len(output_text) > 0:
|
if output_text is not None and len(output_text) > 0:
|
||||||
completion_tokens = len(
|
completion_tokens = 0
|
||||||
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
try:
|
||||||
) ##[TODO] use the llama2 tokenizer here
|
completion_tokens = len(
|
||||||
|
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
|
||||||
|
) ##[TODO] use the llama2 tokenizer here
|
||||||
|
except:
|
||||||
|
# this should remain non blocking we should not block a response returning if calculating usage fails
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
completion_tokens = 0
|
completion_tokens = 0
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue