Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 11:43:54 +00:00
fix(ollama_chat.py): use tiktoken as backup for prompt token counting
This commit is contained in:
parent 58ba7fa1b2
commit d451aaca0c

1 changed file with 2 additions and 2 deletions
ollama_chat.py

@@ -220,7 +220,7 @@ def get_ollama_response(
         model_response["choices"][0]["message"] = response_json["message"]
         model_response["created"] = int(time.time())
         model_response["model"] = "ollama/" + model
-        prompt_tokens = response_json["prompt_eval_count"]  # type: ignore
+        prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt)))  # type: ignore
         completion_tokens = response_json["eval_count"]
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,
@@ -320,7 +320,7 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
         model_response["choices"][0]["message"] = response_json["message"]
         model_response["created"] = int(time.time())
         model_response["model"] = "ollama/" + data["model"]
-        prompt_tokens = response_json["prompt_eval_count"]  # type: ignore
+        prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt)))  # type: ignore
         completion_tokens = response_json["eval_count"]
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,
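Both hunks make the same change: instead of indexing response_json["prompt_eval_count"] directly, which raises KeyError when Ollama omits the field, the count is read with dict.get() and falls back to counting the prompt tokens locally with the tiktoken-based encoding passed into these functions. A minimal sketch of that fallback pattern is below; it uses a standalone tiktoken encoding and a hypothetical count_prompt_tokens helper, neither of which is part of litellm's code.

# Sketch of the fallback pattern in this commit, not the litellm implementation.
# count_prompt_tokens and the sample response dicts are illustrative only.
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")

def count_prompt_tokens(response_json: dict, prompt: str) -> int:
    # Prefer the server-reported count; otherwise approximate it locally
    # by encoding the prompt with tiktoken and counting the tokens.
    return response_json.get("prompt_eval_count", len(encoding.encode(prompt)))

# Server reported the count -> use it directly.
print(count_prompt_tokens({"prompt_eval_count": 42}, "Hello, world"))  # 42
# Field missing from the response -> tiktoken-based estimate.
print(count_prompt_tokens({}, "Hello, world"))

The design choice is to treat the server's count as authoritative when present and only pay the cost of local tokenization when it is missing, so usage reporting never fails on an incomplete response.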