Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
fix(ollama_chat.py): fix default token counting for ollama chat
commit 43f139fafd
parent 3e59a02dfb
2 changed files with 15 additions and 6 deletions
@@ -220,8 +220,10 @@ def get_ollama_response(
     model_response["choices"][0]["message"] = response_json["message"]
     model_response["created"] = int(time.time())
     model_response["model"] = "ollama/" + model
-    prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt)))  # type: ignore
-    completion_tokens = response_json["eval_count"]
+    prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=messages))  # type: ignore
+    completion_tokens = response_json.get(
+        "eval_count", litellm.token_counter(text=response_json["message"])
+    )
     model_response["usage"] = litellm.Usage(
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
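For context, a minimal standalone sketch of the fallback behavior this hunk introduces; `count_tokens` and `response_json` are stand-ins (not the litellm implementation) mimicking an Ollama chat reply that omits `prompt_eval_count` and `eval_count`:

# Illustration only: `count_tokens` is a crude stand-in for litellm.token_counter,
# and `response_json` mimics an Ollama /api/chat reply missing its eval counters.
def count_tokens(text: str) -> int:
    return len(text.split())  # real code uses a tokenizer, not whitespace

response_json = {
    "message": {"role": "assistant", "content": "Hi! How can I help you today?"}
}
messages = [{"role": "user", "content": "Say hi"}]

# Old code: response_json["eval_count"] raised KeyError when the key was absent.
# New code: .get() falls back to counting tokens from the messages / response.
prompt_tokens = response_json.get(
    "prompt_eval_count",
    count_tokens(" ".join(m["content"] for m in messages)),
)
completion_tokens = response_json.get(
    "eval_count", count_tokens(response_json["message"]["content"])
)
print(prompt_tokens, completion_tokens)  # 2 7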
@@ -320,8 +322,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
     model_response["choices"][0]["message"] = response_json["message"]
     model_response["created"] = int(time.time())
     model_response["model"] = "ollama/" + data["model"]
-    prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt)))  # type: ignore
-    completion_tokens = response_json["eval_count"]
+    prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"]))  # type: ignore
+    completion_tokens = response_json.get(
+        "eval_count", litellm.token_counter(text=response_json["message"])
+    )
     model_response["usage"] = litellm.Usage(
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
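The same fallback is applied on the async path above, keyed off data["messages"]. A rough usage sketch of where these counts surface to a caller (assumptions: litellm is installed, an Ollama server is running at the default local port, and "ollama_chat/llama2" is only an example model name):

# Rough usage sketch, not taken from the repo.
import litellm

response = litellm.completion(
    model="ollama_chat/llama2",
    messages=[{"role": "user", "content": "Say hi"}],
    api_base="http://localhost:11434",
)
# usage is filled from prompt_eval_count / eval_count when Ollama returns them,
# and estimated via litellm.token_counter(...) otherwise (per this commit).
print(response.usage.prompt_tokens, response.usage.completion_tokens)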
@@ -2872,8 +2872,13 @@ def token_counter(
         print_verbose(
             f"Token Counter - using generic token counter, for model={model}"
         )
-        enc = tokenizer_json["tokenizer"].encode(text)
-        num_tokens = len(enc)
+        num_tokens = openai_token_counter(
+            text=text,  # type: ignore
+            model="gpt-3.5-turbo",
+            messages=messages,
+            is_tool_call=is_tool_call,
+            count_response_tokens=count_response_tokens,
+        )
     else:
         num_tokens = len(encoding.encode(text))  # type: ignore
     return num_tokens
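This last hunk changes the generic path of token_counter to delegate to openai_token_counter instead of encoding the raw text directly, so message lists (not just plain strings) can be counted with the gpt-3.5-turbo tokenizer. A hedged example of the public counter involved (requires `pip install litellm`; exact numbers depend on the tokenizer version):

# Example use of the public helper the Ollama fallback relies on; illustrative only.
import litellm

msgs = [{"role": "user", "content": "Write a haiku about caching."}]

# Chat-style counting (includes per-message formatting overhead).
print(litellm.token_counter(model="gpt-3.5-turbo", messages=msgs))

# Plain-text counting.
print(litellm.token_counter(model="gpt-3.5-turbo", text="Write a haiku about caching."))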