Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)
fix(ollama_chat.py): fix ollama chat completion token counting
Commit 2e3748e6eb (parent 47bed68c7f): 2 changed files with 6 additions and 5 deletions.
@@ -320,11 +320,15 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
                 model_response["choices"][0]["message"] = message
             else:
                 model_response["choices"][0]["message"] = response_json["message"]
+
             model_response["created"] = int(time.time())
-            model_response["model"] = "ollama/" + data["model"]
+            model_response["model"] = "ollama_chat/" + data["model"]
             prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"]))  # type: ignore
             completion_tokens = response_json.get(
-                "eval_count", litellm.token_counter(text=response_json["message"])
+                "eval_count",
+                litellm.token_counter(
+                    text=response_json["message"]["content"], count_response_tokens=True
+                ),
             )
             model_response["usage"] = litellm.Usage(
                 prompt_tokens=prompt_tokens,
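This change only matters when the Ollama response omits eval_count and litellm has to count completion tokens locally: the old fallback passed the whole response message dict to token_counter, while the new one counts only the assistant text and flags it as response tokens. A minimal sketch of the corrected fallback, using a hypothetical response payload (only token_counter and the text / count_response_tokens arguments shown in the diff are assumed):

    import litellm

    # Hypothetical Ollama chat response that omits "eval_count",
    # forcing the local token-counting fallback touched by this fix.
    response_json = {
        "message": {"role": "assistant", "content": "Paris is the capital of France."}
    }

    # Mirrors the new code path: count only the assistant content and mark it
    # as response text so it is treated as completion tokens.
    completion_tokens = response_json.get(
        "eval_count",
        litellm.token_counter(
            text=response_json["message"]["content"], count_response_tokens=True
        ),
    )
    print(completion_tokens)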
@@ -983,9 +983,6 @@ class Logging:
-            verbose_logger.debug(
-                f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n"
-            )
             verbose_logger.debug(
                 f"Logging Details Post-API Call: LiteLLM Params: {self.model_call_details}"
             )
             if self.logger_fn and callable(self.logger_fn):
                 try:
                     self.logger_fn(
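For reference, the remaining verbose_logger.debug calls in the hunk above only emit when litellm's debug logging is enabled. A minimal sketch, assuming litellm's set_verbose flag and that the underlying logger is a standard library logger named "LiteLLM":

    import logging
    import litellm

    litellm.set_verbose = True  # switch litellm into verbose debug mode
    logging.basicConfig(level=logging.DEBUG)  # attach a handler for debug records
    # Assumption: verbose_logger is a stdlib logger registered under "LiteLLM".
    logging.getLogger("LiteLLM").setLevel(logging.DEBUG)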