fix(ollama_chat.py): fix ollama chat completion token counting

Krrish Dholakia 2024-02-06 07:22:54 -08:00
parent 47bed68c7f
commit 2e3748e6eb
2 changed files with 6 additions and 5 deletions

litellm/llms/ollama_chat.py

@@ -320,11 +320,15 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
             model_response["choices"][0]["message"] = message
         else:
             model_response["choices"][0]["message"] = response_json["message"]
         model_response["created"] = int(time.time())
-        model_response["model"] = "ollama/" + data["model"]
+        model_response["model"] = "ollama_chat/" + data["model"]
         prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"]))  # type: ignore
         completion_tokens = response_json.get(
-            "eval_count", litellm.token_counter(text=response_json["message"])
+            "eval_count",
+            litellm.token_counter(
+                text=response_json["message"]["content"], count_response_tokens=True
+            ),
         )
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,

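Why the second change matters: in an Ollama /api/chat response, response_json["message"] is a dict ({"role": ..., "content": ...}), so the old fallback handed a dict to the token counter instead of the reply text; the fix counts message["content"] and passes count_response_tokens=True so the text is counted as raw completion output rather than re-templated as a chat prompt. A minimal sketch of the fixed fallback, with a stand-in tokenizer (count_tokens below is a hypothetical helper, not litellm's token_counter; only the payload shape mirrors the real Ollama response):

# Minimal sketch of the fixed completion-token fallback.
# count_tokens is a hypothetical stand-in for litellm.token_counter;
# only the payload shape mirrors the real Ollama /api/chat response.
def count_tokens(text: str) -> int:
    return len(text.split())  # crude whitespace tokenizer for illustration

response_json = {
    "model": "llama2",
    "message": {"role": "assistant", "content": "Hello there, how can I help?"},
    # "eval_count" is only present when Ollama reports usage itself
}

completion_tokens = response_json.get(
    "eval_count",
    count_tokens(response_json["message"]["content"]),  # count the string, not the dict
)
print(completion_tokens)  # 6
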
litellm/utils.py

@@ -983,9 +983,6 @@ class Logging:
             verbose_logger.debug(
                 f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n"
             )
-            verbose_logger.debug(
-                f"Logging Details Post-API Call: LiteLLM Params: {self.model_call_details}"
-            )
             if self.logger_fn and callable(self.logger_fn):
                 try:
                     self.logger_fn(
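
On the counting itself: count_response_tokens=True matters because chat-style counting adds per-message template overhead that a raw completion should not pay. An illustrative contrast (the +4 per-message overhead is an assumption mimicking common chat templates, not litellm's actual accounting):

# Hypothetical contrast between chat-prompt counting and raw completion counting.
def count_text(text: str) -> int:
    return len(text.split())  # stand-in tokenizer

def count_messages(messages: list[dict]) -> int:
    # chat prompts pay a fixed overhead per message for role/formatting tokens
    return sum(4 + count_text(m["content"]) for m in messages)

messages = [{"role": "user", "content": "Hello there"}]
print(count_messages(messages))   # 6 = 4 overhead + 2 text tokens
print(count_text("Hello there"))  # 2: what a completion should cost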