fix(ollama_chat.py): fix ollama chat completion token counting

Krrish Dholakia 2024-02-06 07:22:54 -08:00
parent 47bed68c7f
commit 2e3748e6eb
2 changed files with 6 additions and 5 deletions

litellm/llms/ollama_chat.py

@@ -320,11 +320,15 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
             model_response["choices"][0]["message"] = message
         else:
             model_response["choices"][0]["message"] = response_json["message"]
         model_response["created"] = int(time.time())
-        model_response["model"] = "ollama/" + data["model"]
+        model_response["model"] = "ollama_chat/" + data["model"]
         prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"]))  # type: ignore
         completion_tokens = response_json.get(
-            "eval_count", litellm.token_counter(text=response_json["message"])
+            "eval_count",
+            litellm.token_counter(
+                text=response_json["message"]["content"], count_response_tokens=True
+            ),
         )
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,
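
The substance of the fix is the completion-token fallback: when Ollama's response omits eval_count, token_counter previously received the whole response_json["message"] dict, and it now receives only the message's content string, counted as response tokens. A minimal sketch of the resulting fallback logic, assuming a litellm version whose token_counter and Usage accept the keyword arguments shown in the hunk; the build_usage helper below is illustrative only and not part of the commit:

# Illustrative sketch: mirrors the fallback logic from the hunk above.
# Assumes litellm.token_counter / litellm.Usage accept these keyword
# arguments; build_usage is a hypothetical helper, not commit code.
import litellm


def build_usage(response_json: dict, data: dict) -> litellm.Usage:
    # Prefer the counts Ollama reports; fall back to litellm's tokenizer.
    prompt_tokens = response_json.get(
        "prompt_eval_count",
        litellm.token_counter(messages=data["messages"]),
    )
    completion_tokens = response_json.get(
        "eval_count",
        # Count only the assistant's text, not the whole message dict.
        litellm.token_counter(
            text=response_json["message"]["content"], count_response_tokens=True
        ),
    )
    return litellm.Usage(
        prompt_tokens=prompt_tokens,
        completion_tokens=completion_tokens,
        total_tokens=prompt_tokens + completion_tokens,
    )

Note that dict.get evaluates its default eagerly, so the local tokenizer still runs even when Ollama supplies the counts; the commit keeps that behavior unchanged.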

litellm/utils.py

@@ -983,9 +983,6 @@ class Logging:
             verbose_logger.debug(
                 f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n"
             )
-            verbose_logger.debug(
-                f"Logging Details Post-API Call: LiteLLM Params: {self.model_call_details}"
-            )
             if self.logger_fn and callable(self.logger_fn):
                 try:
                     self.logger_fn(