This commit is contained in:
Tensor Templar 2025-04-24 01:02:15 -07:00 committed by GitHub
commit 4c8c3c4bbb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -586,13 +586,28 @@ async def ollama_acompletion(
model_response.created = int(time.time())
model_response.model = "ollama_chat/" + data["model"]
prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"])) # type: ignore
completion_tokens = response_json.get(
"eval_count",
litellm.token_counter(
text=response_json["message"]["content"], count_response_tokens=True
),
)
prompt_tokens = response_json.get("prompt_eval_count", 0)
if prompt_tokens == 0: # Only calculate if Ollama doesn't provide it
try:
prompt_tokens = litellm.token_counter(messages=data["messages"])
except (ValueError, TypeError, AttributeError) as e:
verbose_logger.debug(f"Error counting prompt tokens: {str(e)}")
prompt_tokens = 0 # Fallback if token counting fails
completion_tokens = response_json.get("eval_count", 0)
if completion_tokens == 0:
try:
# For function calls, the content might not be a plain string (e.g. a JSON object, since ollama 5.0), so serialize it before counting tokens
response_text = (
response_json["message"]["content"]
if isinstance(response_json["message"]["content"], str)
else json.dumps(response_json["message"]["content"])
)
completion_tokens = litellm.token_counter(text=response_text, count_response_tokens=True)
except (ValueError, TypeError, KeyError, json.JSONDecodeError) as e:
verbose_logger.debug(f"Error counting completion tokens: {str(e)}")
completion_tokens = 0
setattr(
model_response,
"usage",