diff --git a/litellm/llms/aleph_alpha.py b/litellm/llms/aleph_alpha.py
index 0e83b76a7..090262461 100644
--- a/litellm/llms/aleph_alpha.py
+++ b/litellm/llms/aleph_alpha.py
@@ -262,11 +262,9 @@ def completion(
 
     model_response["created"] = time.time()
     model_response["model"] = model
-    model_response["usage"] = {
-        "prompt_tokens": prompt_tokens,
-        "completion_tokens": completion_tokens,
-        "total_tokens": prompt_tokens + completion_tokens,
-    }
+    model_response.usage.completion_tokens = completion_tokens
+    model_response.usage.prompt_tokens = prompt_tokens
+    model_response.usage.total_tokens = prompt_tokens + completion_tokens
     return model_response
 
 def embedding():
diff --git a/litellm/llms/baseten.py b/litellm/llms/baseten.py
index aecacd84f..05abb0005 100644
--- a/litellm/llms/baseten.py
+++ b/litellm/llms/baseten.py
@@ -136,11 +136,9 @@ def completion(
 
     model_response["created"] = time.time()
     model_response["model"] = model
-    model_response["usage"] = {
-        "prompt_tokens": prompt_tokens,
-        "completion_tokens": completion_tokens,
-        "total_tokens": prompt_tokens + completion_tokens,
-    }
+    model_response.usage.completion_tokens = completion_tokens
+    model_response.usage.prompt_tokens = prompt_tokens
+    model_response.usage.total_tokens = prompt_tokens + completion_tokens
     return model_response
 
 def embedding():
diff --git a/litellm/llms/cohere.py b/litellm/llms/cohere.py
index cd6032c56..c5fb4cf56 100644
--- a/litellm/llms/cohere.py
+++ b/litellm/llms/cohere.py
@@ -179,11 +179,9 @@ def completion(
 
     model_response["created"] = time.time()
     model_response["model"] = model
-    model_response["usage"] = {
-        "prompt_tokens": prompt_tokens,
-        "completion_tokens": completion_tokens,
-        "total_tokens": prompt_tokens + completion_tokens,
-    }
+    model_response.usage.completion_tokens = completion_tokens
+    model_response.usage.prompt_tokens = prompt_tokens
+    model_response.usage.total_tokens = prompt_tokens + completion_tokens
     return model_response
 
 def embedding(
diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index b3c3e5e38..496cbc3c9 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -345,11 +345,9 @@ def completion(
 
     model_response["created"] = time.time()
     model_response["model"] = model
-    model_response["usage"] = {
-        "prompt_tokens": prompt_tokens,
-        "completion_tokens": completion_tokens,
-        "total_tokens": prompt_tokens + completion_tokens,
-    }
+    model_response.usage.completion_tokens = completion_tokens
+    model_response.usage.prompt_tokens = prompt_tokens
+    model_response.usage.total_tokens = prompt_tokens + completion_tokens
     return model_response
 
 
diff --git a/litellm/llms/nlp_cloud.py b/litellm/llms/nlp_cloud.py
index b12c23ff5..a4647bc08 100644
--- a/litellm/llms/nlp_cloud.py
+++ b/litellm/llms/nlp_cloud.py
@@ -171,11 +171,9 @@ def completion(
 
     model_response["created"] = time.time()
     model_response["model"] = model
-    model_response["usage"] = {
-        "prompt_tokens": prompt_tokens,
-        "completion_tokens": completion_tokens,
-        "total_tokens": prompt_tokens + completion_tokens,
-    }
+    model_response.usage.completion_tokens = completion_tokens
+    model_response.usage.prompt_tokens = prompt_tokens
+    model_response.usage.total_tokens = prompt_tokens + completion_tokens
     return model_response
 
 def embedding():
diff --git a/litellm/llms/oobabooga.py b/litellm/llms/oobabooga.py
index e49eba422..74f3957be 100644
--- a/litellm/llms/oobabooga.py
+++ b/litellm/llms/oobabooga.py
@@ -111,11 +111,9 @@ def completion(
 
     model_response["created"] = time.time()
     model_response["model"] = model
-    model_response["usage"] = {
-        "prompt_tokens": prompt_tokens,
-        "completion_tokens": completion_tokens,
-        "total_tokens": prompt_tokens + completion_tokens,
-    }
+    model_response.usage.completion_tokens = completion_tokens
+    model_response.usage.prompt_tokens = prompt_tokens
+    model_response.usage.total_tokens = prompt_tokens + completion_tokens
     return model_response
 
 def embedding():
diff --git a/litellm/llms/replicate.py b/litellm/llms/replicate.py
index 0912af5c0..afa56d978 100644
--- a/litellm/llms/replicate.py
+++ b/litellm/llms/replicate.py
@@ -240,11 +240,9 @@ def completion(
     prompt_tokens = len(encoding.encode(prompt))
     completion_tokens = len(encoding.encode(model_response["choices"][0]["message"].get("content", "")))
     model_response["model"] = "replicate/" + model
-    model_response["usage"] = {
-        "prompt_tokens": prompt_tokens,
-        "completion_tokens": completion_tokens,
-        "total_tokens": prompt_tokens + completion_tokens,
-    }
+    model_response.usage.completion_tokens = completion_tokens
+    model_response.usage.prompt_tokens = prompt_tokens
+    model_response.usage.total_tokens = prompt_tokens + completion_tokens
     return model_response
 
 
diff --git a/litellm/llms/together_ai.py b/litellm/llms/together_ai.py
index 9fc48b4f6..f49cd13b7 100644
--- a/litellm/llms/together_ai.py
+++ b/litellm/llms/together_ai.py
@@ -175,11 +175,9 @@ def completion(
     model_response.choices[0].finish_reason = completion_response["output"]["choices"][0]["finish_reason"]
     model_response["created"] = time.time()
     model_response["model"] = model
-    model_response["usage"] = {
-        "prompt_tokens": prompt_tokens,
-        "completion_tokens": completion_tokens,
-        "total_tokens": prompt_tokens + completion_tokens,
-    }
+    model_response.usage.completion_tokens = completion_tokens
+    model_response.usage.prompt_tokens = prompt_tokens
+    model_response.usage.total_tokens = prompt_tokens + completion_tokens
     return model_response
 
 def embedding():
diff --git a/litellm/llms/vllm.py b/litellm/llms/vllm.py
index 379d54ae8..7519c381f 100644
--- a/litellm/llms/vllm.py
+++ b/litellm/llms/vllm.py
@@ -90,11 +90,9 @@ def completion(
 
     model_response["created"] = time.time()
     model_response["model"] = model
-    model_response["usage"] = {
-        "prompt_tokens": prompt_tokens,
-        "completion_tokens": completion_tokens,
-        "total_tokens": prompt_tokens + completion_tokens,
-    }
+    model_response.usage.completion_tokens = completion_tokens
+    model_response.usage.prompt_tokens = prompt_tokens
+    model_response.usage.total_tokens = prompt_tokens + completion_tokens
     return model_response
 
 def batch_completions(
@@ -172,11 +170,9 @@ def batch_completions(
 
         model_response["created"] = time.time()
         model_response["model"] = model
-        model_response["usage"] = {
-            "prompt_tokens": prompt_tokens,
-            "completion_tokens": completion_tokens,
-            "total_tokens": prompt_tokens + completion_tokens,
-        }
+        model_response.usage.completion_tokens = completion_tokens
+        model_response.usage.prompt_tokens = prompt_tokens
+        model_response.usage.total_tokens = prompt_tokens + completion_tokens
         final_outputs.append(model_response)
     return final_outputs
 
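
Every hunk above makes the same change: instead of overwriting model_response["usage"] with a plain dict, the token counts are assigned onto the usage object that the response already carries, so downstream code sees a typed usage value rather than a bare dict. Below is a minimal runnable sketch of that pattern; the Usage and ModelResponse classes here are simplified stand-ins for illustration, not litellm's actual implementations.

from dataclasses import dataclass, field


@dataclass
class Usage:
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0


@dataclass
class ModelResponse:
    # The diff relies on the response pre-creating a usage object,
    # since it assigns attributes instead of replacing the field.
    usage: Usage = field(default_factory=Usage)


def set_usage(model_response: ModelResponse, prompt_tokens: int, completion_tokens: int) -> None:
    # The new pattern from the diff: mutate the existing usage object
    # so it keeps its type instead of being swapped for a plain dict.
    model_response.usage.completion_tokens = completion_tokens
    model_response.usage.prompt_tokens = prompt_tokens
    model_response.usage.total_tokens = prompt_tokens + completion_tokens


response = ModelResponse()
set_usage(response, prompt_tokens=12, completion_tokens=30)
print(response.usage)  # Usage(prompt_tokens=12, completion_tokens=30, total_tokens=42)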