forked from phoenix/litellm-mirror
(feat) use usage class for model responses for cohere, hf, tg ai
This commit is contained in:
parent
194f85e92f
commit
63928fa166
9 changed files with 30 additions and 50 deletions
|
@ -262,11 +262,9 @@ def completion(
|
|||
|
||||
model_response["created"] = time.time()
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
}
|
||||
model_response.usage.completion_tokens = completion_tokens
|
||||
model_response.usage.prompt_tokens = prompt_tokens
|
||||
model_response.usage.total_tokens = prompt_tokens + completion_tokens
|
||||
return model_response
|
||||
|
||||
def embedding():
|
||||
|
|
|
@ -136,11 +136,9 @@ def completion(
|
|||
|
||||
model_response["created"] = time.time()
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
}
|
||||
model_response.usage.completion_tokens = completion_tokens
|
||||
model_response.usage.prompt_tokens = prompt_tokens
|
||||
model_response.usage.total_tokens = prompt_tokens + completion_tokens
|
||||
return model_response
|
||||
|
||||
def embedding():
|
||||
|
|
|
@ -179,11 +179,9 @@ def completion(
|
|||
|
||||
model_response["created"] = time.time()
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
}
|
||||
model_response.usage.completion_tokens = completion_tokens
|
||||
model_response.usage.prompt_tokens = prompt_tokens
|
||||
model_response.usage.total_tokens = prompt_tokens + completion_tokens
|
||||
return model_response
|
||||
|
||||
def embedding(
|
||||
|
|
|
@ -345,11 +345,9 @@ def completion(
|
|||
|
||||
model_response["created"] = time.time()
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
}
|
||||
model_response.usage.completion_tokens = completion_tokens
|
||||
model_response.usage.prompt_tokens = prompt_tokens
|
||||
model_response.usage.total_tokens = prompt_tokens + completion_tokens
|
||||
return model_response
|
||||
|
||||
|
||||
|
|
|
@ -171,11 +171,9 @@ def completion(
|
|||
|
||||
model_response["created"] = time.time()
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
}
|
||||
model_response.usage.completion_tokens = completion_tokens
|
||||
model_response.usage.prompt_tokens = prompt_tokens
|
||||
model_response.usage.total_tokens = prompt_tokens + completion_tokens
|
||||
return model_response
|
||||
|
||||
def embedding():
|
||||
|
|
|
@ -111,11 +111,9 @@ def completion(
|
|||
|
||||
model_response["created"] = time.time()
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
}
|
||||
model_response.usage.completion_tokens = completion_tokens
|
||||
model_response.usage.prompt_tokens = prompt_tokens
|
||||
model_response.usage.total_tokens = prompt_tokens + completion_tokens
|
||||
return model_response
|
||||
|
||||
def embedding():
|
||||
|
|
|
@ -240,11 +240,9 @@ def completion(
|
|||
prompt_tokens = len(encoding.encode(prompt))
|
||||
completion_tokens = len(encoding.encode(model_response["choices"][0]["message"].get("content", "")))
|
||||
model_response["model"] = "replicate/" + model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
}
|
||||
model_response.usage.completion_tokens = completion_tokens
|
||||
model_response.usage.prompt_tokens = prompt_tokens
|
||||
model_response.usage.total_tokens = prompt_tokens + completion_tokens
|
||||
return model_response
|
||||
|
||||
|
||||
|
|
|
@ -175,11 +175,9 @@ def completion(
|
|||
model_response.choices[0].finish_reason = completion_response["output"]["choices"][0]["finish_reason"]
|
||||
model_response["created"] = time.time()
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
}
|
||||
model_response.usage.completion_tokens = completion_tokens
|
||||
model_response.usage.prompt_tokens = prompt_tokens
|
||||
model_response.usage.total_tokens = prompt_tokens + completion_tokens
|
||||
return model_response
|
||||
|
||||
def embedding():
|
||||
|
|
|
@ -90,11 +90,9 @@ def completion(
|
|||
|
||||
model_response["created"] = time.time()
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
}
|
||||
model_response.usage.completion_tokens = completion_tokens
|
||||
model_response.usage.prompt_tokens = prompt_tokens
|
||||
model_response.usage.total_tokens = prompt_tokens + completion_tokens
|
||||
return model_response
|
||||
|
||||
def batch_completions(
|
||||
|
@ -172,11 +170,9 @@ def batch_completions(
|
|||
|
||||
model_response["created"] = time.time()
|
||||
model_response["model"] = model
|
||||
model_response["usage"] = {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
}
|
||||
model_response.usage.completion_tokens = completion_tokens
|
||||
model_response.usage.prompt_tokens = prompt_tokens
|
||||
model_response.usage.total_tokens = prompt_tokens + completion_tokens
|
||||
final_outputs.append(model_response)
|
||||
return final_outputs
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue