(feat) use usage class for model responses for cohere, hf, tg ai, cohere

ishaan-jaff 2023-10-27 09:58:47 -07:00
parent 194f85e92f
commit 63928fa166
9 changed files with 30 additions and 50 deletions
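The change is mechanical across providers: instead of assembling a raw "usage" dict by hand, each completion handler now sets fields on the response's typed usage object. A minimal sketch of the pattern (the Usage and ModelResponse class bodies below are illustrative assumptions, not litellm's actual definitions; only the attribute names come from the diff):

from dataclasses import dataclass, field

@dataclass
class Usage:
    # Field names match the attributes assigned in the diff.
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0

@dataclass
class ModelResponse:
    usage: Usage = field(default_factory=Usage)

# Before (removed lines): a plain dict was built by hand.
#   model_response["usage"] = {"prompt_tokens": ..., "completion_tokens": ..., ...}

# After (added lines): fields are set on the typed usage object.
model_response = ModelResponse()
prompt_tokens, completion_tokens = 12, 34  # stand-in counts
model_response.usage.completion_tokens = completion_tokens
model_response.usage.prompt_tokens = prompt_tokens
model_response.usage.total_tokens = prompt_tokens + completion_tokens

A typed container keeps the three token fields consistent across all providers instead of relying on each handler to spell the dict keys correctly.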

View file

@@ -262,11 +262,9 @@ def completion(
 model_response["created"] = time.time()
 model_response["model"] = model
-model_response["usage"] = {
-    "prompt_tokens": prompt_tokens,
-    "completion_tokens": completion_tokens,
-    "total_tokens": prompt_tokens + completion_tokens,
-}
+model_response.usage.completion_tokens = completion_tokens
+model_response.usage.prompt_tokens = prompt_tokens
+model_response.usage.total_tokens = prompt_tokens + completion_tokens
 return model_response
 def embedding():

View file

@@ -136,11 +136,9 @@ def completion(
 model_response["created"] = time.time()
 model_response["model"] = model
-model_response["usage"] = {
-    "prompt_tokens": prompt_tokens,
-    "completion_tokens": completion_tokens,
-    "total_tokens": prompt_tokens + completion_tokens,
-}
+model_response.usage.completion_tokens = completion_tokens
+model_response.usage.prompt_tokens = prompt_tokens
+model_response.usage.total_tokens = prompt_tokens + completion_tokens
 return model_response
 def embedding():

View file

@@ -179,11 +179,9 @@ def completion(
 model_response["created"] = time.time()
 model_response["model"] = model
-model_response["usage"] = {
-    "prompt_tokens": prompt_tokens,
-    "completion_tokens": completion_tokens,
-    "total_tokens": prompt_tokens + completion_tokens,
-}
+model_response.usage.completion_tokens = completion_tokens
+model_response.usage.prompt_tokens = prompt_tokens
+model_response.usage.total_tokens = prompt_tokens + completion_tokens
 return model_response
 def embedding(

View file

@@ -345,11 +345,9 @@ def completion(
 model_response["created"] = time.time()
 model_response["model"] = model
-model_response["usage"] = {
-    "prompt_tokens": prompt_tokens,
-    "completion_tokens": completion_tokens,
-    "total_tokens": prompt_tokens + completion_tokens,
-}
+model_response.usage.completion_tokens = completion_tokens
+model_response.usage.prompt_tokens = prompt_tokens
+model_response.usage.total_tokens = prompt_tokens + completion_tokens
 return model_response

View file

@@ -171,11 +171,9 @@ def completion(
 model_response["created"] = time.time()
 model_response["model"] = model
-model_response["usage"] = {
-    "prompt_tokens": prompt_tokens,
-    "completion_tokens": completion_tokens,
-    "total_tokens": prompt_tokens + completion_tokens,
-}
+model_response.usage.completion_tokens = completion_tokens
+model_response.usage.prompt_tokens = prompt_tokens
+model_response.usage.total_tokens = prompt_tokens + completion_tokens
 return model_response
 def embedding():

View file

@@ -111,11 +111,9 @@ def completion(
 model_response["created"] = time.time()
 model_response["model"] = model
-model_response["usage"] = {
-    "prompt_tokens": prompt_tokens,
-    "completion_tokens": completion_tokens,
-    "total_tokens": prompt_tokens + completion_tokens,
-}
+model_response.usage.completion_tokens = completion_tokens
+model_response.usage.prompt_tokens = prompt_tokens
+model_response.usage.total_tokens = prompt_tokens + completion_tokens
 return model_response
 def embedding():

View file

@@ -240,11 +240,9 @@ def completion(
 prompt_tokens = len(encoding.encode(prompt))
 completion_tokens = len(encoding.encode(model_response["choices"][0]["message"].get("content", "")))
 model_response["model"] = "replicate/" + model
-model_response["usage"] = {
-    "prompt_tokens": prompt_tokens,
-    "completion_tokens": completion_tokens,
-    "total_tokens": prompt_tokens + completion_tokens,
-}
+model_response.usage.completion_tokens = completion_tokens
+model_response.usage.prompt_tokens = prompt_tokens
+model_response.usage.total_tokens = prompt_tokens + completion_tokens
 return model_response
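This hunk also shows where the counts come from when the provider response carries no usage metadata: the prompt and the returned message content are run through a tokenizer. A minimal sketch, assuming a tiktoken encoder (the diff only shows an `encoding` object; its construction here is an assumption):

import tiktoken

# Assumption: a cl100k_base encoder stands in for the diff's `encoding` object.
encoding = tiktoken.get_encoding("cl100k_base")

prompt = "What is the capital of France?"
completion_text = "The capital of France is Paris."
prompt_tokens = len(encoding.encode(prompt))
completion_tokens = len(encoding.encode(completion_text))
total_tokens = prompt_tokens + completion_tokens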

View file

@@ -175,11 +175,9 @@ def completion(
 model_response.choices[0].finish_reason = completion_response["output"]["choices"][0]["finish_reason"]
 model_response["created"] = time.time()
 model_response["model"] = model
-model_response["usage"] = {
-    "prompt_tokens": prompt_tokens,
-    "completion_tokens": completion_tokens,
-    "total_tokens": prompt_tokens + completion_tokens,
-}
+model_response.usage.completion_tokens = completion_tokens
+model_response.usage.prompt_tokens = prompt_tokens
+model_response.usage.total_tokens = prompt_tokens + completion_tokens
 return model_response
 def embedding():

View file

@@ -90,11 +90,9 @@ def completion(
 model_response["created"] = time.time()
 model_response["model"] = model
-model_response["usage"] = {
-    "prompt_tokens": prompt_tokens,
-    "completion_tokens": completion_tokens,
-    "total_tokens": prompt_tokens + completion_tokens,
-}
+model_response.usage.completion_tokens = completion_tokens
+model_response.usage.prompt_tokens = prompt_tokens
+model_response.usage.total_tokens = prompt_tokens + completion_tokens
 return model_response
 def batch_completions(
@@ -172,11 +170,9 @@ def batch_completions(
 model_response["created"] = time.time()
 model_response["model"] = model
-model_response["usage"] = {
-    "prompt_tokens": prompt_tokens,
-    "completion_tokens": completion_tokens,
-    "total_tokens": prompt_tokens + completion_tokens,
-}
+model_response.usage.completion_tokens = completion_tokens
+model_response.usage.prompt_tokens = prompt_tokens
+model_response.usage.total_tokens = prompt_tokens + completion_tokens
 final_outputs.append(model_response)
 return final_outputs
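The final hunk applies the same assignment inside a batch path that collects one response per prompt. A hypothetical sketch of that shape (the signature, loop body, and token counts are assumptions; only the usage assignments and the final_outputs handling appear in the diff):

from types import SimpleNamespace

def batch_completions(prompts):  # hypothetical signature; only the name appears in the diff
    final_outputs = []
    for prompt in prompts:
        # SimpleNamespace stands in for the real response/usage objects.
        model_response = SimpleNamespace(usage=SimpleNamespace())
        prompt_tokens = len(prompt.split())   # stand-in token count
        completion_tokens = 16                # stand-in token count
        model_response.usage.completion_tokens = completion_tokens
        model_response.usage.prompt_tokens = prompt_tokens
        model_response.usage.total_tokens = prompt_tokens + completion_tokens
        final_outputs.append(model_response)
    return final_outputs

outputs = batch_completions(["hello world", "another prompt"])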