map finish reason

Krrish Dholakia 2023-09-13 19:22:38 -07:00
parent 1b346e0139
commit aaa57abddd
10 changed files with 64 additions and 56 deletions

View file

@@ -90,7 +90,8 @@ def completion(
     else:
         try:
             model_response["choices"][0]["message"]["content"] = completion_response["completions"][0]["data"]["text"]
-        except:
+            model_response.choices[0].finish_reason = completion_response["completions"][0]["finishReason"]["reason"]
+        except Exception as e:
             raise AI21Error(message=json.dumps(completion_response), status_code=response.status_code)
     ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
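
Taken together, the hunks in this commit make each provider handler copy the provider's stop cause onto the OpenAI-style choice object, so callers can read response.choices[0].finish_reason regardless of backend. A minimal sketch of the AI21 case, assuming an already-parsed payload shaped like the one above and treating the response object as a plain dict (the helper name is illustrative, not a litellm internal):

# Illustrative sketch only: map an AI21-style payload onto an OpenAI-style response dict.
def map_ai21_response(completion_response: dict, model_response: dict) -> dict:
    completion = completion_response["completions"][0]
    model_response["choices"][0]["message"]["content"] = completion["data"]["text"]
    # AI21 nests the stop cause under finishReason.reason (e.g. "length" or "endoftext")
    model_response["choices"][0]["finish_reason"] = completion["finishReason"]["reason"]
    return model_response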

View file

@@ -114,6 +114,7 @@ def completion(
             model_response["choices"][0]["message"]["content"] = completion_response[
                 "completion"
             ]
+            model_response.choices[0].finish_reason = completion_response["stop_reason"]
         ## CALCULATING USAGE
         prompt_tokens = len(

View file

@@ -153,9 +153,10 @@ def completion(
         elif task == "text-generation-inference":
             model_response["choices"][0]["message"][
                 "content"
             ] = completion_response[0]["generated_text"]
-            ## GETTING LOGPROBS
+            ## GETTING LOGPROBS + FINISH REASON
             if "details" in completion_response[0] and "tokens" in completion_response[0]["details"]:
+                model_response.choices[0].finish_reason = completion_response[0]["details"]["finish_reason"]
                 sum_logprob = 0
                 for token in completion_response[0]["details"]["tokens"]:
                     sum_logprob += token["logprob"]
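
Note that text-generation-inference only returns the details block (finish_reason plus per-token logprobs) when the request asks for it, which is what the details=True parameter added further down in get_optional_params is for. A small sketch of consuming that block, assuming a TGI-style response of the form [{"generated_text": ..., "details": {...}}] (the function name is illustrative):

# Illustrative sketch only: pull finish_reason and an aggregate logprob out of a
# text-generation-inference style response.
def summarize_tgi_details(completion_response: list):
    details = completion_response[0].get("details") or {}
    finish_reason = details.get("finish_reason")  # e.g. "length", "eos_token", "stop_sequence"
    sum_logprob = sum(token["logprob"] for token in details.get("tokens", []))
    return finish_reason, sum_logprob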

View file

@ -104,14 +104,17 @@ def completion(
message=json.dumps(completion_response["output"]), status_code=response.status_code message=json.dumps(completion_response["output"]), status_code=response.status_code
) )
completion_response = completion_response["output"]["choices"][0]["text"] print(completion_response)
completion_text = completion_response["output"]["choices"][0]["text"]
## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here. ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
prompt_tokens = len(encoding.encode(prompt)) prompt_tokens = len(encoding.encode(prompt))
completion_tokens = len( completion_tokens = len(
encoding.encode(completion_response) encoding.encode(completion_text)
) )
model_response["choices"][0]["message"]["content"] = completion_response model_response["choices"][0]["message"]["content"] = completion_text
if "finish_reason" in completion_response["output"]["choices"][0]:
model_response.choices[0].finish_reason = completion_response["output"]["choices"][0]["finish_reason"]
model_response["created"] = time.time() model_response["created"] = time.time()
model_response["model"] = model model_response["model"] = model
model_response["usage"] = { model_response["usage"] = {

View file

@@ -529,6 +529,8 @@ def completion(
         completion_tokens = len(encoding.encode(completion_response))
         ## RESPONSE OBJECT
         model_response["choices"][0]["message"]["content"] = completion_response
+        if response[0].finish_reason:
+            model_response.choices[0].finish_reason = response[0].finish_reason
         model_response["created"] = time.time()
         model_response["model"] = model
         model_response["usage"] = {

View file

@@ -49,7 +49,7 @@ def test_completion_claude():
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
+# test_completion_claude()
 # aleph alpha
 # def test_completion_aleph_alpha():
 #     try:
@@ -119,8 +119,8 @@ def test_completion_claude_stream():
 # try:
 #     user_message = "write some code to find the sum of two numbers"
 #     messages = [{ "content": user_message,"role": "user"}]
-#     api_base = "https://wyh9bqfgj2r1klv5.us-east-1.aws.endpoints.huggingface.cloud"
-#     response = completion(model="facebook/blenderbot-400M-distill", messages=messages, custom_llm_provider="huggingface", task="conversational", api_base=api_base, logger_fn=logger_fn)
+#     api_base = "https://ecd4sb5n09bo4ei2.us-east-1.aws.endpoints.huggingface.cloud"
+#     response = completion(model="togethercomputer/LLaMA-2-7B-32K", messages=messages, custom_llm_provider="huggingface", api_base=api_base, logger_fn=logger_fn)
 #     # Add any assertions here to check the response
 #     print(response)
 # except Exception as e:
@@ -141,26 +141,26 @@ def test_completion_claude_stream():
 #         pytest.fail(f"Error occurred: {e}")
-# def test_completion_cohere(): # commenting for now as the cohere endpoint is being flaky
-#     try:
-#         response = completion(
-#             model="command-nightly",
-#             messages=messages,
-#             max_tokens=100,
-#             logit_bias={40: 10},
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-#         response_str = response["choices"][0]["message"]["content"]
-#         print(f"str response{response_str}")
-#         response_str_2 = response.choices[0].message.content
-#         if type(response_str) != str:
-#             pytest.fail(f"Error occurred: {e}")
-#         if type(response_str_2) != str:
-#             pytest.fail(f"Error occurred: {e}")
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
-##
+def test_completion_cohere(): # commenting for now as the cohere endpoint is being flaky
+    try:
+        response = completion(
+            model="command-nightly",
+            messages=messages,
+            max_tokens=100,
+            logit_bias={40: 10},
+            logger_fn=logger_fn
+        )
+        # Add any assertions here to check the response
+        print(response)
+        response_str = response["choices"][0]["message"]["content"]
+        print(f"str response{response_str}")
+        response_str_2 = response.choices[0].message.content
+        if type(response_str) != str:
+            pytest.fail(f"Error occurred: {e}")
+        if type(response_str_2) != str:
+            pytest.fail(f"Error occurred: {e}")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 def test_completion_cohere_stream():
     try:
@@ -750,15 +750,16 @@ def test_completion_with_fallbacks():
 #### Test A121 ###################
-# def test_completion_ai21():
-#     model_name = "j2-light"
-#     try:
-#         response = completion(model=model_name, messages=messages)
-#         # Add any assertions here to check the response
-#         print(response)
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+def test_completion_ai21():
+    model_name = "j2-light"
+    try:
+        response = completion(model=model_name, messages=messages)
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+# test_completion_ai21()
 # test config file with completion #
 # def test_completion_openai_config():
 #     try:

View file

@@ -830,7 +830,23 @@ def get_optional_params( # use the openai defaults
             optional_params["top_k"] = top_k
         if stop != None:
             optional_params["stop_sequences"] = stop
+    elif custom_llm_provider == "huggingface":
+        if temperature != 1:
+            optional_params["temperature"] = temperature
+        if top_p != 1:
+            optional_params["top_p"] = top_p
+        if n != 1:
+            optional_params["n"] = n
+        if stream:
+            optional_params["stream"] = stream
+        if stop != None:
+            optional_params["stop"] = stop
+        if max_tokens != float("inf"):
+            optional_params["max_new_tokens"] = max_tokens
+        if presence_penalty != 0:
+            optional_params["repetition_penalty"] = presence_penalty
+        optional_params["details"] = True
+        optional_params["task"] = task
     elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
         if stream:
             optional_params["stream_tokens"] = stream
@@ -867,23 +883,6 @@ def get_optional_params( # use the openai defaults
             optional_params["num_beams"] = num_beams
         if max_tokens != float("inf"):
             optional_params["max_new_tokens"] = max_tokens
-    elif custom_llm_provider == "huggingface":
-        if temperature != 1:
-            optional_params["temperature"] = temperature
-        if top_p != 1:
-            optional_params["top_p"] = top_p
-        if n != 1:
-            optional_params["n"] = n
-        if stream:
-            optional_params["stream"] = stream
-        if stop != None:
-            optional_params["stop"] = stop
-        if max_tokens != float("inf"):
-            optional_params["max_new_tokens"] = max_tokens
-        if presence_penalty != 0:
-            optional_params["repetition_penalty"] = presence_penalty
-        optional_params["details"] = True
-        optional_params["task"] = task
     elif custom_llm_provider == "sagemaker":
         if "llama-2" in model:
             # llama-2 models on sagemaker support the following args
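
The two hunks above are a move, not a rewrite: the huggingface branch of get_optional_params is relocated ahead of the together_ai branch, so a Hugging Face request whose model name happens to contain "togethercomputer" (like the endpoint exercised in the updated test) now hits the Hugging Face mapping, including details=True, instead of falling into the together_ai branch. A rough standalone restatement of that mapping, using the same OpenAI-style defaults as the branch above (the function name is illustrative, not a litellm API):

# Illustrative sketch only: translate OpenAI-style arguments into
# text-generation-inference kwargs, mirroring the huggingface branch above.
def hf_optional_params(temperature=1, top_p=1, n=1, stream=False, stop=None,
                       max_tokens=float("inf"), presence_penalty=0, task=None):
    # details=True asks TGI to return finish_reason and per-token logprobs
    params = {"details": True, "task": task}
    if temperature != 1:
        params["temperature"] = temperature
    if top_p != 1:
        params["top_p"] = top_p
    if n != 1:
        params["n"] = n
    if stream:
        params["stream"] = stream
    if stop is not None:
        params["stop"] = stop
    if max_tokens != float("inf"):
        params["max_new_tokens"] = max_tokens
    if presence_penalty != 0:
        params["repetition_penalty"] = presence_penalty
    return params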

View file

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.620"
+version = "0.1.621"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"