Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)

map finish reason

Commit aaa57abddd (parent 1b346e0139)
10 changed files with 64 additions and 56 deletions
Binary file not shown.
Binary file not shown.
@@ -90,7 +90,8 @@ def completion(
         else:
             try:
                 model_response["choices"][0]["message"]["content"] = completion_response["completions"][0]["data"]["text"]
-            except:
+                model_response.choices[0].finish_reason = completion_response["completions"][0]["finishReason"]["reason"]
+            except Exception as e:
                 raise AI21Error(message=json.dumps(completion_response), status_code=response.status_code)
 
         ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
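With the AI21 branch now copying `finishReason["reason"]` onto the response object, callers can read the finish reason directly; before this commit the AI21 path never populated it. A minimal caller-side sketch, assuming the key is exposed via the `AI21_API_KEY` environment variable and using the `j2-light` model name that appears in the tests below:

```python
import os
import litellm

os.environ["AI21_API_KEY"] = "your-key"  # assumption: configure however you normally set keys

messages = [{"role": "user", "content": "Hey, how's it going?"}]
response = litellm.completion(model="j2-light", messages=messages)

# After this commit the provider's finish reason is surfaced here.
print(response.choices[0].finish_reason)
print(response["choices"][0]["message"]["content"])
```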
@@ -114,6 +114,7 @@ def completion(
             model_response["choices"][0]["message"]["content"] = completion_response[
                 "completion"
             ]
+            model_response.choices[0].finish_reason = completion_response["stop_reason"]
 
         ## CALCULATING USAGE
         prompt_tokens = len(
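The Anthropic branch passes `stop_reason` through verbatim, so callers see Anthropic's own values rather than OpenAI-style ones. The commit does not normalize them; the sketch below is only an illustration of where such a mapping could sit, assuming the legacy Anthropic text-completion values `stop_sequence` and `max_tokens` (the map and helper name are mine, not litellm's):

```python
# Illustrative only -- not part of this commit.
ANTHROPIC_FINISH_REASON_MAP = {
    "stop_sequence": "stop",   # OpenAI-style equivalent
    "max_tokens": "length",
}

def normalize_anthropic_finish_reason(stop_reason: str) -> str:
    # Fall back to the raw provider value for anything unrecognized.
    return ANTHROPIC_FINISH_REASON_MAP.get(stop_reason, stop_reason)
```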
@@ -154,8 +154,9 @@ def completion(
             model_response["choices"][0]["message"][
                 "content"
             ] = completion_response[0]["generated_text"]
-            ## GETTING LOGPROBS
+            ## GETTING LOGPROBS + FINISH REASON
             if "details" in completion_response[0] and "tokens" in completion_response[0]["details"]:
+                model_response.choices[0].finish_reason = completion_response[0]["details"]["finish_reason"]
                 sum_logprob = 0
                 for token in completion_response[0]["details"]["tokens"]:
                     sum_logprob += token["logprob"]
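This guard only fires when the Hugging Face response carries a `details` block, which a text-generation-inference endpoint returns when `details=True` is requested (the `get_optional_params` hunk further down sets exactly that). A rough sketch of the response shape the code expects; field names follow TGI's detailed output, but treat the concrete values as assumptions:

```python
# Approximate shape of completion_response from a TGI endpoint queried with
# details=True (values are illustrative).
completion_response = [
    {
        "generated_text": "The sum of 2 and 2 is 4.",
        "details": {
            "finish_reason": "eos_token",  # copied onto model_response.choices[0].finish_reason
            "tokens": [
                {"text": "The", "logprob": -0.12},
                {"text": " sum", "logprob": -0.48},
            ],
        },
    }
]

# Mirrors the logprob accumulation in the hunk above.
sum_logprob = sum(token["logprob"] for token in completion_response[0]["details"]["tokens"])
```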
@@ -104,14 +104,17 @@ def completion(
                 message=json.dumps(completion_response["output"]), status_code=response.status_code
             )
 
-        completion_response = completion_response["output"]["choices"][0]["text"]
+        print(completion_response)
+        completion_text = completion_response["output"]["choices"][0]["text"]
 
         ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
         prompt_tokens = len(encoding.encode(prompt))
         completion_tokens = len(
-            encoding.encode(completion_response)
+            encoding.encode(completion_text)
         )
-        model_response["choices"][0]["message"]["content"] = completion_response
+        model_response["choices"][0]["message"]["content"] = completion_text
+        if "finish_reason" in completion_response["output"]["choices"][0]:
+            model_response.choices[0].finish_reason = completion_response["output"]["choices"][0]["finish_reason"]
         model_response["created"] = time.time()
         model_response["model"] = model
         model_response["usage"] = {
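Renaming the extracted text to `completion_text` keeps the original `completion_response` dict intact, which is what makes the new `finish_reason` lookup possible; previously the dict was overwritten by the string. A small sketch of the guarded read against a Together AI-shaped payload (the shape is inferred from the hunk above, not from provider docs, and the values are illustrative):

```python
# Shape inferred from the code above.
completion_response = {
    "output": {
        "choices": [
            {"text": "Hello! How can I help?", "finish_reason": "length"}
        ]
    }
}

completion_text = completion_response["output"]["choices"][0]["text"]
choice = completion_response["output"]["choices"][0]
# Guard mirrors the diff: some payloads may omit finish_reason entirely.
finish_reason = choice["finish_reason"] if "finish_reason" in choice else None
```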
@@ -529,6 +529,8 @@ def completion(
         completion_tokens = len(encoding.encode(completion_response))
         ## RESPONSE OBJECT
         model_response["choices"][0]["message"]["content"] = completion_response
+        if response[0].finish_reason:
+            model_response.choices[0].finish_reason = response[0].finish_reason
         model_response["created"] = time.time()
         model_response["model"] = model
         model_response["usage"] = {
@@ -49,7 +49,7 @@ def test_completion_claude():
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-
+# test_completion_claude()
 # aleph alpha
 # def test_completion_aleph_alpha():
 #     try:
@@ -119,8 +119,8 @@ def test_completion_claude_stream():
 #     try:
 #         user_message = "write some code to find the sum of two numbers"
 #         messages = [{ "content": user_message,"role": "user"}]
-#         api_base = "https://wyh9bqfgj2r1klv5.us-east-1.aws.endpoints.huggingface.cloud"
-#         response = completion(model="facebook/blenderbot-400M-distill", messages=messages, custom_llm_provider="huggingface", task="conversational", api_base=api_base, logger_fn=logger_fn)
+#         api_base = "https://ecd4sb5n09bo4ei2.us-east-1.aws.endpoints.huggingface.cloud"
+#         response = completion(model="togethercomputer/LLaMA-2-7B-32K", messages=messages, custom_llm_provider="huggingface", api_base=api_base, logger_fn=logger_fn)
 #         # Add any assertions here to check the response
 #         print(response)
 #     except Exception as e:
@@ -141,26 +141,26 @@ def test_completion_claude_stream():
 #         pytest.fail(f"Error occurred: {e}")
 
 
-# def test_completion_cohere(): # commenting for now as the cohere endpoint is being flaky
-#     try:
-#         response = completion(
-#             model="command-nightly",
-#             messages=messages,
-#             max_tokens=100,
-#             logit_bias={40: 10},
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-#         response_str = response["choices"][0]["message"]["content"]
-#         print(f"str response{response_str}")
-#         response_str_2 = response.choices[0].message.content
-#         if type(response_str) != str:
-#             pytest.fail(f"Error occurred: {e}")
-#         if type(response_str_2) != str:
-#             pytest.fail(f"Error occurred: {e}")
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
-##
+def test_completion_cohere(): # commenting for now as the cohere endpoint is being flaky
+    try:
+        response = completion(
+            model="command-nightly",
+            messages=messages,
+            max_tokens=100,
+            logit_bias={40: 10},
+            logger_fn=logger_fn
+        )
+        # Add any assertions here to check the response
+        print(response)
+        response_str = response["choices"][0]["message"]["content"]
+        print(f"str response{response_str}")
+        response_str_2 = response.choices[0].message.content
+        if type(response_str) != str:
+            pytest.fail(f"Error occurred: {e}")
+        if type(response_str_2) != str:
+            pytest.fail(f"Error occurred: {e}")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
 def test_completion_cohere_stream():
     try:
@@ -750,15 +750,16 @@ def test_completion_with_fallbacks():
 
 
 #### Test A121 ###################
-# def test_completion_ai21():
-#     model_name = "j2-light"
-#     try:
-#         response = completion(model=model_name, messages=messages)
-#         # Add any assertions here to check the response
-#         print(response)
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+def test_completion_ai21():
+    model_name = "j2-light"
+    try:
+        response = completion(model=model_name, messages=messages)
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
+# test_completion_ai21()
 # test config file with completion #
 # def test_completion_openai_config():
 #     try:
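With the cohere and AI21 tests uncommented, they run as part of the normal pytest collection. A usage sketch, assuming the file lives at `litellm/tests/test_completion.py` in the checkout and that the relevant provider keys (e.g. `COHERE_API_KEY`, `AI21_API_KEY`) are set in the environment:

```python
# Run just the re-enabled provider tests. The path and -k expression are
# assumptions about the local layout; adjust to match your checkout.
import pytest

pytest.main(["-k", "cohere or ai21", "litellm/tests/test_completion.py"])
```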
@@ -830,7 +830,23 @@ def get_optional_params( # use the openai defaults
             optional_params["top_k"] = top_k
         if stop != None:
             optional_params["stop_sequences"] = stop
+    elif custom_llm_provider == "huggingface":
+        if temperature != 1:
+            optional_params["temperature"] = temperature
+        if top_p != 1:
+            optional_params["top_p"] = top_p
+        if n != 1:
+            optional_params["n"] = n
+        if stream:
+            optional_params["stream"] = stream
+        if stop != None:
+            optional_params["stop"] = stop
+        if max_tokens != float("inf"):
+            optional_params["max_new_tokens"] = max_tokens
+        if presence_penalty != 0:
+            optional_params["repetition_penalty"] = presence_penalty
+        optional_params["details"] = True
+        optional_params["task"] = task
     elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
         if stream:
             optional_params["stream_tokens"] = stream
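This `huggingface` branch translates OpenAI-style arguments into the names a text-generation-inference backend expects: `max_tokens` becomes `max_new_tokens`, `presence_penalty` is reused as `repetition_penalty`, and `details` is forced on so the finish-reason/logprob block above has something to read. The next hunk removes the identical block from its previous position later in the function, so this is a relocation ahead of the `together_ai` branch rather than new behavior. A condensed restatement as a standalone helper (the helper name and defaults are mine, not litellm's):

```python
def map_openai_params_to_tgi(temperature=1, top_p=1, n=1, stream=False,
                             stop=None, max_tokens=float("inf"),
                             presence_penalty=0, task=None):
    """Condensed sketch of the huggingface branch added in this hunk."""
    params = {}
    if temperature != 1:
        params["temperature"] = temperature
    if top_p != 1:
        params["top_p"] = top_p
    if n != 1:
        params["n"] = n
    if stream:
        params["stream"] = stream
    if stop is not None:
        params["stop"] = stop
    if max_tokens != float("inf"):
        params["max_new_tokens"] = max_tokens            # TGI's name for the token cap
    if presence_penalty != 0:
        params["repetition_penalty"] = presence_penalty  # closest TGI analogue
    params["details"] = True   # needed for finish_reason / logprobs in the response
    params["task"] = task
    return params
```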
@ -867,23 +883,6 @@ def get_optional_params( # use the openai defaults
|
||||||
optional_params["num_beams"] = num_beams
|
optional_params["num_beams"] = num_beams
|
||||||
if max_tokens != float("inf"):
|
if max_tokens != float("inf"):
|
||||||
optional_params["max_new_tokens"] = max_tokens
|
optional_params["max_new_tokens"] = max_tokens
|
||||||
elif custom_llm_provider == "huggingface":
|
|
||||||
if temperature != 1:
|
|
||||||
optional_params["temperature"] = temperature
|
|
||||||
if top_p != 1:
|
|
||||||
optional_params["top_p"] = top_p
|
|
||||||
if n != 1:
|
|
||||||
optional_params["n"] = n
|
|
||||||
if stream:
|
|
||||||
optional_params["stream"] = stream
|
|
||||||
if stop != None:
|
|
||||||
optional_params["stop"] = stop
|
|
||||||
if max_tokens != float("inf"):
|
|
||||||
optional_params["max_new_tokens"] = max_tokens
|
|
||||||
if presence_penalty != 0:
|
|
||||||
optional_params["repetition_penalty"] = presence_penalty
|
|
||||||
optional_params["details"] = True
|
|
||||||
optional_params["task"] = task
|
|
||||||
elif custom_llm_provider == "sagemaker":
|
elif custom_llm_provider == "sagemaker":
|
||||||
if "llama-2" in model:
|
if "llama-2" in model:
|
||||||
# llama-2 models on sagemaker support the following args
|
# llama-2 models on sagemaker support the following args
|
||||||
|
|
|
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.620"
+version = "0.1.621"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"