forked from phoenix/litellm-mirror
fix hf tgi best of bug
commit a8711dc5c2 (parent 35fda2cd05)
2 changed files with 12 additions and 13 deletions
@@ -173,18 +173,6 @@ def completion(
                     "content"
                 ] = completion_response["generated_text"]
         elif task == "text-generation-inference":
-            if "best_of" in optional_params and optional_params["best_of"] > 1:
-                if "details" in completion_response[0] and "best_of_sequences" in completion_response[0]["details"]:
-                    choices_list = []
-                    for idx, item in enumerate(completion_response[0]["details"]["best_of_sequences"]):
-                        sum_logprob = 0
-                        for token in item["tokens"]:
-                            sum_logprob += token["logprob"]
-                        message_obj = Message(content=item["generated_text"], logprobs=sum_logprob)
-                        choice_obj = Choices(finish_reason=item["finish_reason"], index=idx, message=message_obj)
-                        choices_list.append(choice_obj)
-                    model_response["choices"] = choices_list
-            else:
             model_response["choices"][0]["message"][
                 "content"
             ] = completion_response[0]["generated_text"]
@@ -195,6 +183,17 @@ def completion(
                 for token in completion_response[0]["details"]["tokens"]:
                     sum_logprob += token["logprob"]
                 model_response["choices"][0]["message"]["logprobs"] = sum_logprob
+            if "best_of" in optional_params and optional_params["best_of"] > 1:
+                if "details" in completion_response[0] and "best_of_sequences" in completion_response[0]["details"]:
+                    choices_list = []
+                    for idx, item in enumerate(completion_response[0]["details"]["best_of_sequences"]):
+                        sum_logprob = 0
+                        for token in item["tokens"]:
+                            sum_logprob += token["logprob"]
+                        message_obj = Message(content=item["generated_text"], logprobs=sum_logprob)
+                        choice_obj = Choices(finish_reason=item["finish_reason"], index=idx, message=message_obj)
+                        choices_list.append(choice_obj)
+                    model_response["choices"].extend(choices_list)
         else:
             model_response["choices"][0]["message"]["content"] = completion_response[0]["generated_text"]
         ## CALCULATING USAGE
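Taken together, the two hunks in litellm's Hugging Face handler relocate the best_of block rather than rewrite it. Previously it ran before the per-token logprobs and finish reason were parsed, and when it fired it assigned model_response["choices"] = choices_list, discarding the primary sequence; the trailing else: also meant the normal content assignment only ran when best_of was absent. The moved block now runs after that parsing and appends the alternates with extend. A minimal sketch of what the relocated block does, using plain dicts in place of litellm's Message and Choices objects and a made-up TGI payload:

# A made-up text-generation-inference response for a request with best_of=2:
# the primary sequence plus one alternate under details.best_of_sequences.
completion_response = [{
    "generated_text": "Paris is the capital of France.",
    "details": {
        "finish_reason": "eos_token",
        "tokens": [{"logprob": -0.11}, {"logprob": -0.42}],
        "best_of_sequences": [{
            "generated_text": "The capital of France is Paris.",
            "finish_reason": "length",
            "tokens": [{"logprob": -0.35}, {"logprob": -0.87}],
        }],
    },
}]

# The primary sequence is parsed first, as in the handler above.
model_response = {"choices": [{
    "finish_reason": completion_response[0]["details"]["finish_reason"],
    "index": 0,
    "message": {"content": completion_response[0]["generated_text"]},
}]}

# The relocated block: one extra choice per best_of alternate, with the
# sequence logprob taken as the sum of its per-token logprobs.
choices_list = []
for idx, item in enumerate(completion_response[0]["details"]["best_of_sequences"]):
    sum_logprob = sum(token["logprob"] for token in item["tokens"])
    choices_list.append({
        "finish_reason": item["finish_reason"],
        "index": idx,
        "message": {"content": item["generated_text"], "logprobs": sum_logprob},
    })

# extend() appends the alternates; the old `= choices_list` overwrote the
# primary choice built above.
model_response["choices"].extend(choices_list)
assert len(model_response["choices"]) == 2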
pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.719"
+version = "0.1.720"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
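For reference, a hypothetical call that would exercise this path, assuming a TGI-hosted model reachable through litellm's completion() API at this version. The model name is made up; best_of is forwarded to the provider via optional_params, and with the fix the alternate sequences come back as additional entries in choices:

import litellm

response = litellm.completion(
    model="huggingface/bigcode/starcoder",  # hypothetical TGI-served model
    messages=[{"role": "user", "content": "def fibonacci(n):"}],
    best_of=2,
)

# choices[0] is the primary sequence; later entries are best_of alternates.
for choice in response["choices"]:
    print(choice["message"]["content"])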