fix: fix value error if model returns empty completion
This commit is contained in:
parent 9ec4b0902b
commit cc0e4f4f9f
21 changed files with 84 additions and 50 deletions
Binary file not shown.
Binary file not shown.
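The same guard is applied in each provider integration below: when the provider returns an empty completion string, the choice is built with Message(content=None) instead of echoing empty text. A minimal, self-contained sketch of that pattern — the Message/Choices classes here are simplified stand-ins for the ones imported from litellm.utils, and the completions payload is made up for illustration:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Message:  # simplified stand-in for litellm.utils.Message
        content: Optional[str] = None
        role: str = "assistant"

    @dataclass
    class Choices:  # simplified stand-in for litellm.utils.Choices
        index: int
        message: Message
        finish_reason: Optional[str] = None

    # hypothetical provider payload; the second completion is empty
    completions = [
        {"text": "Hello there!", "finish_reason": "stop"},
        {"text": "", "finish_reason": "length"},
    ]

    choices_list = []
    for idx, item in enumerate(completions):
        # empty text becomes content=None instead of an empty string
        if len(item["text"]) > 0:
            message_obj = Message(content=item["text"])
        else:
            message_obj = Message(content=None)
        choices_list.append(
            Choices(finish_reason=item["finish_reason"], index=idx + 1, message=message_obj)
        )

    print([c.message.content for c in choices_list])  # ['Hello there!', None]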
@@ -161,7 +161,10 @@ def completion(
        try:
            choices_list = []
            for idx, item in enumerate(completion_response["completions"]):
+               if len(item["data"]["text"]) > 0:
                    message_obj = Message(content=item["data"]["text"])
+               else:
+                   message_obj = Message(content=None)
                choice_obj = Choices(finish_reason=item["finishReason"]["reason"], index=idx+1, message=message_obj)
                choices_list.append(choice_obj)
            model_response["choices"] = choices_list
@@ -173,7 +176,7 @@ def completion(
            encoding.encode(prompt)
        )
        completion_tokens = len(
-           encoding.encode(model_response["choices"][0]["message"]["content"])
+           encoding.encode(model_response["choices"][0]["message"].get("content"))
        )

        model_response["created"] = time.time()
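The second half of each hunk makes the usage counters tolerate a missing completion: direct indexing into the message can blow up once a choice may carry no content, so the counters fall back to an empty string. A rough illustration of the failure mode and the fallback, using tiktoken directly as a stand-in for the encoding object in these modules (the cl100k_base choice is an assumption; any encoding behaves the same here):

    import tiktoken

    encoding = tiktoken.get_encoding("cl100k_base")  # stand-in for litellm's encoding object

    message = {"role": "assistant"}  # no "content" set when the model returned nothing

    # message["content"] would raise KeyError on this plain dict, and encoding.encode(None)
    # raises TypeError; .get("content", "") yields "" and counts as zero tokens instead.
    completion_tokens = len(encoding.encode(message.get("content", "")))
    print(completion_tokens)  # 0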
@@ -5,7 +5,7 @@ import requests
import time
from typing import Callable, Optional
import litellm
-from litellm.utils import ModelResponse
+from litellm.utils import ModelResponse, Choices, Message

class AlephAlphaError(Exception):
    def __init__(self, status_code, message):
@@ -239,8 +239,15 @@ def completion(
            )
        else:
            try:
-               model_response["choices"][0]["message"]["content"] = completion_response["completions"][0]["completion"]
-               model_response.choices[0].finish_reason = completion_response["completions"][0]["finish_reason"]
+               choices_list = []
+               for idx, item in enumerate(completion_response["completions"]):
+                   if len(item["completion"]) > 0:
+                       message_obj = Message(content=item["completion"])
+                   else:
+                       message_obj = Message(content=None)
+                   choice_obj = Choices(finish_reason=item["finish_reason"], index=idx+1, message=message_obj)
+                   choices_list.append(choice_obj)
+               model_response["choices"] = choices_list
            except:
                raise AlephAlphaError(message=json.dumps(completion_response), status_code=response.status_code)
@@ -152,6 +152,7 @@ def completion(
                status_code=response.status_code,
            )
        else:
+           if len(completion_response["completion"]) > 0:
                model_response["choices"][0]["message"]["content"] = completion_response[
                    "completion"
                ]
@@ -162,7 +163,7 @@ def completion(
            encoding.encode(prompt)
        ) ##[TODO] use the anthropic tokenizer here
        completion_tokens = len(
-           encoding.encode(model_response["choices"][0]["message"]["content"])
+           encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        ) ##[TODO] use the anthropic tokenizer here

        model_response["created"] = time.time()
@@ -285,8 +285,6 @@ def completion(
        prompt = convert_messages_to_prompt(messages, provider)
        inference_params = copy.deepcopy(optional_params)
        stream = inference_params.pop("stream", False)

        print(f"bedrock provider: {provider}")
        if provider == "anthropic":
            ## LOAD CONFIG
            config = litellm.AmazonAnthropicConfig.get_config()
@@ -384,6 +382,7 @@ def completion(
            )
        else:
            try:
+               if len(outputText) > 0:
                    model_response["choices"][0]["message"]["content"] = outputText
            except:
                raise BedrockError(message=json.dumps(outputText), status_code=response.status_code)
@@ -393,7 +392,7 @@ def completion(
            encoding.encode(prompt)
        )
        completion_tokens = len(
-           encoding.encode(model_response["choices"][0]["message"]["content"])
+           encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        )

        model_response["created"] = time.time()
@@ -158,7 +158,10 @@ def completion(
        try:
            choices_list = []
            for idx, item in enumerate(completion_response["generations"]):
+               if len(item["text"]) > 0:
                    message_obj = Message(content=item["text"])
+               else:
+                   message_obj = Message(content=None)
                choice_obj = Choices(finish_reason=item["finish_reason"], index=idx+1, message=message_obj)
                choices_list.append(choice_obj)
            model_response["choices"] = choices_list
@@ -170,7 +173,7 @@ def completion(
            encoding.encode(prompt)
        )
        completion_tokens = len(
-           encoding.encode(model_response["choices"][0]["message"]["content"])
+           encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        )

        model_response["created"] = time.time()
@@ -268,10 +268,12 @@ def completion(
            )
        else:
            if task == "conversational":
+               if len(completion_response["generated_text"]) > 0:
                    model_response["choices"][0]["message"][
                        "content"
                    ] = completion_response["generated_text"]
            elif task == "text-generation-inference":
+               if len(completion_response[0]["generated_text"]) > 0:
                    model_response["choices"][0]["message"][
                        "content"
                    ] = completion_response[0]["generated_text"]
@@ -289,18 +291,24 @@ def completion(
                    sum_logprob = 0
                    for token in item["tokens"]:
                        sum_logprob += token["logprob"]
+                   if len(item["generated_text"]) > 0:
                        message_obj = Message(content=item["generated_text"], logprobs=sum_logprob)
+                   else:
+                       message_obj = Message(content=None)
                    choice_obj = Choices(finish_reason=item["finish_reason"], index=idx+1, message=message_obj)
                    choices_list.append(choice_obj)
                model_response["choices"].extend(choices_list)
            else:
-               model_response["choices"][0]["message"]["content"] = completion_response[0]["generated_text"]
+               if len(completion_response[0]["generated_text"]) > 0:
+                   model_response["choices"][0]["message"][
+                       "content"
+                   ] = completion_response[0]["generated_text"]
        ## CALCULATING USAGE
        prompt_tokens = len(
            encoding.encode(input_text)
        ) ##[TODO] use the llama2 tokenizer here
        completion_tokens = len(
-           encoding.encode(model_response["choices"][0]["message"]["content"])
+           encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        ) ##[TODO] use the llama2 tokenizer here

        model_response["created"] = time.time()
@@ -158,6 +158,7 @@ def completion(
            )
        else:
            try:
+               if len(completion_response["generated_text"]) > 0:
                    model_response["choices"][0]["message"]["content"] = completion_response["generated_text"]
            except:
                raise NLPCloudError(message=json.dumps(completion_response), status_code=response.status_code)
@@ -88,7 +88,7 @@ def completion(

    ## Load Config
    inference_params = copy.deepcopy(optional_params)
-   inference_params.pop("stream") # palm does not support streaming, so we handle this by fake streaming in main.py
+   inference_params.pop("stream", None) # palm does not support streaming, so we handle this by fake streaming in main.py
    config = litellm.PalmConfig.get_config()
    for k, v in config.items():
        if k not in inference_params: # completion(top_k=3) > palm_config(top_k=3) <- allows for dynamic variables to be passed in
@@ -136,7 +136,10 @@ def completion(
        try:
            choices_list = []
            for idx, item in enumerate(completion_response.candidates):
+               if len(item["output"]) > 0:
                    message_obj = Message(content=item["output"])
+               else:
+                   message_obj = Message(content=None)
                choice_obj = Choices(index=idx+1, message=message_obj)
                choices_list.append(choice_obj)
            model_response["choices"] = choices_list
@@ -149,7 +152,7 @@ def completion(
            encoding.encode(prompt)
        )
        completion_tokens = len(
-           encoding.encode(model_response["choices"][0]["message"]["content"])
+           encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        )

        model_response["created"] = time.time()
@@ -163,13 +163,15 @@ def completion(
        )
        ## RESPONSE OBJECT
        output_text = tokenizer.decode(outputs[0])

+       if len(output_text) > 0:
            model_response["choices"][0]["message"]["content"] = output_text

        prompt_tokens = len(
            encoding.encode(prompt)
        )
        completion_tokens = len(
-           encoding.encode(model_response["choices"][0]["message"]["content"])
+           encoding.encode(model_response["choices"][0]["message"].get("content"))
        )

        model_response["created"] = time.time()
@@ -232,11 +232,12 @@ def completion(
        result = " "

    ## Building RESPONSE OBJECT
+   if len(result) > 1:
        model_response["choices"][0]["message"]["content"] = result

    # Calculate usage
    prompt_tokens = len(encoding.encode(prompt))
-   completion_tokens = len(encoding.encode(model_response["choices"][0]["message"]["content"]))
+   completion_tokens = len(encoding.encode(model_response["choices"][0]["message"].get("content", "")))
    model_response["model"] = "replicate/" + model
    model_response["usage"] = {
        "prompt_tokens": prompt_tokens,
@@ -154,6 +154,7 @@ def completion(
            )
        else:
            try:
+               if len(completion_response[0]["generation"]) > 0:
                    model_response["choices"][0]["message"]["content"] = completion_response[0]["generation"]
            except:
                raise SagemakerError(message=json.dumps(completion_response), status_code=response.status_code)
@@ -163,7 +164,7 @@ def completion(
            encoding.encode(prompt)
        )
        completion_tokens = len(
-           encoding.encode(model_response["choices"][0]["message"]["content"])
+           encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        )

        model_response["created"] = time.time()
@@ -162,14 +162,14 @@ def completion(
                message=json.dumps(completion_response["output"]), status_code=response.status_code
            )

-       completion_text = completion_response["output"]["choices"][0]["text"]
+       if len(completion_response["output"]["choices"][0]["text"]) > 0:
+           model_response["choices"][0]["message"]["content"] = completion_response["output"]["choices"][0]["text"]

-       ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
+       ## CALCULATING USAGE
        prompt_tokens = len(encoding.encode(prompt))
        completion_tokens = len(
-           encoding.encode(completion_text)
+           encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        )
-       model_response["choices"][0]["message"]["content"] = completion_text
        if "finish_reason" in completion_response["output"]["choices"][0]:
            model_response.choices[0].finish_reason = completion_response["output"]["choices"][0]["finish_reason"]
        model_response["created"] = time.time()
@@ -129,6 +129,10 @@ def completion(
        )

        ## RESPONSE OBJECT
+       if len(str(completion_response)) > 0:
+           model_response["choices"][0]["message"][
+               "content"
+           ] = str(completion_response)
-       model_response["choices"][0]["message"]["content"] = str(completion_response)
        model_response["created"] = time.time()
        model_response["model"] = model
@@ -137,7 +141,7 @@ def completion(
            encoding.encode(prompt)
        )
        completion_tokens = len(
-           encoding.encode(model_response["choices"][0]["message"]["content"])
+           encoding.encode(model_response["choices"][0]["message"].get("content", ""))
        )

        model_response["usage"] = {
@@ -513,7 +513,10 @@ def completion(
        ## RESPONSE OBJECT
        choices_list = []
        for idx, item in enumerate(response["choices"]):
+           if len(item["text"]) > 0:
                message_obj = Message(content=item["text"])
+           else:
+               message_obj = Message(content=None)
            choice_obj = Choices(finish_reason=item["finish_reason"], index=idx+1, message=message_obj)
            choices_list.append(choice_obj)
        model_response["choices"] = choices_list
@@ -1,2 +1,4 @@
[{"model": "gpt-3.5-turbo-16k", "messages": [{"content": "Please provide a summary of the latest scientific discoveries.", "role": "user"}], "api_key": "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"}, {"id": "chatcmpl-86XzBUa7ISDFCWYFUFc1bfuwoMPbG", "object": "chat.completion", "created": 1696571301, "model": "gpt-3.5-turbo-16k-0613", "choices": [{"index": 0, "message": {"role": "assistant", "content": "I'm sorry, but as an AI language model, my responses are generated based on pre-existing knowledge and do not have real-time access to the latest scientific discoveries. As a result, I cannot provide you with a summary of the most recent scientific findings. However, I am capable of providing general information on a wide range of scientific topics if you have any specific questions or interests."}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 17, "completion_tokens": 76, "total_tokens": 93}}]
[{"model": "gpt-3.5-turbo-16k", "messages": [{"content": "Please provide a summary of the latest scientific discoveries.", "role": "user"}], "api_key": "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"}, {"id": "chatcmpl-86XzBof0OA5JEEClaK9qGv1gHhAs8", "object": "chat.completion", "created": 1696571301, "model": "gpt-3.5-turbo-16k-0613", "choices": [{"index": 0, "message": {"role": "assistant", "content": "As an AI language model, I can provide you with a general overview of some recent scientific discoveries across various fields. However, please note that the summaries provided may not encompass every single discovery happening since scientific progress is a continuous and constantly evolving process. Here are a few of the notable recent scientific discoveries:\n\n1. Mars Rover Finds Evidence of Ancient Life: NASA's Mars Perseverance rover discovered rocks believed to be volcanic, containing possible signs of ancient microbial life on Mars. The samples collected from a crater could provide crucial insights into the planet's past habitability.\n\n2. Quantum Supremacy Achieved: Google's prototype quantum computer, Sycamore, achieved \"quantum supremacy,\" demonstrating calculations that are impossible for classical computers. This milestone brings a new era of computing possibilities and has the potential to impact cryptography and optimization problems.\n\n3. CRISPR Advancements: CRISPR gene-editing technology continues to make progress with new discoveries. Recent breakthroughs include high-precision gene editing techniques, expansion to new organisms, and potential therapies for genetic diseases.\n\n4. Black Hole Imaging: In 2019, the Event Horizon Telescope captured the first-ever direct image of a black hole, located at the center of the galaxy Messier 87. The image provided valuable insights into the nature and behavior of these enigmatic cosmic objects.\n\n5. Renewable Energy Innovations: Scientists and engineers are continuously working on improving renewable energy technologies. Recent advancements include highly efficient solar panels, next-generation wind turbines, and advancements in energy storage systems.\n\n6. COVID-19 Vaccines: Rapid development and deployment of COVID-19 vaccines emerged as a significant scientific achievement. Multiple vaccines, such as those developed by Pfizer-BioNTech, Moderna, and AstraZeneca, have been authorized for emergency use or full approval, contributing to global efforts in combating the pandemic.\n\n7. Artificial Intelligence Breakthroughs: Researchers continue to push the boundaries of artificial intelligence. Advancements include improved natural language processing models, reinforcement learning algorithms, and the development of AI systems for medical diagnosis and autonomous vehicles.\n\nThese are just a few examples of recent scientific discoveries and breakthroughs. Scientific research is a vast and diverse domain, with discoveries occurring across many fields, often on a daily basis."}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 17, "completion_tokens": 454, "total_tokens": 471}}]
[{"model": "gpt-3.5-turbo-16k", "messages": [{"content": "Please provide a summary of the latest scientific discoveries.", "role": "user"}], "api_key": "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"}, {"id": "chatcmpl-88APE6LBJebPYs1OkBdzaAiWr4yIc", "object": "chat.completion", "created": 1696957316, "model": "gpt-3.5-turbo-16k-0613", "choices": [{"index": 0, "message": {"role": "assistant", "content": "As an AI language model, I don't have real-time access to current news or the ability to browse the internet. Therefore, I'm unable to provide a summary of the latest scientific discoveries as they are constantly evolving. However, I can provide information on various scientific topics or answer specific questions you may have. If you would like information on a particular scientific discovery or topic, please let me know."}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 17, "completion_tokens": 80, "total_tokens": 97}}]
[{"model": "gpt-3.5-turbo-16k", "messages": [{"content": "Please provide a summary of the latest scientific discoveries.", "role": "user"}], "api_key": "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"}, {"id": "chatcmpl-88APEz1u9Dq2XDL3BIkfjspXQbP4J", "object": "chat.completion", "created": 1696957316, "model": "gpt-3.5-turbo-16k-0613", "choices": [{"index": 0, "message": {"role": "assistant", "content": "As an AI language model, I don't have access to real-time data or the ability to browse the internet. Hence, I cannot provide you with a summary of the latest scientific discoveries, as it would require up-to-date information. Scientific discoveries are constantly being made across various fields, including physics, chemistry, biology, medicine, and more. To stay updated on the latest scientific discoveries, I recommend referring to reputable scientific journals, news outlets, or websites that specialize in scientific communication and research."}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 17, "completion_tokens": 99, "total_tokens": 116}}]
@@ -10,6 +10,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
+from litellm.utils import Message


# litellm.set_verbose = True
@@ -116,4 +117,3 @@ def test_completion_with_no_provider():
    # print(f"error occurred: {traceback.format_exc()}")
    # pass
    # os.environ["OPENAI_API_KEY"] = str(temp_key) # this passes linting#5
@@ -317,13 +317,11 @@ def aleph_alpha_test_completion():

        assert len(response_2_text) < len(response_1_text)

+       try:
            response_3 = litellm.completion(model="luminous-base",
                messages=[{ "content": "Hello, how are you?","role": "user"}],
                n=2)
+           pytest.fail(f"Error not raised when n=2 passed to provider")
+       except:
+           pass

-       assert len(response_3.choices) > 1
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@@ -219,7 +219,7 @@ def test_completion_cohere_stream():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

-# test_completion_cohere_stream()
+test_completion_cohere_stream()

def test_completion_cohere_stream_bad_key():
    try:
@@ -111,12 +111,10 @@ def _generate_id(): # private helper function

def map_finish_reason(finish_reason: str): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null'
    # anthropic mapping
-   print(f"receives finish reason: {finish_reason}")
    if finish_reason == "stop_sequence":
        return "stop"
    return finish_reason


class Message(OpenAIObject):
    def __init__(self, content="default", role="assistant", logprobs=None, **params):
        super(Message, self).__init__(**params)
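As a small usage note for the helper shown above: map_finish_reason only normalizes Anthropic's "stop_sequence" value and passes every other reason through unchanged. A short sketch, assuming the function is importable from litellm.utils as defined in that hunk:

    from litellm.utils import map_finish_reason

    assert map_finish_reason("stop_sequence") == "stop"  # anthropic-style reason normalized
    assert map_finish_reason("length") == "length"       # other reasons pass through unchanged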