refactor(openai.py): moving openai text completion calls to http

2025-04-24 18:24:20 +00:00 · 2023-11-08 18:39:56 -08:00 · 2023-11-08 18:39:56 -08:00 · c2cbdb23fd
commit c2cbdb23fd
parent db0e032d53
6 changed files with 211 additions and 66 deletions
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@ -269,3 +269,132 @@ class OpenAIChatCompletion(BaseLLM):
            else: 
                import traceback
                raise OpenAIError(status_code=500, message=traceback.format_exc())
+
+
+class OpenAITextCompletion(BaseLLM):
+    """Client for an OpenAI-style text-completion (`/completions`) endpoint.
+
+    Requests are sent over HTTP with the session stored in `_client_session`
+    rather than through the `openai` SDK.
+    """
+
+    # Session used for every POST issued by `completion`.
+    _client_session: requests.Session
+
+    def __init__(self) -> None:
+        super().__init__()
+        # `create_client_session` is not defined in this class; presumably inherited from BaseLLM.
+        self._client_session = self.create_client_session()
+
+    def validate_environment(self, api_key):
+        """Return the JSON request headers, adding a bearer token when `api_key` is truthy."""
+        headers = {
+            "content-type": "application/json",
+        }
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+        return headers
+
+    def convert_to_model_response_object(self, response_object: Optional[dict]=None, model_response_object: Optional[ModelResponse]=None):
+        """Copy a decoded `/completions` payload onto `model_response_object`.
+
+        Each entry of `response_object["choices"]` becomes a `Choices` holding an
+        assistant `Message` built from the entry's "text". "usage", "id" and
+        "model" are copied when present, and the raw payload is kept under
+        `_hidden_params["original_response"]`.
+
+        Returns:
+            The populated `model_response_object`.
+
+        Raises:
+            OpenAIError: status 500 when either argument is None or the payload
+                does not have the expected shape.
+        """
+        if response_object is None or model_response_object is None:
+            raise OpenAIError(status_code=500, message="Error in response object format")
+        try:
+            choice_list = []
+            for idx, choice in enumerate(response_object["choices"]):
+                message = Message(content=choice["text"], role="assistant")
+                choice_list.append(Choices(finish_reason=choice["finish_reason"], index=idx, message=message))
+            model_response_object.choices = choice_list
+
+            if "usage" in response_object:
+                model_response_object.usage = response_object["usage"]
+
+            if "id" in response_object:
+                model_response_object.id = response_object["id"]
+
+            if "model" in response_object:
+                model_response_object.model = response_object["model"]
+
+            # track original response, if users make a litellm.text_completion() request, we can return the original response
+            model_response_object._hidden_params["original_response"] = response_object
+            return model_response_object
+        except OpenAIError:
+            raise
+        except Exception as e:
+            # Previously this error was constructed but never raised, so a malformed
+            # payload made the method return None.
+            raise OpenAIError(status_code=500, message="Invalid response object.") from e
+
+    def completion(self, 
+               model: Optional[str]=None,
+               messages: Optional[list]=None,
+               model_response: Optional[ModelResponse]=None,
+               print_verbose: Optional[Callable]=None,
+               api_key: Optional[str]=None,
+               api_base: Optional[str]=None,
+               logging_obj=None,
+               optional_params=None,
+               litellm_params=None,
+               logger_fn=None,
+               headers: Optional[dict]=None):
+        """POST a text-completion request to `{api_base}/completions`.
+
+        The prompt is `messages[0]["content"]` when that value is a list,
+        otherwise the contents of all messages joined with single spaces.
+        `optional_params` is merged into the request body as-is.
+
+        Returns:
+            When `optional_params["stream"] == True`: the raw line iterator from
+            `response.iter_lines()`. Otherwise: the result of
+            `convert_to_model_response_object` applied to the decoded JSON body.
+
+        Raises:
+            OpenAIError: 422 when `model`, `messages` or `api_base` is missing;
+                the HTTP status code when the endpoint does not answer 200;
+                500 (with the formatted traceback as message) for any other failure.
+        """
+        super().completion()
+        try:
+            if headers is None:
+                headers = self.validate_environment(api_key=api_key)
+            if model is None or messages is None:
+                raise OpenAIError(status_code=422, message="Missing model or messages")
+            if api_base is None:
+                # Without this check the request URL would be the literal "None/completions".
+                raise OpenAIError(status_code=422, message="Missing api_base")
+            # The signature allows optional_params=None; treat it as "no extra params".
+            optional_params = optional_params or {}
+
+            api_base = f"{api_base}/completions"
+
+            if len(messages) > 0 and "content" in messages[0] and isinstance(messages[0]["content"], list):
+                # Note: internal logic - for enabling litellm.text_completion()
+                # text-davinci-003 can accept a string or array, if it's an array, assume the array is set in messages[0]['content']
+                # https://platform.openai.com/docs/api-reference/completions/create
+                prompt = messages[0]["content"]
+            else:
+                prompt = " ".join([message["content"] for message in messages]) # type: ignore
+
+            data = {
+                "model": model,
+                "prompt": prompt,
+                **optional_params
+            }
+
+            ## LOGGING
+            if logging_obj is not None:
+                logging_obj.pre_call(
+                    input=messages,
+                    api_key=api_key,
+                    additional_args={"headers": headers, "api_base": api_base, "data": data},
+                )
+
+            is_streaming = optional_params.get("stream") == True
+            response = self._client_session.post(
+                url=api_base,
+                json=data,
+                headers=headers,
+                stream=is_streaming,
+            )
+            if response.status_code != 200:
+                raise OpenAIError(status_code=response.status_code, message=response.text)
+
+            if is_streaming:
+                ## RESPONSE OBJECT
+                return response.iter_lines()
+
+            ## LOGGING
+            if logging_obj is not None:
+                logging_obj.post_call(
+                    input=prompt,
+                    api_key=api_key,
+                    original_response=response,
+                    additional_args={
+                        "headers": headers,
+                        "api_base": api_base,
+                    },
+                )
+
+            ## RESPONSE OBJECT
+            return self.convert_to_model_response_object(response_object=response.json(), model_response_object=model_response)
+        except OpenAIError:
+            # Already carries the right status code; pass it through untouched.
+            raise
+        except Exception as e:
+            import traceback
+            raise OpenAIError(status_code=500, message=traceback.format_exc()) from e
--- a/litellm/main.py
+++ b/litellm/main.py
@ -49,7 +49,7 @@ from .llms import (
    palm,
    vertex_ai,
    maritalk)
-from .llms.openai import OpenAIChatCompletion
+from .llms.openai import OpenAIChatCompletion, OpenAITextCompletion
 from .llms.azure import AzureChatCompletion
 from .llms.prompt_templates.factory import prompt_factory, custom_prompt, function_call_prompt
 import tiktoken
@ -73,6 +73,7 @@ from litellm.utils import (
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv()  # Loading env variables using dotenv
 openai_chat_completions = OpenAIChatCompletion()
+openai_text_completions = OpenAITextCompletion()
 azure_chat_completions = AzureChatCompletion()
 ####### COMPLETION ENDPOINTS ################

@ -498,14 +499,8 @@ def completion(
            )
        elif (
            custom_llm_provider == "text-completion-openai"
-            or model in litellm.open_ai_text_completion_models
            or "ft:babbage-002" in model
            or "ft:davinci-002" in model  # support for finetuned completion models
-            # NOTE: Do NOT add custom_llm_provider == "openai". 
-            # this will break hosted vllm/proxy calls. 
-            # see: https://docs.litellm.ai/docs/providers/vllm#calling-hosted-vllm-server. 
-            # VLLM expects requests to call openai.ChatCompletion we need those requests to always 
-            # call openai.ChatCompletion
        ):
            # print("calling custom openai provider")
            openai.api_type = "openai"
@ -558,43 +553,22 @@ def completion(
                },
            )
            ## COMPLETION CALL
-            response = openai.Completion.create(
-                model=model, 
-                prompt=prompt,
-                headers=headers,
-                api_key = api_key,
-                api_base=api_base,
-                **optional_params
-            )
-            if "stream" in optional_params and optional_params["stream"] == True:
-                response = CustomStreamWrapper(response, model, custom_llm_provider="text-completion-openai", logging_obj=logging)
-                return response
-            ## LOGGING
-            logging.post_call(
-                input=prompt,
+            model_response = openai_text_completions.completion(
+                model=model,
+                messages=messages,
+                model_response=model_response,
+                print_verbose=print_verbose,
                api_key=api_key,
-                original_response=response,
-                additional_args={
-                    "openai_organization": litellm.organization,
-                    "headers": headers,
-                    "api_base": openai.api_base,
-                    "api_type": openai.api_type,
-                },
+                api_base=api_base,
+                logging_obj=logging,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                logger_fn=logger_fn
            )
-            ## RESPONSE OBJECT
-            model_response._hidden_params["original_response"] = response # track original response, if users make a litellm.text_completion() request, we can return the original response
-            choices_list = []
-            for idx, item in enumerate(response["choices"]):
-                if len(item["text"]) > 0: 
-                    message_obj = Message(content=item["text"])
-                else: 
-                    message_obj = Message(content=None)
-                choice_obj = Choices(finish_reason=item["finish_reason"], index=idx+1, message=message_obj)
-                choices_list.append(choice_obj)
-            model_response["choices"] = choices_list
-            model_response["created"] = response.get("created", time.time())
-            model_response["model"] = model
-            model_response["usage"] = response.get("usage", 0)
+            
+            if "stream" in optional_params and optional_params["stream"] == True:
+                response = CustomStreamWrapper(model_response, model, custom_llm_provider="text-completion-openai", logging_obj=logging)
+                return response
            response = model_response
        elif (
            "replicate" in model or 
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@ -391,11 +391,12 @@ def test_completion_openai():

 def test_completion_text_openai():
    try:
+        litellm.set_verbose = True
        response = completion(model="gpt-3.5-turbo-instruct", messages=messages)
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
-# test_completion_text_openai()
+test_completion_text_openai()

 def test_completion_openai_with_optional_params():
    try:
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@ -62,7 +62,7 @@ def test_context_window_with_fallbacks(model):

 # for model in litellm.models_by_provider["bedrock"]:
 #     test_context_window(model=model)
-# test_context_window(model="gpt-3.5-turbo")
+# test_context_window(model="gpt-3.5-turbo-instruct")
 # test_context_window_with_fallbacks(model="command-nightly")
 # Test 2: InvalidAuth Errors
@pytest.mark.parametrize("model", models)
@ -70,7 +70,7 @@ def invalid_auth(model):  # set the model key to an invalid key, depending on th
    messages = [{"content": "Hello, how are you?", "role": "user"}]
    temporary_key = None
    try:
-        if model == "gpt-3.5-turbo":
+        if model == "gpt-3.5-turbo" or model == "gpt-3.5-turbo-instruct":
            temporary_key = os.environ["OPENAI_API_KEY"]
            os.environ["OPENAI_API_KEY"] = "bad-key"
        elif model == "bedrock/anthropic.claude-v2":
@ -158,7 +158,7 @@ def invalid_auth(model):  # set the model key to an invalid key, depending on th

 # for model in litellm.models_by_provider["bedrock"]:
 #     invalid_auth(model=model)
-# invalid_auth(model="gpt-3.5-turbo")
+# invalid_auth(model="gpt-3.5-turbo-instruct")

 # Test 3: Invalid Request Error 
@pytest.mark.parametrize("model", models)
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@ -916,7 +916,31 @@ def test_openai_chat_completion_call():
        print(f"error occurred: {traceback.format_exc()}")
        pass

-test_openai_chat_completion_call()
+# test_openai_chat_completion_call()
+
+
+def test_openai_text_completion_call():
+    """Stream a completion from gpt-3.5-turbo-instruct and check that text arrives.
+
+    Every chunk is passed through `streaming_format_tests`, which returns the
+    chunk text and a finished flag; iteration stops at the first finished chunk.
+    Failures are printed rather than raised, so this function does not fail
+    the test run on its own.
+    """
+    try:
+        litellm.set_verbose = True
+        response = completion(
+            model="gpt-3.5-turbo-instruct", messages=messages, stream=True
+        )
+        complete_response = ""
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            complete_response += chunk
+            if finished:
+                break
+        if complete_response.strip() == "": 
+            raise Exception("Empty response received")
+        print(f"complete response: {complete_response}")
+    except Exception:
+        # Best-effort reporting only; KeyboardInterrupt/SystemExit still propagate.
+        print(f"error occurred: {traceback.format_exc()}")
+
+test_openai_text_completion_call()

 # # test on together ai completion call - starcoder
 def test_together_ai_completion_call_starcoder():
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -2890,19 +2890,19 @@ def exception_type(
                exception_type = type(original_exception).__name__
            else:
                exception_type = ""
-            if custom_llm_provider == "openai":
+            if custom_llm_provider == "openai" or custom_llm_provider == "text-completion-openai":
                if "This model's maximum context length is" in error_str:
                    exception_mapping_worked = True
                    raise ContextWindowExceededError(
-                        message=f"AzureException - {original_exception.message}",
-                        llm_provider="azure",
+                        message=f"OpenAIException - {original_exception.message}",
+                        llm_provider="openai",
                        model=model
                    )
                elif "invalid_request_error" in error_str:
                    exception_mapping_worked = True
                    raise InvalidRequestError(
-                        message=f"AzureException - {original_exception.message}",
-                        llm_provider="azure",
+                        message=f"OpenAIException - {original_exception.message}",
+                        llm_provider="openai",
                        model=model
                    )
                elif hasattr(original_exception, "status_code"):
@ -4013,16 +4013,33 @@ class CustomStreamWrapper:
            else:
                return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}

-        except:
+        except Exception as e:
            traceback.print_exc()
-            pass
+            raise e


    def handle_openai_text_completion_chunk(self, chunk):
-        try:
-            return chunk["choices"][0]["text"]
-        except:
-            raise ValueError(f"Unable to parse response. Original response: {chunk}")
+        """Parse one raw line of a text-completion event stream.
+
+        `chunk` is a bytes line. Lines starting with "data:" carry a JSON
+        payload whose first choice supplies the text and finish reason. The
+        "data: [DONE]" end-of-stream sentinel and any other non-data line
+        yield an empty, unfinished result.
+
+        Returns:
+            dict with keys "text", "is_finished" and "finish_reason".
+
+        Raises:
+            ValueError: when a non-data line contains "error".
+            Exception: any decoding/parsing failure, re-raised after its
+                traceback is printed.
+        """
+        try: 
+            str_line = chunk.decode("utf-8")  # Convert bytes to string
+            text = "" 
+            is_finished = False
+            finish_reason = None
+            if str_line.startswith("data:"):
+                payload = str_line[5:].strip()
+                # The stream ends with a literal "[DONE]" marker, which is not JSON.
+                if payload != "[DONE]":
+                    first_choice = json.loads(payload)["choices"][0]
+                    text = first_choice.get("text", "")
+                    print_verbose(f"delta content: {text}")
+                    if first_choice.get("finish_reason", None):
+                        is_finished = True
+                        finish_reason = first_choice["finish_reason"]
+            elif "error" in str_line:
+                raise ValueError(f"Unable to parse response. Original response: {str_line}")
+            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+
+        except Exception:
+            traceback.print_exc()
+            raise

    def handle_baseten_chunk(self, chunk):
        try:
@ -4146,9 +4163,6 @@ class CustomStreamWrapper:
                    completion_obj["content"] = response_obj["text"]
                    if response_obj["is_finished"]: 
                        model_response.choices[0].finish_reason = response_obj["finish_reason"]
-                elif self.custom_llm_provider and self.custom_llm_provider == "text-completion-openai":
-                    chunk = next(self.completion_stream)
-                    completion_obj["content"] = self.handle_openai_text_completion_chunk(chunk)
                elif self.model in litellm.nlp_cloud_models or self.custom_llm_provider == "nlp_cloud":
                    try: 
                        chunk = next(self.completion_stream)
@ -4235,12 +4249,15 @@ class CustomStreamWrapper:
                    print_verbose(f"completion obj content: {completion_obj['content']}")
                    if response_obj["is_finished"]: 
                        model_response.choices[0].finish_reason = response_obj["finish_reason"]
-                else: # openai chat/azure models
+                elif self.custom_llm_provider == "text-completion-openai":
                    chunk = next(self.completion_stream)
-                    model_response = chunk
-                    # LOGGING
-                    threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
-                    return model_response
+                    response_obj = self.handle_openai_text_completion_chunk(chunk)
+                    completion_obj["content"] = response_obj["text"]
+                    print_verbose(f"completion obj content: {completion_obj['content']}")
+                    if response_obj["is_finished"]: 
+                        model_response.choices[0].finish_reason = response_obj["finish_reason"]
+                else: # openai chat/azure models
+                    raise Exception("Unmapped Model Error")
                
                model_response.model = self.model
                if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string