fix(openai.py): support completion, streaming, async_streaming

This commit is contained in:
Krrish Dholakia 2024-07-20 15:23:42 -07:00
parent 6a7bf0c251
commit a01a45230c
4 changed files with 174 additions and 48 deletions

View file

@@ -803,6 +803,7 @@ class OpenAIChatCompletion(BaseLLM):
client=None, client=None,
organization: Optional[str] = None, organization: Optional[str] = None,
custom_llm_provider: Optional[str] = None, custom_llm_provider: Optional[str] = None,
drop_params: Optional[bool] = None,
): ):
super().completion() super().completion()
exception_mapping_worked = False exception_mapping_worked = False
@@ -858,6 +859,7 @@ class OpenAIChatCompletion(BaseLLM):
client=client, client=client,
max_retries=max_retries, max_retries=max_retries,
organization=organization, organization=organization,
drop_params=drop_params,
) )
else: else:
return self.acompletion( return self.acompletion(
@@ -871,6 +873,7 @@ class OpenAIChatCompletion(BaseLLM):
client=client, client=client,
max_retries=max_retries, max_retries=max_retries,
organization=organization, organization=organization,
drop_params=drop_params,
) )
elif optional_params.get("stream", False): elif optional_params.get("stream", False):
return self.streaming( return self.streaming(
@@ -925,6 +928,33 @@ class OpenAIChatCompletion(BaseLLM):
response_object=stringified_response, response_object=stringified_response,
model_response_object=model_response, model_response_object=model_response,
) )
except openai.UnprocessableEntityError as e:
## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
if litellm.drop_params is True or drop_params is True:
if e.body is not None and e.body.get("detail"): # type: ignore
detail = e.body.get("detail") # type: ignore
invalid_params: List[str] = []
if (
isinstance(detail, List)
and len(detail) > 0
and isinstance(detail[0], dict)
):
for error_dict in detail:
if (
error_dict.get("loc")
and isinstance(error_dict.get("loc"), list)
and len(error_dict.get("loc")) == 2
):
invalid_params.append(error_dict["loc"][1])
new_data = {}
for k, v in optional_params.items():
if k not in invalid_params:
new_data[k] = v
optional_params = new_data
else:
raise e
# e.message
except Exception as e: except Exception as e:
if print_verbose is not None: if print_verbose is not None:
print_verbose(f"openai.py: Received openai error - {str(e)}") print_verbose(f"openai.py: Received openai error - {str(e)}")
@@ -982,6 +1012,7 @@ class OpenAIChatCompletion(BaseLLM):
client=None, client=None,
max_retries=None, max_retries=None,
headers=None, headers=None,
drop_params: Optional[bool] = None,
): ):
response = None response = None
for _ in range( for _ in range(
@@ -1030,7 +1061,7 @@ class OpenAIChatCompletion(BaseLLM):
) )
except openai.UnprocessableEntityError as e: except openai.UnprocessableEntityError as e:
## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800 ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
if litellm.drop_params is True: if litellm.drop_params is True or drop_params is True:
if e.body is not None and e.body.get("detail"): # type: ignore if e.body is not None and e.body.get("detail"): # type: ignore
detail = e.body.get("detail") # type: ignore detail = e.body.get("detail") # type: ignore
invalid_params: List[str] = [] invalid_params: List[str] = []
@@ -1113,57 +1144,87 @@ class OpenAIChatCompletion(BaseLLM):
client=None, client=None,
max_retries=None, max_retries=None,
headers=None, headers=None,
drop_params: Optional[bool] = None,
): ):
response = None response = None
try: for _ in range(2):
openai_aclient = self._get_openai_client( try:
is_async=True, openai_aclient = self._get_openai_client(
api_key=api_key, is_async=True,
api_base=api_base, api_key=api_key,
timeout=timeout, api_base=api_base,
max_retries=max_retries, timeout=timeout,
organization=organization, max_retries=max_retries,
client=client, organization=organization,
) client=client,
## LOGGING
logging_obj.pre_call(
input=data["messages"],
api_key=api_key,
additional_args={
"headers": headers,
"api_base": api_base,
"acompletion": True,
"complete_input_dict": data,
},
)
headers, response = await self.make_openai_chat_completion_request(
openai_aclient=openai_aclient, data=data, timeout=timeout
)
logging_obj.model_call_details["response_headers"] = headers
streamwrapper = CustomStreamWrapper(
completion_stream=response,
model=model,
custom_llm_provider="openai",
logging_obj=logging_obj,
stream_options=data.get("stream_options", None),
)
return streamwrapper
except (
Exception
) as e: # need to exception handle here. async exceptions don't get caught in sync functions.
if response is not None and hasattr(response, "text"):
raise OpenAIError(
status_code=500,
message=f"{str(e)}\n\nOriginal Response: {response.text}",
) )
else: ## LOGGING
if type(e).__name__ == "ReadTimeout": logging_obj.pre_call(
raise OpenAIError(status_code=408, message=f"{type(e).__name__}") input=data["messages"],
elif hasattr(e, "status_code"): api_key=api_key,
raise OpenAIError(status_code=e.status_code, message=str(e)) additional_args={
"headers": headers,
"api_base": api_base,
"acompletion": True,
"complete_input_dict": data,
},
)
headers, response = await self.make_openai_chat_completion_request(
openai_aclient=openai_aclient, data=data, timeout=timeout
)
logging_obj.model_call_details["response_headers"] = headers
streamwrapper = CustomStreamWrapper(
completion_stream=response,
model=model,
custom_llm_provider="openai",
logging_obj=logging_obj,
stream_options=data.get("stream_options", None),
)
return streamwrapper
except openai.UnprocessableEntityError as e:
## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
if litellm.drop_params is True or drop_params is True:
if e.body is not None and e.body.get("detail"): # type: ignore
detail = e.body.get("detail") # type: ignore
invalid_params: List[str] = []
if (
isinstance(detail, List)
and len(detail) > 0
and isinstance(detail[0], dict)
):
for error_dict in detail:
if (
error_dict.get("loc")
and isinstance(error_dict.get("loc"), list)
and len(error_dict.get("loc")) == 2
):
invalid_params.append(error_dict["loc"][1])
new_data = {}
for k, v in data.items():
if k not in invalid_params:
new_data[k] = v
data = new_data
else: else:
raise OpenAIError(status_code=500, message=f"{str(e)}") raise e
except (
Exception
) as e: # need to exception handle here. async exceptions don't get caught in sync functions.
if response is not None and hasattr(response, "text"):
raise OpenAIError(
status_code=500,
message=f"{str(e)}\n\nOriginal Response: {response.text}",
)
else:
if type(e).__name__ == "ReadTimeout":
raise OpenAIError(
status_code=408, message=f"{type(e).__name__}"
)
elif hasattr(e, "status_code"):
raise OpenAIError(status_code=e.status_code, message=str(e))
else:
raise OpenAIError(status_code=500, message=f"{str(e)}")
# Embedding # Embedding
async def make_openai_embedding_request( async def make_openai_embedding_request(

View file

@@ -1176,6 +1176,7 @@ def completion(
client=client, # pass AsyncOpenAI, OpenAI client client=client, # pass AsyncOpenAI, OpenAI client
organization=organization, organization=organization,
custom_llm_provider=custom_llm_provider, custom_llm_provider=custom_llm_provider,
drop_params=non_default_params.get("drop_params"),
) )
except Exception as e: except Exception as e:
## LOGGING - log the original exception returned ## LOGGING - log the original exception returned

View file

@@ -142,6 +142,36 @@ def test_completion_azure_ai_command_r():
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_completion_azure_ai_mistral_invalid_params(sync_mode):
    """Azure AI Mistral rejects frequency/presence penalty params; with
    ``drop_params=True`` litellm should strip them and the call should
    still succeed, on both the sync and async paths."""
    try:
        import os

        litellm.set_verbose = True

        # Route the azure_ai provider at the Mistral deployment configured
        # in the CI environment.
        os.environ["AZURE_AI_API_BASE"] = os.getenv("AZURE_MISTRAL_API_BASE", "")
        os.environ["AZURE_AI_API_KEY"] = os.getenv("AZURE_MISTRAL_API_KEY", "")

        call_kwargs = {
            "model": "azure_ai/mistral",
            "messages": [{"role": "user", "content": "What is the meaning of life?"}],
            "frequency_penalty": 0.1,
            "presence_penalty": 0.1,
            "drop_params": True,
        }

        if sync_mode:
            response = completion(**call_kwargs)  # type: ignore
        else:
            response = await litellm.acompletion(**call_kwargs)  # type: ignore

        # The resolved model name should carry the azure_ai provider prefix.
        assert "azure_ai" in response.model
    except litellm.Timeout:
        # A slow upstream is not a failure for this test.
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_completion_azure_command_r(): def test_completion_azure_command_r():
try: try:
litellm.set_verbose = True litellm.set_verbose = True

View file

@@ -2881,6 +2881,40 @@ def test_azure_streaming_and_function_calling():
raise e raise e
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_completion_azure_ai_mistral_invalid_params(sync_mode):
    """Streaming variant: Azure AI Mistral rejects frequency/presence
    penalty params; with ``drop_params=True`` litellm should strip them
    and the stream should be consumable on both sync and async paths."""
    try:
        import os

        litellm.set_verbose = True

        # Route the azure_ai provider at the Mistral deployment configured
        # in the CI environment.
        os.environ["AZURE_AI_API_BASE"] = os.getenv("AZURE_MISTRAL_API_BASE", "")
        os.environ["AZURE_AI_API_KEY"] = os.getenv("AZURE_MISTRAL_API_KEY", "")

        call_kwargs = {
            "model": "azure_ai/mistral",
            "messages": [{"role": "user", "content": "What is the meaning of life?"}],
            "frequency_penalty": 0.1,
            "presence_penalty": 0.1,
            "drop_params": True,
            "stream": True,
        }

        if sync_mode:
            response = completion(**call_kwargs)  # type: ignore
            # Drain the sync stream; any dropped-param failure raises here.
            for chunk in response:
                print(chunk)
        else:
            response = await litellm.acompletion(**call_kwargs)  # type: ignore
            # Drain the async stream; any dropped-param failure raises here.
            async for chunk in response:
                print(chunk)
    except litellm.Timeout:
        # A slow upstream is not a failure for this test.
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_azure_astreaming_and_function_calling(): async def test_azure_astreaming_and_function_calling():
import uuid import uuid