Merge branch 'main' into litellm_return-response_headers

Author: Ishaan Jaff, 2024-07-20 19:05:56 -07:00 (committed by GitHub)
Commit: f6225623e9
37 changed files with 570 additions and 89 deletions
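
The diff below does two related things: it threads a new `drop_params` flag from `OpenAIChatCompletion.completion()` into `acompletion()` and `async_streaming()`, and it catches `openai.UnprocessableEntityError` (HTTP 422) so a call can be retried once with the provider-rejected parameters removed (see https://github.com/BerriAI/litellm/issues/4800). A minimal caller-side sketch, assuming litellm's public `completion()` API; the extra `response_format` argument is illustrative only:

```python
import litellm

# Ask litellm to drop provider-rejected params instead of failing the call.
# This can be set globally (as here) or passed per-call as drop_params=True;
# the diff below checks both `litellm.drop_params` and the threaded argument.
litellm.drop_params = True

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    # Illustrative optional param: if the backend 422s on it, the handler in
    # this diff strips it and retries once.
    response_format={"type": "json_object"},
)
print(response.choices[0].message.content)
```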


@@ -831,6 +831,7 @@ class OpenAIChatCompletion(BaseLLM):
         client=None,
         organization: Optional[str] = None,
         custom_llm_provider: Optional[str] = None,
+        drop_params: Optional[bool] = None,
     ):
         super().completion()
         exception_mapping_worked = False
@@ -886,6 +887,7 @@ class OpenAIChatCompletion(BaseLLM):
                     client=client,
                     max_retries=max_retries,
                     organization=organization,
+                    drop_params=drop_params,
                 )
             else:
                 return self.acompletion(
@@ -899,6 +901,7 @@ class OpenAIChatCompletion(BaseLLM):
                     client=client,
                     max_retries=max_retries,
                     organization=organization,
+                    drop_params=drop_params,
                 )
             elif optional_params.get("stream", False):
                 return self.streaming(
@@ -962,6 +965,33 @@ class OpenAIChatCompletion(BaseLLM):
                     model_response_object=model_response,
                     _response_headers=headers,
                 )
+            except openai.UnprocessableEntityError as e:
+                ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
+                if litellm.drop_params is True or drop_params is True:
+                    if e.body is not None and e.body.get("detail"):  # type: ignore
+                        detail = e.body.get("detail")  # type: ignore
+                        invalid_params: List[str] = []
+                        if (
+                            isinstance(detail, List)
+                            and len(detail) > 0
+                            and isinstance(detail[0], dict)
+                        ):
+                            for error_dict in detail:
+                                if (
+                                    error_dict.get("loc")
+                                    and isinstance(error_dict.get("loc"), list)
+                                    and len(error_dict.get("loc")) == 2
+                                ):
+                                    invalid_params.append(error_dict["loc"][1])
+                        new_data = {}
+                        for k, v in optional_params.items():
+                            if k not in invalid_params:
+                                new_data[k] = v
+                        optional_params = new_data
+                else:
+                    raise e
+                # e.message
             except Exception as e:
                 if print_verbose is not None:
                     print_verbose(f"openai.py: Received openai error - {str(e)}")
@@ -1019,30 +1049,36 @@ class OpenAIChatCompletion(BaseLLM):
         client=None,
         max_retries=None,
         headers=None,
+        drop_params: Optional[bool] = None,
     ):
         response = None
         try:
-            openai_aclient = self._get_openai_client(
-                is_async=True,
-                api_key=api_key,
-                api_base=api_base,
-                timeout=timeout,
-                max_retries=max_retries,
-                organization=organization,
-                client=client,
-            )
+            for _ in range(
+                2
+            ):  # if call fails due to alternating messages, retry with reformatted message
+                try:
+                    openai_aclient = self._get_openai_client(
+                        is_async=True,
+                        api_key=api_key,
+                        api_base=api_base,
+                        timeout=timeout,
+                        max_retries=max_retries,
+                        organization=organization,
+                        client=client,
+                    )
-            ## LOGGING
-            logging_obj.pre_call(
-                input=data["messages"],
-                api_key=openai_aclient.api_key,
-                additional_args={
-                    "headers": {"Authorization": f"Bearer {openai_aclient.api_key}"},
-                    "api_base": openai_aclient._base_url._uri_reference,
-                    "acompletion": True,
-                    "complete_input_dict": data,
-                },
-            )
+                    ## LOGGING
+                    logging_obj.pre_call(
+                        input=data["messages"],
+                        api_key=openai_aclient.api_key,
+                        additional_args={
+                            "headers": {
+                                "Authorization": f"Bearer {openai_aclient.api_key}"
+                            },
+                            "api_base": openai_aclient._base_url._uri_reference,
+                            "acompletion": True,
+                            "complete_input_dict": data,
+                        },
+                    )
-            headers, response = await self.make_openai_chat_completion_request(
-                openai_aclient=openai_aclient, data=data, timeout=timeout
@@ -1063,6 +1099,51 @@ class OpenAIChatCompletion(BaseLLM):
-            )
-        except Exception as e:
-            raise e
+                    headers, response = await self.make_openai_chat_completion_request(
+                        openai_aclient=openai_aclient, data=data, timeout=timeout
+                    )
+                    stringified_response = response.model_dump()
+                    logging_obj.post_call(
+                        input=data["messages"],
+                        api_key=api_key,
+                        original_response=stringified_response,
+                        additional_args={"complete_input_dict": data},
+                    )
+                    logging_obj.model_call_details["response_headers"] = headers
+                    return convert_to_model_response_object(
+                        response_object=stringified_response,
+                        model_response_object=model_response,
+                        hidden_params={"headers": headers},
+                    )
+                except openai.UnprocessableEntityError as e:
+                    ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
+                    if litellm.drop_params is True or drop_params is True:
+                        if e.body is not None and e.body.get("detail"):  # type: ignore
+                            detail = e.body.get("detail")  # type: ignore
+                            invalid_params: List[str] = []
+                            if (
+                                isinstance(detail, List)
+                                and len(detail) > 0
+                                and isinstance(detail[0], dict)
+                            ):
+                                for error_dict in detail:
+                                    if (
+                                        error_dict.get("loc")
+                                        and isinstance(error_dict.get("loc"), list)
+                                        and len(error_dict.get("loc")) == 2
+                                    ):
+                                        invalid_params.append(error_dict["loc"][1])
+                            new_data = {}
+                            for k, v in data.items():
+                                if k not in invalid_params:
+                                    new_data[k] = v
+                            data = new_data
+                    else:
+                        raise e
+                    # e.message
+        except Exception as e:
+            raise e
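
Both async paths now wrap the request in a two-attempt loop: the first attempt sends the full payload, a 422 prunes `data`, and the second attempt goes out without the rejected keys. A condensed, self-contained sketch of that control flow; `call_api` and `UnsupportedParamsError` are hypothetical stand-ins for the OpenAI client call and `openai.UnprocessableEntityError`:

```python
import asyncio


class UnsupportedParamsError(Exception):
    """Hypothetical stand-in for openai.UnprocessableEntityError."""

    def __init__(self, invalid_params):
        super().__init__("unsupported params")
        self.invalid_params = invalid_params


async def call_api(**data):
    # Hypothetical backend that 422s on the 'tools' param.
    if "tools" in data:
        raise UnsupportedParamsError(invalid_params=["tools"])
    return {"ok": True, "sent": sorted(data)}


async def call_with_param_drop(data: dict):
    # Same shape as the loop in the diff: at most two attempts, the second
    # one with the rejected keys filtered out of the payload.
    last_err: Exception = RuntimeError("no attempts made")
    for _ in range(2):
        try:
            return await call_api(**data)
        except UnsupportedParamsError as e:
            data = {k: v for k, v in data.items() if k not in e.invalid_params}
            last_err = e
    raise last_err


print(asyncio.run(call_with_param_drop({"messages": [], "tools": []})))
```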
    def streaming(
        self,
@@ -1126,29 +1207,31 @@
         client=None,
         max_retries=None,
         headers=None,
+        drop_params: Optional[bool] = None,
     ):
         response = None
         try:
-            openai_aclient = self._get_openai_client(
-                is_async=True,
-                api_key=api_key,
-                api_base=api_base,
-                timeout=timeout,
-                max_retries=max_retries,
-                organization=organization,
-                client=client,
-            )
-            ## LOGGING
-            logging_obj.pre_call(
-                input=data["messages"],
-                api_key=api_key,
-                additional_args={
-                    "headers": headers,
-                    "api_base": api_base,
-                    "acompletion": True,
-                    "complete_input_dict": data,
-                },
-            )
+            for _ in range(2):
+                try:
+                    openai_aclient = self._get_openai_client(
+                        is_async=True,
+                        api_key=api_key,
+                        api_base=api_base,
+                        timeout=timeout,
+                        max_retries=max_retries,
+                        organization=organization,
+                        client=client,
+                    )
+                    ## LOGGING
+                    logging_obj.pre_call(
+                        input=data["messages"],
+                        api_key=api_key,
+                        additional_args={
+                            "headers": headers,
+                            "api_base": api_base,
+                            "acompletion": True,
+                            "complete_input_dict": data,
+                        },
+                    )
-            headers, response = await self.make_openai_chat_completion_request(
-                openai_aclient=openai_aclient, data=data, timeout=timeout
@@ -1170,14 +1253,63 @@ class OpenAIChatCompletion(BaseLLM):
-                raise OpenAIError(
-                    status_code=500,
-                    message=f"{str(e)}\n\nOriginal Response: {response.text}",
-                )
-            else:
-                if type(e).__name__ == "ReadTimeout":
-                    raise OpenAIError(status_code=408, message=f"{type(e).__name__}")
-                elif hasattr(e, "status_code"):
-                    raise OpenAIError(status_code=e.status_code, message=str(e))
-                else:
-                    raise OpenAIError(status_code=500, message=f"{str(e)}")
+                    headers, response = await self.make_openai_chat_completion_request(
+                        openai_aclient=openai_aclient, data=data, timeout=timeout
+                    )
+                    logging_obj.model_call_details["response_headers"] = headers
+                    streamwrapper = CustomStreamWrapper(
+                        completion_stream=response,
+                        model=model,
+                        custom_llm_provider="openai",
+                        logging_obj=logging_obj,
+                        stream_options=data.get("stream_options", None),
+                    )
+                    return streamwrapper
+                except openai.UnprocessableEntityError as e:
+                    ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
+                    if litellm.drop_params is True or drop_params is True:
+                        if e.body is not None and e.body.get("detail"):  # type: ignore
+                            detail = e.body.get("detail")  # type: ignore
+                            invalid_params: List[str] = []
+                            if (
+                                isinstance(detail, List)
+                                and len(detail) > 0
+                                and isinstance(detail[0], dict)
+                            ):
+                                for error_dict in detail:
+                                    if (
+                                        error_dict.get("loc")
+                                        and isinstance(error_dict.get("loc"), list)
+                                        and len(error_dict.get("loc")) == 2
+                                    ):
+                                        invalid_params.append(error_dict["loc"][1])
+                            new_data = {}
+                            for k, v in data.items():
+                                if k not in invalid_params:
+                                    new_data[k] = v
+                            data = new_data
+                    else:
+                        raise e
+                except (
+                    Exception
+                ) as e:  # need to exception handle here. async exceptions don't get caught in sync functions.
+                    if response is not None and hasattr(response, "text"):
+                        raise OpenAIError(
+                            status_code=500,
+                            message=f"{str(e)}\n\nOriginal Response: {response.text}",
+                        )
+                    else:
+                        if type(e).__name__ == "ReadTimeout":
+                            raise OpenAIError(
+                                status_code=408, message=f"{type(e).__name__}"
+                            )
+                        elif hasattr(e, "status_code"):
+                            raise OpenAIError(status_code=e.status_code, message=str(e))
+                        else:
+                            raise OpenAIError(status_code=500, message=f"{str(e)}")
    # Embedding
    async def make_openai_embedding_request(
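
The `litellm_return-response_headers` side of the merge is visible in the small additions above: each path stores the raw provider headers on `logging_obj.model_call_details["response_headers"]` and forwards `hidden_params={"headers": headers}` into `convert_to_model_response_object`. Assuming those hidden params surface on the returned response object as `_hidden_params` (the exact key layout is an assumption, not confirmed by this diff), a caller could inspect OpenAI's `x-ratelimit-*` headers roughly like this:

```python
import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)

# Assumption: the headers dict wired up in this diff is exposed via
# `_hidden_params` on the response object.
headers = response._hidden_params.get("headers", {})
print(headers.get("x-ratelimit-remaining-requests"))
```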