Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 03:34:10 +00:00)
fix(openai.py): adding support for exception mapping for openai-compatible apis via http calls
This commit is contained in:
parent b455bdfff1 · commit ec5e7aa4a9
8 changed files with 4943 additions and 32 deletions
Binary file not shown.
Binary file not shown.
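The net effect of this commit: requests sent to an OpenAI-compatible api_base are tagged as the "custom_openai" provider, issued over raw HTTP, and non-200 responses are mapped back to litellm's typed exceptions. A minimal usage sketch (illustrative only, modeled on the test added in this commit; the placeholder key and the exception handling shown here are assumptions, not part of the diff):

    import os
    import litellm
    from litellm import completion

    os.environ["PERPLEXITYAI_API_KEY"] = "pplx-..."  # placeholder, set to a real key

    try:
        response = completion(
            model="mistral-7b-instruct",
            messages=[{"role": "user", "content": "Hey"}],
            api_base="https://api.perplexity.ai",  # detected as an openai-compatible endpoint
        )
        print(response)
    except litellm.exceptions.AuthenticationError as e:
        # a 401 from the endpoint now surfaces as a typed litellm exception
        print(f"auth error: {e}")
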
@@ -246,30 +246,53 @@ class OpenAIChatCompletion(BaseLLM):
                 logger_fn=None):
         super().completion()
         headers = self.validate_environment(api_key=api_key)
-        data = {
-            "messages": messages,
-            **optional_params
-        }
-        if "stream" in optional_params and optional_params["stream"] == True:
-            response = self._client_session.post(
-                url=f"{api_base}/chat/completions",
-                json=data,
-                headers=headers,
-                stream=optional_params["stream"]
-            )
-            if response.status_code != 200:
-                raise CustomOpenAIError(status_code=response.status_code, message=response.text)
-
-            ## RESPONSE OBJECT
-            return response.iter_lines()
-        else:
-            response = self._client_session.post(
-                url=f"{api_base}/chat/completions",
-                json=data,
-                headers=headers,
-            )
-            if response.status_code != 200:
-                raise CustomOpenAIError(status_code=response.status_code, message=response.text)
-
-            ## RESPONSE OBJECT
-            return self.convert_to_model_response_object(response_object=response.json(), model_response_object=model_response)
+
+        for _ in range(2): # if call fails due to alternating messages, retry with reformatted message
+            data = {
+                "model": model,
+                "messages": messages,
+                **optional_params
+            }
+            try:
+                if "stream" in optional_params and optional_params["stream"] == True:
+                    response = self._client_session.post(
+                        url=f"{api_base}/chat/completions",
+                        json=data,
+                        headers=headers,
+                        stream=optional_params["stream"]
+                    )
+                    if response.status_code != 200:
+                        raise CustomOpenAIError(status_code=response.status_code, message=response.text)
+
+                    ## RESPONSE OBJECT
+                    return response.iter_lines()
+                else:
+                    response = self._client_session.post(
+                        url=f"{api_base}/chat/completions",
+                        json=data,
+                        headers=headers,
+                    )
+                    if response.status_code != 200:
+                        raise CustomOpenAIError(status_code=response.status_code, message=response.text)
+
+                    ## RESPONSE OBJECT
+                    return self.convert_to_model_response_object(response_object=response.json(), model_response_object=model_response)
+            except Exception as e:
+                if "Conversation roles must alternate user/assistant" in str(e) or "user and assistant roles should be alternating" in str(e):
+                    # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, add a blank 'user' or 'assistant' message to ensure compatibility
+                    new_messages = []
+                    for i in range(len(messages)-1):
+                        new_messages.append(messages[i])
+                        if messages[i]["role"] == messages[i+1]["role"]:
+                            if messages[i]["role"] == "user":
+                                new_messages.append({"role": "assistant", "content": ""})
+                            else:
+                                new_messages.append({"role": "user", "content": ""})
+                    new_messages.append(messages[-1])
+                    messages = new_messages
+                elif "Last message must have role `user`" in str(e):
+                    new_messages = messages
+                    new_messages.append({"role": "user", "content": ""})
+                    messages = new_messages
+                else:
+                    raise e
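For readers skimming the hunk above, a small standalone illustration of what the role-alternation fallback does when the provider rejects two consecutive messages with the same role (ensure_alternating_roles is a made-up helper name for this sketch, not part of the commit):

    def ensure_alternating_roles(messages):
        # insert blank turns so user/assistant roles strictly alternate
        new_messages = []
        for i in range(len(messages) - 1):
            new_messages.append(messages[i])
            if messages[i]["role"] == messages[i + 1]["role"]:
                filler_role = "assistant" if messages[i]["role"] == "user" else "user"
                new_messages.append({"role": filler_role, "content": ""})
        new_messages.append(messages[-1])
        return new_messages

    messages = [
        {"role": "user", "content": "Hey"},
        {"role": "user", "content": "Hey"},
    ]
    print(ensure_alternating_roles(messages))
    # [{'role': 'user', 'content': 'Hey'}, {'role': 'assistant', 'content': ''}, {'role': 'user', 'content': 'Hey'}]
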
@@ -445,7 +445,7 @@ def completion(
             raise e

         if "stream" in optional_params and optional_params["stream"] == True:
-            response = CustomStreamWrapper(response, model, custom_llm_provider="openai", logging_obj=logging)
+            response = CustomStreamWrapper(response, model, custom_llm_provider=custom_llm_provider, logging_obj=logging)
             return response
         ## LOGGING
         logging.post_call(
File diff suppressed because it is too large
@@ -332,7 +332,6 @@ def logger(
     end_time=None # start/end time
 ):
     log_event_type = kwargs['log_event_type']
-    print(f"REACHES LOGGER: {log_event_type}")
     try:
         if log_event_type == 'pre_api_call':
             inference_params = copy.deepcopy(kwargs)
@@ -355,7 +354,6 @@ def logger(
                 with open(log_file, 'w') as f:
                     json.dump(existing_data, f, indent=2)
         elif log_event_type == 'post_api_call':
-            print(f"post api call kwargs: {kwargs}")
             if "stream" not in kwargs["optional_params"] or kwargs["optional_params"]["stream"] is False or kwargs.get("complete_streaming_response", False):
                 inference_params = copy.deepcopy(kwargs)
                 timestamp = inference_params.pop('start_time')
@@ -438,7 +436,6 @@ async def completion(request: Request):
 @router.post("/chat/completions")
 async def chat_completion(request: Request):
     data = await request.json()
-    print(f"data passed in: {data}")
     response = litellm_completion(data, type="chat_completion")
     return response

@@ -108,6 +108,28 @@ def test_completion_with_litellm_call_id():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

+def test_completion_perplexity_api():
+    try:
+        litellm.set_verbose=True
+        messages=[{
+            "role": "system",
+            "content": "You're a good bot"
+        },{
+            "role": "user",
+            "content": "Hey",
+        },{
+            "role": "user",
+            "content": "Hey",
+        }]
+        response = completion(
+            model="mistral-7b-instruct",
+            messages=messages,
+            api_base="https://api.perplexity.ai")
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+test_completion_perplexity_api()
 # commenting out as this is a flaky test on circle ci
 # def test_completion_nlp_cloud():
 #     try:
@@ -1419,7 +1419,9 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
     if api_base:
         for endpoint in litellm.openai_compatible_endpoints:
             if endpoint in api_base:
-                custom_llm_provider = "openai"
+                custom_llm_provider = "custom_openai"
+                if endpoint == "api.perplexity.ai":
+                    litellm.api_key = os.getenv("PERPLEXITYAI_API_KEY")
                 return model, custom_llm_provider

     # check if model in known model provider list -> for huggingface models, raise exception as they don't have a fixed provider (can be togetherai, anyscale, baseten, runpod, et.)
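A condensed sketch of the routing change above (illustrative; pick_provider is a made-up name, and it assumes "api.perplexity.ai" is listed in litellm.openai_compatible_endpoints, as the diff implies):

    import os
    import litellm

    def pick_provider(model, api_base=None, custom_llm_provider=None):
        # any api_base that matches a known openai-compatible endpoint is routed
        # through the "custom_openai" HTTP code path
        if api_base:
            for endpoint in litellm.openai_compatible_endpoints:
                if endpoint in api_base:
                    custom_llm_provider = "custom_openai"
                    if endpoint == "api.perplexity.ai":
                        litellm.api_key = os.getenv("PERPLEXITYAI_API_KEY")
                    return model, custom_llm_provider
        return model, custom_llm_provider

    print(pick_provider("mistral-7b-instruct", api_base="https://api.perplexity.ai"))
    # ('mistral-7b-instruct', 'custom_openai')
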
@@ -2936,6 +2938,45 @@ def exception_type(
         elif custom_llm_provider == "ollama":
             if "no attribute 'async_get_ollama_response_stream" in error_str:
                 raise ImportError("Import error - trying to use async for ollama. import async_generator failed. Try 'pip install async_generator'")
+        elif custom_llm_provider == "custom_openai":
+            if hasattr(original_exception, "status_code"):
+                exception_mapping_worked = True
+                if original_exception.status_code == 401:
+                    exception_mapping_worked = True
+                    raise AuthenticationError(
+                        message=f"CustomOpenAIException - {original_exception.message}",
+                        llm_provider="custom_openai",
+                        model=model
+                    )
+                elif original_exception.status_code == 408:
+                    exception_mapping_worked = True
+                    raise Timeout(
+                        message=f"CustomOpenAIException - {original_exception.message}",
+                        model=model,
+                        llm_provider="custom_openai"
+                    )
+                if original_exception.status_code == 422:
+                    exception_mapping_worked = True
+                    raise InvalidRequestError(
+                        message=f"CustomOpenAIException - {original_exception.message}",
+                        model=model,
+                        llm_provider="custom_openai",
+                    )
+                elif original_exception.status_code == 429:
+                    exception_mapping_worked = True
+                    raise RateLimitError(
+                        message=f"CustomOpenAIException - {original_exception.message}",
+                        model=model,
+                        llm_provider="custom_openai",
+                    )
+                else:
+                    exception_mapping_worked = True
+                    raise APIError(
+                        status_code=original_exception.status_code,
+                        message=f"CustomOpenAIException - {original_exception.message}",
+                        llm_provider="custom_openai",
+                        model=model
+                    )
         exception_mapping_worked = True
         raise APIError(status_code=500, message=str(original_exception), llm_provider=custom_llm_provider, model=model)
     except Exception as e:
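The mapping pattern above, reduced to a hedged sketch (map_custom_openai_error is a hypothetical helper; the 408 and 422 branches are omitted here, and this assumes the exception classes are importable from the top-level litellm package the way utils.py uses them):

    from litellm import AuthenticationError, RateLimitError, APIError

    def map_custom_openai_error(original_exception, model):
        # re-raise an HTTP error carrying a status_code as the matching typed exception
        status = getattr(original_exception, "status_code", None)
        message = f"CustomOpenAIException - {getattr(original_exception, 'message', original_exception)}"
        if status == 401:
            raise AuthenticationError(message=message, llm_provider="custom_openai", model=model)
        if status == 429:
            raise RateLimitError(message=message, llm_provider="custom_openai", model=model)
        raise APIError(status_code=status or 500, message=message, llm_provider="custom_openai", model=model)
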
@@ -3205,6 +3246,30 @@ class CustomStreamWrapper:
         except:
             raise ValueError(f"Unable to parse response. Original response: {chunk}")

+    def handle_custom_openai_chat_completion_chunk(self, chunk):
+        try:
+            str_line = chunk.decode("utf-8")  # Convert bytes to string
+            text = ""
+            is_finished = False
+            finish_reason = None
+            if str_line.startswith("data:"):
+                data_json = json.loads(str_line[5:])
+                print(f"delta content: {data_json['choices'][0]['delta']}")
+                text = data_json["choices"][0]["delta"].get("content", "")
+                if data_json["choices"][0].get("finish_reason", None):
+                    is_finished = True
+                    finish_reason = data_json["choices"][0]["finish_reason"]
+                return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+            elif "error" in str_line:
+                raise ValueError(f"Unable to parse response. Original response: {str_line}")
+            else:
+                return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+
+        except:
+            traceback.print_exc()
+            pass
+
     def handle_openai_text_completion_chunk(self, chunk):
         try:
             return chunk["choices"][0]["text"]
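An illustrative input/output pair for the chunk parser above, assuming the endpoint streams OpenAI-style server-sent events ("data: {...}" lines); the sample payload is made up:

    import json

    raw_chunk = b'data: {"choices": [{"delta": {"content": "Hello"}, "finish_reason": null}]}'
    str_line = raw_chunk.decode("utf-8")
    if str_line.startswith("data:"):
        data_json = json.loads(str_line[5:])
        # same extraction the parser performs
        print(data_json["choices"][0]["delta"].get("content", ""))  # -> Hello
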
@@ -3401,6 +3466,13 @@ class CustomStreamWrapper:
                 if "error" in chunk:
                     exception_type(model=self.model, custom_llm_provider=self.custom_llm_provider, original_exception=chunk["error"])
                 completion_obj = chunk
+            elif self.custom_llm_provider == "custom_openai":
+                chunk = next(self.completion_stream)
+                response_obj = self.handle_custom_openai_chat_completion_chunk(chunk)
+                completion_obj["content"] = response_obj["text"]
+                print(f"completion obj content: {completion_obj['content']}")
+                if response_obj["is_finished"]:
+                    model_response.choices[0].finish_reason = response_obj["finish_reason"]
             else: # openai chat/azure models
                 chunk = next(self.completion_stream)
                 model_response = chunk
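Putting the streaming pieces together, a hypothetical end-to-end call (not part of the diff; the model name and endpoint mirror the new test, and the iteration shown is only a sketch):

    from litellm import completion

    response = completion(
        model="mistral-7b-instruct",
        messages=[{"role": "user", "content": "Hey"}],
        api_base="https://api.perplexity.ai",
        stream=True,
    )
    # each raw line is parsed by handle_custom_openai_chat_completion_chunk under the hood
    for chunk in response:
        print(chunk)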