diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 70e28a7d63..5ac2702899 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -7,7 +7,7 @@ from typing import Callable, Optional
 # This file just has the openai config classes.
 # For implementation check out completion() in main.py
 
-class CustomOpenAIError(Exception):
+class OpenAIError(Exception):
     def __init__(self, status_code, message):
         self.status_code = status_code
         self.message = message
@@ -163,7 +163,7 @@ class OpenAIChatCompletion(BaseLLM):
     def convert_to_model_response_object(self, response_object: Optional[dict]=None, model_response_object: Optional[ModelResponse]=None):
         try:
             if response_object is None or model_response_object is None:
-                raise CustomOpenAIError(status_code=500, message="Error in response object format")
+                raise OpenAIError(status_code=500, message="Error in response object format")
             choice_list=[]
             for idx, choice in enumerate(response_object["choices"]):
                 message = Message(content=choice["message"]["content"], role=choice["message"]["role"])
@@ -181,7 +181,7 @@ class OpenAIChatCompletion(BaseLLM):
             model_response_object.model = response_object["model"]
             return model_response_object
         except:
-            CustomOpenAIError(status_code=500, message="Invalid response object.")
+            OpenAIError(status_code=500, message="Invalid response object.")
 
     def completion(self,
               model: Optional[str]=None,
@@ -193,58 +193,79 @@ class OpenAIChatCompletion(BaseLLM):
               logging_obj=None,
               optional_params=None,
               litellm_params=None,
-              logger_fn=None):
+              logger_fn=None,
+              headers: Optional[dict]=None):
         super().completion()
-        headers = self.validate_environment(api_key=api_key)
-        if model is None or messages is None:
-            raise CustomOpenAIError(status_code=422, message=f"Missing model or messages")
-
-        for _ in range(2): # if call fails due to alternating messages, retry with reformatted message
-            data = {
-                "model": model,
-                "messages": messages,
-                **optional_params
-            }
-            try:
-                if "stream" in optional_params and optional_params["stream"] == True:
-                    response = self._client_session.post(
-                        url=f"{api_base}/chat/completions",
-                        json=data,
-                        headers=headers,
-                        stream=optional_params["stream"]
-                    )
-                    if response.status_code != 200:
-                        raise CustomOpenAIError(status_code=response.status_code, message=response.text)
-
-                    ## RESPONSE OBJECT
-                    return response.iter_lines()
-                else:
-                    response = self._client_session.post(
-                        url=f"{api_base}/chat/completions",
-                        json=data,
-                        headers=headers,
-                    )
-                    if response.status_code != 200:
-                        raise CustomOpenAIError(status_code=response.status_code, message=response.text)
-
-                    ## RESPONSE OBJECT
-                    return self.convert_to_model_response_object(response_object=response.json(), model_response_object=model_response)
-            except Exception as e:
-                if "Conversation roles must alternate user/assistant" in str(e) or "user and assistant roles should be alternating" in str(e):
-                    # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, add a blank 'user' or 'assistant' message to ensure compatibility
-                    new_messages = []
-                    for i in range(len(messages)-1):
-                        new_messages.append(messages[i])
-                        if messages[i]["role"] == messages[i+1]["role"]:
-                            if messages[i]["role"] == "user":
-                                new_messages.append({"role": "assistant", "content": ""})
-                            else:
-                                new_messages.append({"role": "user", "content": ""})
-                    new_messages.append(messages[-1])
-                    messages = new_messages
-                elif "Last message must have role `user`" in str(e):
-                    new_messages = messages
new_messages.append({"role": "user", "content": ""}) - messages = new_messages - else: - raise e + exception_mapping_worked = False + try: + if headers is None: + headers = self.validate_environment(api_key=api_key) + if model is None or messages is None: + raise OpenAIError(status_code=422, message=f"Missing model or messages") + + for _ in range(2): # if call fails due to alternating messages, retry with reformatted message + data = { + "model": model, + "messages": messages, + **optional_params + } + + ## LOGGING + logging_obj.pre_call( + input=messages, + api_key=api_key, + additional_args={"headers": headers, "api_base": api_base}, + ) + + try: + if "stream" in optional_params and optional_params["stream"] == True: + response = self._client_session.post( + url=f"{api_base}/chat/completions", + json=data, + headers=headers, + stream=optional_params["stream"] + ) + if response.status_code != 200: + raise OpenAIError(status_code=response.status_code, message=response.text) + + ## RESPONSE OBJECT + return response.iter_lines() + else: + response = self._client_session.post( + url=f"{api_base}/chat/completions", + json=data, + headers=headers, + ) + if response.status_code != 200: + raise OpenAIError(status_code=response.status_code, message=response.text) + + ## RESPONSE OBJECT + return self.convert_to_model_response_object(response_object=response.json(), model_response_object=model_response) + except Exception as e: + if "Conversation roles must alternate user/assistant" in str(e) or "user and assistant roles should be alternating" in str(e): + # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, add a blank 'user' or 'assistant' message to ensure compatibility + new_messages = [] + for i in range(len(messages)-1): + new_messages.append(messages[i]) + if messages[i]["role"] == messages[i+1]["role"]: + if messages[i]["role"] == "user": + new_messages.append({"role": "assistant", "content": ""}) + else: + new_messages.append({"role": "user", "content": ""}) + new_messages.append(messages[-1]) + messages = new_messages + elif "Last message must have role `user`" in str(e): + new_messages = messages + new_messages.append({"role": "user", "content": ""}) + messages = new_messages + else: + raise e + except OpenAIError as e: + exception_mapping_worked = True + raise e + except Exception as e: + if exception_mapping_worked: + raise e + else: + import traceback + raise OpenAIError(status_code=500, message=traceback.format_exc()) diff --git a/litellm/main.py b/litellm/main.py index 182c57dac6..6a4ce3a92c 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -198,7 +198,6 @@ def completion( logit_bias: dict = {}, user: str = "", deployment_id = None, - request_timeout: Optional[int] = None, # set api_base, api_version, api_key api_base: Optional[str] = None, @@ -270,7 +269,7 @@ def completion( eos_token = kwargs.get("eos_token", None) ######## end of unpacking kwargs ########### openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key"] - litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", 
"model_list", "num_retries", "context_window_fallback_dict", "roles", "final_prompt_value", "bos_token", "eos_token"] + litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", "model_list", "num_retries", "context_window_fallback_dict", "roles", "final_prompt_value", "bos_token", "eos_token", "request_timeout"] default_params = openai_params + litellm_params non_default_params = {k: v for k,v in kwargs.items() if k not in default_params} # model-specific params - pass them straight to the model/provider if mock_response: @@ -334,7 +333,6 @@ def completion( frequency_penalty=frequency_penalty, logit_bias=logit_bias, user=user, - request_timeout=request_timeout, deployment_id=deployment_id, # params to identify the model model=model, @@ -464,38 +462,20 @@ def completion( if k not in optional_params: # completion(top_k=3) > openai_config(top_k=3) <- allows for dynamic variables to be passed in optional_params[k] = v - ## LOGGING - logging.pre_call( - input=messages, - api_key=api_key, - additional_args={"headers": headers, "api_base": api_base}, - ) ## COMPLETION CALL try: - if custom_llm_provider == "custom_openai": - response = openai_chat_completions.completion( - model=model, - messages=messages, - model_response=model_response, - print_verbose=print_verbose, - api_key=api_key, - api_base=api_base, - logging_obj=logging, - optional_params=optional_params, - litellm_params=litellm_params, - logger_fn=logger_fn - ) - else: - response = openai.ChatCompletion.create( - model=model, - messages=messages, - headers=headers, # None by default - api_base=api_base, # thread safe setting base, key, api_version - api_key=api_key, - api_type="openai", - api_version=api_version, # default None - **optional_params, - ) + response = openai_chat_completions.completion( + model=model, + messages=messages, + model_response=model_response, + print_verbose=print_verbose, + api_key=api_key, + api_base=api_base, + logging_obj=logging, + optional_params=optional_params, + litellm_params=litellm_params, + logger_fn=logger_fn + ) except Exception as e: ## LOGGING - log the original exception returned logging.post_call( diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 10c5ed7066..dc00e4c36d 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -95,20 +95,6 @@ def test_completion_claude(): # pytest.fail(f"Error occurred: {e}") # test_completion_aleph_alpha_control_models() -def test_completion_with_litellm_call_id(): - try: - litellm.use_client = False - response = completion( - model="gpt-3.5-turbo", messages=messages) - print(response) - if 'litellm_call_id' in response: - pytest.fail(f"Error occurred: litellm_call_id in response objects") - print(response.usage) - print(response.usage.completion_tokens) - - except Exception as e: - pytest.fail(f"Error occurred: {e}") -# test_completion_with_litellm_call_id() import openai def test_completion_gpt4_turbo(): try: @@ -381,6 +367,8 @@ def test_completion_cohere(): # commenting for now as the cohere endpoint is bei def test_completion_openai(): try: + litellm.set_verbose=True + print(f"api key: {os.environ['OPENAI_API_KEY']}") litellm.api_key = os.environ['OPENAI_API_KEY'] response = completion(model="gpt-3.5-turbo", messages=messages, max_tokens=10, request_timeout=10) 
print("This is the response object\n", response) diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py index dffbe37591..58872f1f30 100644 --- a/litellm/tests/test_exceptions.py +++ b/litellm/tests/test_exceptions.py @@ -62,7 +62,7 @@ def test_context_window_with_fallbacks(model): # for model in litellm.models_by_provider["bedrock"]: # test_context_window(model=model) -# test_context_window(model="azure/chatgpt-v-2") +# test_context_window(model="gpt-3.5-turbo") # test_context_window_with_fallbacks(model="command-nightly") # Test 2: InvalidAuth Errors @pytest.mark.parametrize("model", models) @@ -158,7 +158,7 @@ def invalid_auth(model): # set the model key to an invalid key, depending on th # for model in litellm.models_by_provider["bedrock"]: # invalid_auth(model=model) -# invalid_auth(model="azure/chatgpt-v-2") +# invalid_auth(model="gpt-3.5-turbo") # Test 3: Invalid Request Error @pytest.mark.parametrize("model", models) @@ -168,7 +168,7 @@ def test_invalid_request_error(model): with pytest.raises(InvalidRequestError): completion(model=model, messages=messages, max_tokens="hello world") -test_invalid_request_error(model="azure/chatgpt-v-2") +# test_invalid_request_error(model="gpt-3.5-turbo") # Test 3: Rate Limit Errors # def test_model_call(model): # try: diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py index 4d62808012..df8dd02ac0 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -897,8 +897,9 @@ def ai21_completion_call_bad_key(): # test on openai completion call def test_openai_chat_completion_call(): try: + litellm.set_verbose = True response = completion( - model="gpt-3.5-turbo", messages=messages, stream=True, logger_fn=logger_fn, max_tokens=10 + model="gpt-3.5-turbo", messages=messages, stream=True ) complete_response = "" start_time = time.time() @@ -915,7 +916,7 @@ def test_openai_chat_completion_call(): print(f"error occurred: {traceback.format_exc()}") pass -# test_openai_chat_completion_call() +test_openai_chat_completion_call() # # test on together ai completion call - starcoder def test_together_ai_completion_call_starcoder(): diff --git a/litellm/utils.py b/litellm/utils.py index 6a78beb928..f628aa927c 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1358,7 +1358,6 @@ def get_optional_params( # use the openai defaults frequency_penalty=0, logit_bias={}, user="", - request_timeout=None, deployment_id=None, model=None, custom_llm_provider="", @@ -1383,7 +1382,6 @@ def get_optional_params( # use the openai defaults "logit_bias":{}, "user":"", "deployment_id":None, - "request_timeout":None, "model":None, "custom_llm_provider":"", } @@ -1408,8 +1406,6 @@ def get_optional_params( # use the openai defaults if k == "n" and n == 1: # langchain sends n=1 as a default value pass # Always keeps this in elif code blocks - elif k == "request_timeout": # litellm handles request time outs - pass else: unsupported_params[k] = non_default_params[k] if unsupported_params and not litellm.drop_params: @@ -1761,7 +1757,7 @@ def get_optional_params( # use the openai defaults if stream: optional_params["stream"] = stream elif custom_llm_provider == "deepinfra": - supported_params = ["temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "deployment_id", "request_timeout"] + supported_params = ["temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", 
"deployment_id"] _check_valid_arg(supported_params=supported_params) optional_params = non_default_params if temperature != None: @@ -1769,7 +1765,7 @@ def get_optional_params( # use the openai defaults temperature = 0.0001 # close to 0 optional_params["temperature"] = temperature else: # assume passing in params for openai/azure openai - supported_params = ["functions", "function_call", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "deployment_id", "request_timeout"] + supported_params = ["functions", "function_call", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "deployment_id"] _check_valid_arg(supported_params=supported_params) optional_params = non_default_params # if user passed in non-default kwargs for specific providers/models, pass them along @@ -2881,8 +2877,6 @@ def exception_type( llm_provider="openrouter" ) original_exception.llm_provider = "openrouter" - else: - original_exception.llm_provider = "openai" if "This model's maximum context length is" in original_exception._message: raise ContextWindowExceededError( message=str(original_exception), @@ -2896,7 +2890,60 @@ def exception_type( exception_type = type(original_exception).__name__ else: exception_type = "" - if custom_llm_provider == "anthropic": # one of the anthropics + if custom_llm_provider == "openai": + if "This model's maximum context length is" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"AzureException - {original_exception.message}", + llm_provider="azure", + model=model + ) + elif "invalid_request_error" in error_str: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"AzureException - {original_exception.message}", + llm_provider="azure", + model=model + ) + elif hasattr(original_exception, "status_code"): + exception_mapping_worked = True + if original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"OpenAIException - {original_exception.message}", + llm_provider="openai", + model=model + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"OpenAIException - {original_exception.message}", + model=model, + llm_provider="openai" + ) + if original_exception.status_code == 422: + exception_mapping_worked = True + raise InvalidRequestError( + message=f"OpenAIException - {original_exception.message}", + model=model, + llm_provider="openai", + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"OpenAIException - {original_exception.message}", + model=model, + llm_provider="openai", + ) + else: + exception_mapping_worked = True + raise APIError( + status_code=original_exception.status_code, + message=f"OpenAIException - {original_exception.message}", + llm_provider="openai", + model=model + ) + elif custom_llm_provider == "anthropic": # one of the anthropics if hasattr(original_exception, "message"): if "prompt is too long" in original_exception.message: exception_mapping_worked = True @@ -3941,7 +3988,7 @@ class CustomStreamWrapper: except: raise ValueError(f"Unable to parse response. 
Original response: {chunk}") - def handle_custom_openai_chat_completion_chunk(self, chunk): + def handle_openai_chat_completion_chunk(self, chunk): try: str_line = chunk.decode("utf-8") # Convert bytes to string text = "" @@ -3977,12 +4024,6 @@ class CustomStreamWrapper: except: raise ValueError(f"Unable to parse response. Original response: {chunk}") - def handle_openai_chat_completion_chunk(self, chunk): - try: - return chunk["choices"][0]["delta"]["content"] - except: - return "" - def handle_baseten_chunk(self, chunk): try: chunk = chunk.decode("utf-8") @@ -4187,9 +4228,9 @@ class CustomStreamWrapper: if "error" in chunk: exception_type(model=self.model, custom_llm_provider=self.custom_llm_provider, original_exception=chunk["error"]) completion_obj = chunk - elif self.custom_llm_provider == "custom_openai": + elif self.custom_llm_provider == "openai": chunk = next(self.completion_stream) - response_obj = self.handle_custom_openai_chat_completion_chunk(chunk) + response_obj = self.handle_openai_chat_completion_chunk(chunk) completion_obj["content"] = response_obj["text"] print_verbose(f"completion obj content: {completion_obj['content']}") if response_obj["is_finished"]: