From f24786095abf0dfa4e8480bbb0049cc8a630f8dc Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Thu, 23 Nov 2023 17:35:26 -0800
Subject: [PATCH] fix(vertex_ai.py): fix exception mapping for vertex ai

---
 docs/my-website/docs/proxy_server.md |   2 +-
 litellm/llms/vertex_ai.py            | 173 ++++++++++++++-------------
 litellm/tests/test_exceptions.py     |   2 +-
 litellm/utils.py                     |  17 +++
 4 files changed, 107 insertions(+), 87 deletions(-)

diff --git a/docs/my-website/docs/proxy_server.md b/docs/my-website/docs/proxy_server.md
index 6adf0b10e4..438e68cf78 100644
--- a/docs/my-website/docs/proxy_server.md
+++ b/docs/my-website/docs/proxy_server.md
@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# Local OpenAI Proxy Server
+# [OLD PROXY 👉 [**NEW** proxy here](./simple_proxy.md)] Local OpenAI Proxy Server
 
 A fast, and lightweight OpenAI-compatible server to call 100+ LLM APIs. 
 
diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py
index 4ca2994c98..9d468306b0 100644
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@@ -73,93 +73,96 @@ def completion(
     try:
         import vertexai
     except:
-        raise Exception("vertexai import failed please run `pip install google-cloud-aiplatform`")
-    from vertexai.preview.language_models import ChatModel, CodeChatModel, InputOutputTextPair
-    from vertexai.language_models import TextGenerationModel, CodeGenerationModel
+        raise VertexAIError(status_code=400,message="vertexai import failed please run `pip install google-cloud-aiplatform`")
+    try: 
+        from vertexai.preview.language_models import ChatModel, CodeChatModel, InputOutputTextPair
+        from vertexai.language_models import TextGenerationModel, CodeGenerationModel
 
-    vertexai.init(
-        project=vertex_project, location=vertex_location
-    )
-
-    ## Load Config
-    config = litellm.VertexAIConfig.get_config()
-    for k, v in config.items(): 
-        if k not in optional_params: 
-            optional_params[k] = v
-
-    # vertexai does not use an API key, it looks for credentials.json in the environment
-
-    prompt = " ".join([message["content"] for message in messages])
-
-    mode = "" 
-    if model in litellm.vertex_chat_models:
-        chat_model = ChatModel.from_pretrained(model)
-        mode = "chat"
-    elif model in litellm.vertex_text_models:
-        text_model = TextGenerationModel.from_pretrained(model)
-        mode = "text"
-    elif model in litellm.vertex_code_text_models:
-        text_model = CodeGenerationModel.from_pretrained(model)
-        mode = "text"
-    else: # vertex_code_chat_models
-        chat_model = CodeChatModel.from_pretrained(model)
-        mode = "chat"
-    
-    if mode == "chat":
-        chat = chat_model.start_chat()
-
-        ## LOGGING
-        logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params})
-
-        if "stream" in optional_params and optional_params["stream"] == True:
-            # NOTE: VertexAI does not accept stream=True as a param and raises an error,
-            # we handle this by removing 'stream' from optional params and sending the request
-            # after we get the response we add optional_params["stream"] = True, since main.py needs to know it's a streaming response to then transform it for the OpenAI format
-            optional_params.pop("stream", None) # vertex ai raises an error when passing stream in optional params
-            model_response = chat.send_message_streaming(prompt, **optional_params)
-            optional_params["stream"] = True
-            return model_response
-
-        completion_response = chat.send_message(prompt, **optional_params).text
-    elif mode == "text":
-        ## LOGGING
-        logging_obj.pre_call(input=prompt, api_key=None)
-
-        if "stream" in optional_params and optional_params["stream"] == True:
-            optional_params.pop("stream", None) # See note above on handling streaming for vertex ai 
-            model_response = text_model.predict_streaming(prompt, **optional_params)
-            optional_params["stream"] = True
-            return model_response
-
-        completion_response = text_model.predict(prompt, **optional_params).text
-        
-    ## LOGGING
-    logging_obj.post_call(
-        input=prompt, api_key=None, original_response=completion_response
-    )
-
-    ## RESPONSE OBJECT
-    if len(str(completion_response)) > 0: 
-        model_response["choices"][0]["message"][
-            "content"
-        ] = str(completion_response)
-    model_response["choices"][0]["message"]["content"] = str(completion_response)
-    model_response["created"] = int(time.time())
-    model_response["model"] = model
-    ## CALCULATING USAGE
-    prompt_tokens = len(
-        encoding.encode(prompt)
-    ) 
-    completion_tokens = len(
-        encoding.encode(model_response["choices"][0]["message"].get("content", ""))
-    )
-    usage = Usage(
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-            total_tokens=prompt_tokens + completion_tokens
+        vertexai.init(
+            project=vertex_project, location=vertex_location
         )
-    model_response.usage = usage
-    return model_response
+
+        ## Load Config
+        config = litellm.VertexAIConfig.get_config()
+        for k, v in config.items(): 
+            if k not in optional_params: 
+                optional_params[k] = v
+
+        # vertexai does not use an API key, it looks for credentials.json in the environment
+
+        prompt = " ".join([message["content"] for message in messages])
+
+        mode = "" 
+        if model in litellm.vertex_chat_models:
+            chat_model = ChatModel.from_pretrained(model)
+            mode = "chat"
+        elif model in litellm.vertex_text_models:
+            text_model = TextGenerationModel.from_pretrained(model)
+            mode = "text"
+        elif model in litellm.vertex_code_text_models:
+            text_model = CodeGenerationModel.from_pretrained(model)
+            mode = "text"
+        else: # vertex_code_chat_models
+            chat_model = CodeChatModel.from_pretrained(model)
+            mode = "chat"
+        
+        if mode == "chat":
+            chat = chat_model.start_chat()
+
+            ## LOGGING
+            logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params})
+
+            if "stream" in optional_params and optional_params["stream"] == True:
+                # NOTE: VertexAI does not accept stream=True as a param and raises an error,
+                # we handle this by removing 'stream' from optional params and sending the request
+                # after we get the response we add optional_params["stream"] = True, since main.py needs to know it's a streaming response to then transform it for the OpenAI format
+                optional_params.pop("stream", None) # vertex ai raises an error when passing stream in optional params
+                model_response = chat.send_message_streaming(prompt, **optional_params)
+                optional_params["stream"] = True
+                return model_response
+
+            completion_response = chat.send_message(prompt, **optional_params).text
+        elif mode == "text":
+            ## LOGGING
+            logging_obj.pre_call(input=prompt, api_key=None)
+
+            if "stream" in optional_params and optional_params["stream"] == True:
+                optional_params.pop("stream", None) # See note above on handling streaming for vertex ai 
+                model_response = text_model.predict_streaming(prompt, **optional_params)
+                optional_params["stream"] = True
+                return model_response
+
+            completion_response = text_model.predict(prompt, **optional_params).text
+            
+        ## LOGGING
+        logging_obj.post_call(
+            input=prompt, api_key=None, original_response=completion_response
+        )
+
+        ## RESPONSE OBJECT
+        if len(str(completion_response)) > 0: 
+            model_response["choices"][0]["message"][
+                "content"
+            ] = str(completion_response)
+        model_response["choices"][0]["message"]["content"] = str(completion_response)
+        model_response["created"] = int(time.time())
+        model_response["model"] = model
+        ## CALCULATING USAGE
+        prompt_tokens = len(
+            encoding.encode(prompt)
+        ) 
+        completion_tokens = len(
+            encoding.encode(model_response["choices"][0]["message"].get("content", ""))
+        )
+        usage = Usage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=prompt_tokens + completion_tokens
+            )
+        model_response.usage = usage
+        return model_response
+    except Exception as e: 
+        raise VertexAIError(status_code=500, message=str(e))
 
 
 def embedding():
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index acfedc3d76..c2a7a57ad9 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -64,7 +64,7 @@ def test_context_window_with_fallbacks(model):
 
 # for model in litellm.models_by_provider["bedrock"]:
 #     test_context_window(model=model)
-# test_context_window(model="command-nightly")
+# test_context_window(model="chat-bison")
 # test_context_window_with_fallbacks(model="command-nightly")
 # Test 2: InvalidAuth Errors
 @pytest.mark.parametrize("model", models)
diff --git a/litellm/utils.py b/litellm/utils.py
index 2d521bbf66..2a2dc9d204 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3816,6 +3816,23 @@ def exception_type(
                         llm_provider="vertex_ai",
                         response=original_exception.response
                     )
+                if hasattr(original_exception, "status_code"):
+                    if original_exception.status_code == 400:
+                        exception_mapping_worked = True
+                        raise BadRequestError(
+                            message=f"VertexAIException - {error_str}",
+                            model=model,
+                            llm_provider="vertex_ai",
+                            response=original_exception.response
+                        )
+                    if original_exception.status_code == 500: 
+                        exception_mapping_worked = True
+                        raise APIError(
+                            message=f"VertexAIException - {error_str}",
+                            model=model,
+                            llm_provider="vertex_ai",
+                            request=original_exception.request
+                        )
             elif custom_llm_provider == "palm":
                 if "503 Getting metadata" in error_str:
                     # auth errors look like this