diff --git a/dist/litellm-1.14.0.dev1-py3-none-any.whl b/dist/litellm-1.14.0.dev1-py3-none-any.whl
new file mode 100644
index 000000000..7428d252d
Binary files /dev/null and b/dist/litellm-1.14.0.dev1-py3-none-any.whl differ
diff --git a/dist/litellm-1.14.0.dev1.tar.gz b/dist/litellm-1.14.0.dev1.tar.gz
new file mode 100644
index 000000000..cd80b0e71
Binary files /dev/null and b/dist/litellm-1.14.0.dev1.tar.gz differ
diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py
index 3d814f22e..b7bc7935a 100644
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@@ -69,6 +69,7 @@ def completion(
     optional_params=None,
     litellm_params=None,
     logger_fn=None,
+    acompletion: bool=False
 ):
     try:
         import vertexai
@@ -77,7 +78,7 @@ def completion(
     try:
         from vertexai.preview.language_models import ChatModel, CodeChatModel, InputOutputTextPair
         from vertexai.language_models import TextGenerationModel, CodeGenerationModel
-        from vertexai.preview.generative_models import GenerativeModel, Part
+        from vertexai.preview.generative_models import GenerativeModel, Part, GenerationConfig
 
 
         vertexai.init(
@@ -99,13 +100,13 @@ def completion(
         request_str = ""
         response_obj = None
         if model in litellm.vertex_language_models:
-            chat_model = GenerativeModel(model)
+            llm_model = GenerativeModel(model)
             mode = ""
-            request_str += f"chat_model = GenerativeModel({model})\n"
+            request_str += f"llm_model = GenerativeModel({model})\n"
         elif model in litellm.vertex_chat_models:
-            chat_model = ChatModel.from_pretrained(model)
+            llm_model = ChatModel.from_pretrained(model)
             mode = "chat"
-            request_str += f"chat_model = ChatModel.from_pretrained({model})\n"
+            request_str += f"llm_model = ChatModel.from_pretrained({model})\n"
         elif model in litellm.vertex_text_models:
             text_model = TextGenerationModel.from_pretrained(model)
             mode = "text"
@@ -114,34 +115,38 @@ def completion(
             text_model = CodeGenerationModel.from_pretrained(model)
             mode = "text"
             request_str += f"text_model = CodeGenerationModel.from_pretrained({model})\n"
         else: # vertex_code_chat_models
-            chat_model = CodeChatModel.from_pretrained(model)
+            llm_model = CodeChatModel.from_pretrained(model)
             mode = "chat"
-            request_str += f"chat_model = CodeChatModel.from_pretrained({model})\n"
+            request_str += f"llm_model = CodeChatModel.from_pretrained({model})\n"
 
+        if acompletion == True and model in litellm.vertex_language_models: # [TODO] expand support to vertex ai chat + text models
+            if optional_params.get("stream", False) is True:
+                # async streaming
+                pass
+            return async_completion(llm_model=llm_model, mode=mode, prompt=prompt, logging_obj=logging_obj, request_str=request_str, model=model, model_response=model_response, **optional_params)
+
         if mode == "":
-            chat = chat_model.start_chat()
-            request_str+= f"chat = chat_model.start_chat()\n"
+            chat = llm_model.start_chat()
+            request_str+= f"chat = llm_model.start_chat()\n"
 
             if "stream" in optional_params and optional_params["stream"] == True:
                 request_str += f"chat.send_message_streaming({prompt}, **{optional_params})\n"
                 ## LOGGING
                 logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
-                model_response = chat.send_message(prompt, **optional_params)
+                model_response = chat.send_message(prompt, generation_config=GenerationConfig(**optional_params))
                 optional_params["stream"] = True
                 return model_response
             request_str += f"chat.send_message({prompt}, **{optional_params}).text\n"
+            ## LOGGING
             logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
-            response_obj = chat.send_message(prompt, **optional_params)
+            response_obj = chat.send_message(prompt, generation_config=GenerationConfig(**optional_params))
             completion_response = response_obj.text
             response_obj = response_obj._raw_response
         elif mode == "chat":
-            chat = chat_model.start_chat()
-            request_str+= f"chat = chat_model.start_chat()\n"
-
-            ## LOGGING
-
+            chat = llm_model.start_chat()
+            request_str+= f"chat = llm_model.start_chat()\n"
 
             if "stream" in optional_params and optional_params["stream"] == True:
                 # NOTE: VertexAI does not accept stream=True as a param and raises an error,
@@ -149,12 +154,14 @@ def completion(
             # after we get the response we add optional_params["stream"] = True, since main.py needs to know it's a streaming response to then transform it for the OpenAI format
                 optional_params.pop("stream", None) # vertex ai raises an error when passing stream in optional params
                 request_str += f"chat.send_message_streaming({prompt}, **{optional_params})\n"
+                ## LOGGING
                 logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
                 model_response = chat.send_message_streaming(prompt, **optional_params)
                 optional_params["stream"] = True
                 return model_response
             request_str += f"chat.send_message({prompt}, **{optional_params}).text\n"
+            ## LOGGING
             logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
             completion_response = chat.send_message(prompt, **optional_params).text
         elif mode == "text":
@@ -162,12 +169,14 @@ def completion(
             if "stream" in optional_params and optional_params["stream"] == True:
                 optional_params.pop("stream", None) # See note above on handling streaming for vertex ai
                 request_str += f"text_model.predict_streaming({prompt}, **{optional_params})\n"
+                ## LOGGING
                 logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
                 model_response = text_model.predict_streaming(prompt, **optional_params)
                 optional_params["stream"] = True
                 return model_response
             request_str += f"text_model.predict({prompt}, **{optional_params}).text\n"
+            ## LOGGING
             logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
             completion_response = text_model.predict(prompt, **optional_params).text
@@ -207,6 +216,48 @@ def completion(
     except Exception as e:
         raise VertexAIError(status_code=500, message=str(e))
 
+async def async_completion(llm_model, mode: str, prompt: str, model: str, model_response: ModelResponse, logging_obj=None, request_str=None, **optional_params):
+    """
+    Add support for acompletion calls for gemini-pro
+    """
+    from vertexai.preview.generative_models import GenerationConfig
+
+    if mode == "":
+        # gemini-pro
+        llm_model = llm_model.start_chat()
+        ## LOGGING
+        logging_obj.pre_call(input=prompt, api_key=None, additional_args={"complete_input_dict": optional_params, "request_str": request_str})
+        response_obj = await llm_model.send_message_async(prompt, generation_config=GenerationConfig(**optional_params))
+        completion_response = response_obj.text
+        response_obj = response_obj._raw_response
+    elif mode == "chat":
+        # chat-bison etc.
+        pass
+    elif mode == "text":
+        # gecko etc.
+        pass
+
+
+    ## RESPONSE OBJECT
+    if len(str(completion_response)) > 0:
+        model_response["choices"][0]["message"][
+            "content"
+        ] = str(completion_response)
+    model_response["created"] = int(time.time())
+    model_response["model"] = model
+    ## CALCULATING USAGE
+    if model in litellm.vertex_language_models and response_obj is not None:
+        model_response["choices"][0].finish_reason = response_obj.candidates[0].finish_reason.name
+        usage = Usage(prompt_tokens=response_obj.usage_metadata.prompt_token_count,
+                      completion_tokens=response_obj.usage_metadata.candidates_token_count,
+                      total_tokens=response_obj.usage_metadata.total_token_count)
+        model_response.usage = usage
+    return model_response
+
+def async_streaming():
+    """
+    Add support for async streaming calls for gemini-pro
+    """
 
 def embedding():
     # logic for parsing in - calling - parsing out model embedding calls
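Note: for context on the call path the new `async_completion` function takes for gemini-pro (`mode == ""`), here is a minimal standalone sketch of the same vertexai SDK usage. It assumes `vertexai.init(...)` has already run with valid GCP credentials and that the project has access to `gemini-pro`; the function name `ask_gemini` is illustrative, not part of litellm.

    import asyncio
    from vertexai.preview.generative_models import GenerationConfig, GenerativeModel

    async def ask_gemini(prompt: str) -> str:
        # Mirrors the mode == "" branch above: start a chat session, then
        # await the SDK's native async send. Optional params are passed
        # through a GenerationConfig, matching the change in this diff.
        chat = GenerativeModel("gemini-pro").start_chat()
        response = await chat.send_message_async(
            prompt, generation_config=GenerationConfig(temperature=0.7)
        )
        return response.text

    print(asyncio.run(ask_gemini("Say hello in one sentence.")))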
diff --git a/litellm/main.py b/litellm/main.py
index 5138607bd..d8850c7e6 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -177,7 +177,8 @@ async def acompletion(*args, **kwargs):
             or custom_llm_provider == "perplexity"
             or custom_llm_provider == "text-completion-openai"
             or custom_llm_provider == "huggingface"
-            or custom_llm_provider == "ollama"): # currently implemented aiohttp calls for just azure and openai, soon all.
+            or custom_llm_provider == "ollama"
+            or custom_llm_provider == "vertex_ai"): # currently implemented aiohttp calls for just azure and openai, soon all.
         if kwargs.get("stream", False):
             response = completion(*args, **kwargs)
         else:
@@ -1152,7 +1153,8 @@ def completion(
                 encoding=encoding,
                 vertex_location=vertex_ai_location,
                 vertex_project=vertex_ai_project,
-                logging_obj=logging
+                logging_obj=logging,
+                acompletion=acompletion
             )
 
         if "stream" in optional_params and optional_params["stream"] == True:
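Note: the `main.py` change works because `acompletion` treats providers on that list as natively async: `completion()` may hand back a coroutine (as the patched vertex_ai code now does when `acompletion=True`), which is awaited directly instead of the whole blocking call being pushed to a thread executor. A minimal sketch of that dispatch pattern, with illustrative names rather than litellm's exact internals:

    import asyncio
    import inspect

    def provider_completion(native_async: bool):
        # Stands in for completion(): returns a coroutine on the async
        # path, a plain result otherwise.
        async def _acall():
            return {"text": "awaited natively"}
        return _acall() if native_async else {"text": "ran in executor"}

    async def dispatch(native_async: bool):
        if native_async:
            init_response = provider_completion(True)
            # Await the coroutine the provider handed back.
            if inspect.iscoroutine(init_response):
                return await init_response
            return init_response
        # Providers without native async still run in a worker thread.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, provider_completion, False)

    print(asyncio.run(dispatch(True)))
    print(asyncio.run(dispatch(False)))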
diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py
index 02531abe8..620962128 100644
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -9,15 +9,15 @@ import os, io
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import pytest
+import pytest, asyncio
 import litellm
-from litellm import embedding, completion, completion_cost, Timeout
+from litellm import embedding, completion, completion_cost, Timeout, acompletion
 from litellm import RateLimitError
 import json
 import os
 import tempfile
 
-# litellm.num_retries = 3
+litellm.num_retries = 3
 litellm.cache = None
 user_message = "Write a short poem about the sky"
 messages = [{"content": user_message, "role": "user"}]
@@ -73,14 +73,14 @@ def test_vertex_ai():
     litellm.vertex_project = "hardy-device-386718"
 
     test_models = random.sample(test_models, 4)
-    test_models += litellm.vertex_language_models # always test gemini-pro
+    test_models = litellm.vertex_language_models # always test gemini-pro
     for model in test_models:
         try:
             if model in ["code-gecko@001", "code-gecko@latest", "code-bison@001", "text-bison@001"]:
                 # our account does not have access to this model
                 continue
             print("making request", model)
-            response = completion(model=model, messages=[{'role': 'user', 'content': 'hi'}])
+            response = completion(model=model, messages=[{'role': 'user', 'content': 'hi'}], temperature=0.7)
             print("\nModel Response", response)
             print(response)
             assert type(response.choices[0].message.content) == str
@@ -117,3 +117,19 @@ def test_vertex_ai_stream():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 # test_vertex_ai_stream()
+
+@pytest.mark.asyncio
+async def test_async_vertexai_response():
+    load_vertex_ai_credentials()
+    user_message = "Hello, how are you?"
+    messages = [{"content": user_message, "role": "user"}]
+    try:
+        response = await acompletion(model="gemini-pro", messages=messages, temperature=0.7, timeout=5)
+        # response = await response
+        print(f"response: {response}")
+    except litellm.Timeout as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"An exception occurred: {e}")
+
+asyncio.run(test_async_vertexai_response())
\ No newline at end of file
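Note: end to end, the usage this test exercises looks like the sketch below; the model name, credentials, and 5-second timeout are assumptions carried over from the test itself. Also worth flagging: the module-level `asyncio.run(test_async_vertexai_response())` runs the test at import time, which is handy for ad-hoc runs but redundant under pytest, where `@pytest.mark.asyncio` (the pytest-asyncio plugin) already drives the event loop.

    import asyncio
    import litellm
    from litellm import acompletion

    async def main():
        try:
            response = await acompletion(
                model="gemini-pro",
                messages=[{"content": "Hello, how are you?", "role": "user"}],
                temperature=0.7,
                timeout=5,
            )
            print(response.choices[0].message.content)
        except litellm.Timeout:
            pass  # like the test, treat a slow response as tolerable, not a failure

    asyncio.run(main())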