diff --git a/litellm/integrations/langsmith.py b/litellm/integrations/langsmith.py
index 43e9de4d34..de9dd2f719 100644
--- a/litellm/integrations/langsmith.py
+++ b/litellm/integrations/langsmith.py
@@ -58,7 +58,7 @@ class LangsmithLogger:
                 "inputs": {
                     **new_kwargs
                 },
-                "outputs": response_obj,
+                "outputs": response_obj.json(),
                 "session_name": project_name,
                 "start_time": start_time,
                 "end_time": end_time,
diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index b1bf77f0e3..abc6c845a9 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -219,7 +219,6 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
     except Exception as e:
         traceback.print_exc()
 
-
 async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
     try:
         timeout = aiohttp.ClientTimeout(total=600) # 10 minutes
@@ -230,12 +229,12 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
                 text = await resp.text()
                 raise OllamaError(status_code=resp.status, message=text)
 
+            completion_string = ""
             async for line in resp.content.iter_any():
                 if line:
                     try:
                         json_chunk = line.decode("utf-8")
                         chunks = json_chunk.split("\n")
-                        completion_string = ""
                         for chunk in chunks:
                             if chunk.strip() != "":
                                 j = json.loads(chunk)
@@ -245,14 +244,16 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
                                         "content": "",
                                         "error": j
                                     }
+                                    raise Exception(f"OllamaError - {chunk}")
                                 if "response" in j:
                                     completion_obj = {
                                         "role": "assistant",
                                         "content": j["response"],
                                     }
-                                    completion_string += completion_obj["content"]
+                                    completion_string = completion_string + completion_obj["content"]
                     except Exception as e:
                         traceback.print_exc()
+
             ## RESPONSE OBJECT
             model_response["choices"][0]["finish_reason"] = "stop"
             model_response["choices"][0]["message"]["content"] = completion_string
diff --git a/litellm/main.py b/litellm/main.py
index 38526bfd6d..8748cb1f5e 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -624,7 +624,6 @@ def completion(
         or "ft:babbage-002" in model
         or "ft:davinci-002" in model # support for finetuned completion models
     ):
-        # print("calling custom openai provider")
         openai.api_type = "openai"
 
         api_base = (
@@ -1319,13 +1318,8 @@ def completion(
             )
         else:
             prompt = prompt_factory(model=model, messages=messages, custom_llm_provider=custom_llm_provider)
+
         ## LOGGING
-        if kwargs.get('acompletion', False) == True:
-            if optional_params.get("stream", False) == True:
-                # assume all ollama responses are streamed
-                async_generator = ollama.async_get_ollama_response_stream(api_base, model, prompt, optional_params, logging_obj=logging)
-                return async_generator
-
         generator = ollama.get_ollama_response_stream(api_base, model, prompt, optional_params, logging_obj=logging, acompletion=acompletion, model_response=model_response, encoding=encoding)
         if acompletion is True:
             return generator
@@ -2126,7 +2120,7 @@ def text_completion(
             *args,
             **all_params,
         )
-        #print(response)
+
         text_completion_response["id"] = response.get("id", None)
         text_completion_response["object"] = "text_completion"
         text_completion_response["created"] = response.get("created", None)
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index af7bd3b4ab..03dc6e1cd4 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1004,6 +1004,7 @@ async def chat_completion(request: Request, model: Optional[str] = None, user_ap
         ### ROUTE THE REQUEST ###
         router_model_names = [m["model_name"] for m in llm_model_list] if llm_model_list is not None else []
         if llm_router is not None and data["model"] in router_model_names: # model in router model list
+            print(f"ENTERS LLM ROUTER ACOMPLETION")
             response = await llm_router.acompletion(**data)
         elif llm_router is not None and data["model"] in llm_router.deployment_names: # model in router deployments, calling a specific deployment on the router
             response = await llm_router.acompletion(**data, specific_deployment = True)
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 96ba25c8a7..7c9863fad9 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -64,7 +64,7 @@ class ProxyLogging:
         1. /chat/completions
         2. /embeddings
         """
-        try: 
+        try:
             self.call_details["data"] = data
             self.call_details["call_type"] = call_type
             ## check if max parallel requests set
@@ -75,6 +75,7 @@ class ProxyLogging:
                     api_key=user_api_key_dict.api_key,
                     user_api_key_cache=self.call_details["user_api_key_cache"])
 
+            print_verbose(f'final data being sent to {call_type} call: {data}')
             return data
         except Exception as e:
             raise e
diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py
index 8a227c4543..29bd0b0fb1 100644
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -76,7 +76,7 @@ def test_vertex_ai():
     test_models += litellm.vertex_language_models # always test gemini-pro
     for model in test_models:
         try:
-            if model in ["code-gecko@001", "code-gecko@latest", "code-bison@001", "text-bison@001"]:
+            if model in ["code-gecko", "code-gecko@001", "code-gecko@002", "code-gecko@latest", "code-bison@001", "text-bison@001"]:
                 # our account does not have access to this model
                 continue
             print("making request", model)
@@ -97,10 +97,11 @@ def test_vertex_ai_stream():
 
     test_models = litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models
     test_models = random.sample(test_models, 4)
-    test_models += litellm.vertex_language_models # always test gemini-pro
+    # test_models += litellm.vertex_language_models # always test gemini-pro
+    test_models = ["code-gecko@001"]
     for model in test_models:
         try:
-            if model in ["code-gecko@001", "code-gecko@latest", "code-bison@001", "text-bison@001"]:
+            if model in ["code-gecko", "code-gecko@001", "code-gecko@002", "code-gecko@latest", "code-bison@001", "text-bison@001"]:
                 # our account does not have access to this model
                 continue
             print("making request", model)
@@ -116,7 +117,7 @@ def test_vertex_ai_stream():
             assert len(completed_str) > 4
         except Exception as e:
             pytest.fail(f"Error occurred: {e}")
-# test_vertex_ai_stream()
+test_vertex_ai_stream()
 
 @pytest.mark.asyncio
 async def test_async_vertexai_response():
@@ -127,6 +128,9 @@ async def test_async_vertexai_response():
     test_models += litellm.vertex_language_models # always test gemini-pro
     for model in test_models:
         print(f'model being tested in async call: {model}')
+        if model in ["code-gecko", "code-gecko@001", "code-gecko@002", "code-gecko@latest", "code-bison@001", "text-bison@001"]:
+            # our account does not have access to this model
+            continue
         try:
             user_message = "Hello, how are you?"
             messages = [{"content": user_message, "role": "user"}]
@@ -147,6 +151,9 @@ async def test_async_vertexai_streaming_response():
     test_models = random.sample(test_models, 4)
     test_models += litellm.vertex_language_models # always test gemini-pro
     for model in test_models:
+        if model in ["code-gecko", "code-gecko@001", "code-gecko@002", "code-gecko@latest", "code-bison@001", "text-bison@001"]:
+            # our account does not have access to this model
+            continue
         try:
             user_message = "Hello, how are you?"
             messages = [{"content": user_message, "role": "user"}]
diff --git a/litellm/utils.py b/litellm/utils.py
index c2aab4700b..1a9594c9ad 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -365,6 +365,13 @@ class ModelResponse(OpenAIObject):
     def __setitem__(self, key, value):
         # Allow dictionary-style assignment of attributes
         setattr(self, key, value)
+
+    def json(self, **kwargs):
+        try:
+            return self.model_dump() # noqa
+        except:
+            # if using pydantic v1
+            return self.dict()
 
 class Embedding(OpenAIObject):
     embedding: list = []
@@ -430,6 +437,13 @@ class EmbeddingResponse(OpenAIObject):
     def __setitem__(self, key, value):
         # Allow dictionary-style assignment of attributes
         setattr(self, key, value)
+
+    def json(self, **kwargs):
+        try:
+            return self.model_dump() # noqa
+        except:
+            # if using pydantic v1
+            return self.dict()
 
 class TextChoices(OpenAIObject):
     def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params):
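
Reviewer note, not part of the patch: the json() helpers added to ModelResponse and EmbeddingResponse above prefer pydantic v2's model_dump() and fall back to pydantic v1's dict(). Below is a minimal standalone sketch of that pattern; ExampleResponse is an illustrative stand-in rather than a litellm class, and the except clause is narrowed to AttributeError here instead of the bare except used in the patch.

from pydantic import BaseModel

class ExampleResponse(BaseModel):
    # illustrative stand-in for ModelResponse / EmbeddingResponse
    role: str = "assistant"
    content: str = ""

    def json(self, **kwargs):
        try:
            # pydantic v2 path: model_dump() returns a plain dict
            return self.model_dump()
        except AttributeError:
            # pydantic v1 path: model_dump() does not exist, use dict()
            return self.dict()

print(ExampleResponse(content="hi").json())  # -> {'role': 'assistant', 'content': 'hi'}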