diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 381bcc1ac..71aeb4bee 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -625,7 +625,11 @@ class Logging:
                     model_call_details=self.model_call_details
                 ),
                 call_type=self.call_type,
-                optional_params=self.optional_params,
+                optional_params=(
+                    self.optional_params
+                    if hasattr(self, "optional_params")
+                    else {}
+                ),
             )
         )
         if self.dynamic_success_callbacks is not None and isinstance(
diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index 35504a490..f7aa2d593 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -1330,17 +1330,30 @@ class ModelResponseIterator:
 
             gemini_chunk = processed_chunk["candidates"][0]
 
-            if (
-                "content" in gemini_chunk
-                and "text" in gemini_chunk["content"]["parts"][0]
-            ):
-                text = gemini_chunk["content"]["parts"][0]["text"]
+            if "content" in gemini_chunk:
+                if "text" in gemini_chunk["content"]["parts"][0]:
+                    text = gemini_chunk["content"]["parts"][0]["text"]
+                elif "functionCall" in gemini_chunk["content"]["parts"][0]:
+                    function_call = ChatCompletionToolCallFunctionChunk(
+                        name=gemini_chunk["content"]["parts"][0]["functionCall"][
+                            "name"
+                        ],
+                        arguments=json.dumps(
+                            gemini_chunk["content"]["parts"][0]["functionCall"]["args"]
+                        ),
+                    )
+                    tool_use = ChatCompletionToolCallChunk(
+                        id=str(uuid.uuid4()),
+                        type="function",
+                        function=function_call,
+                        index=0,
+                    )
 
             if "finishReason" in gemini_chunk:
                 finish_reason = map_finish_reason(
                     finish_reason=gemini_chunk["finishReason"]
                 )
-                ## DO NOT SET 'finish_reason' = True
+                ## DO NOT SET 'is_finished' = True
                 ## GEMINI SETS FINISHREASON ON EVERY CHUNK!
 
             if "usageMetadata" in processed_chunk:
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 7f4b86ec4..67b0ab4c0 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -2,11 +2,13 @@ model_list:
   - model_name: "*"
     litellm_params:
       model: "openai/*"
-      mock_response: "Hello world!"
+ - model_name: "gemini-1.5-flash" + litellm_params: + model: "gemini/gemini-1.5-flash" -litellm_settings: - success_callback: ["langfuse"] - failure_callback: ["langfuse"] +# litellm_settings: +# success_callback: ["langfuse"] +# failure_callback: ["langfuse"] general_settings: alerting: ["slack"] diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py index b2e2b29b7..50f02f272 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -12,6 +12,9 @@ from typing import Tuple import pytest from pydantic import BaseModel +import litellm.litellm_core_utils +import litellm.litellm_core_utils.litellm_logging + sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path @@ -3034,8 +3037,11 @@ def test_completion_claude_3_function_call_with_streaming(): pytest.fail(f"Error occurred: {e}") +@pytest.mark.parametrize( + "model", ["gemini/gemini-1.5-flash"] +) # "claude-3-opus-20240229", @pytest.mark.asyncio -async def test_acompletion_claude_3_function_call_with_streaming(): +async def test_acompletion_claude_3_function_call_with_streaming(model): litellm.set_verbose = True tools = [ { @@ -3066,7 +3072,7 @@ async def test_acompletion_claude_3_function_call_with_streaming(): try: # test without max tokens response = await acompletion( - model="claude-3-opus-20240229", + model=model, messages=messages, tools=tools, tool_choice="required", @@ -3453,3 +3459,55 @@ def test_aamazing_unit_test_custom_stream_wrapper_n(): assert ( chunk_dict == chunks[idx] ), f"idx={idx} translated chunk = {chunk_dict} != openai chunk = {chunks[idx]}" + + +def test_unit_test_custom_stream_wrapper_function_call(): + """ + Test if model returns a tool call, the finish reason is correctly set to 'tool_calls' + """ + from litellm.types.llms.openai import ChatCompletionDeltaChunk + + litellm.set_verbose = False + delta: ChatCompletionDeltaChunk = { + "content": None, + "role": "assistant", + "tool_calls": [ + { + "function": {"arguments": '"}'}, + "type": "function", + "index": 0, + } + ], + } + chunk = { + "id": "chatcmpl-123", + "object": "chat.completion.chunk", + "created": 1694268190, + "model": "gpt-3.5-turbo-0125", + "system_fingerprint": "fp_44709d6fcb", + "choices": [{"index": 0, "delta": delta, "finish_reason": "stop"}], + } + chunk = litellm.ModelResponse(**chunk, stream=True) + + completion_stream = ModelResponseIterator(model_response=chunk) + + response = litellm.CustomStreamWrapper( + completion_stream=completion_stream, + model="gpt-3.5-turbo", + custom_llm_provider="cached_response", + logging_obj=litellm.litellm_core_utils.litellm_logging.Logging( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hey"}], + stream=True, + call_type="completion", + start_time=time.time(), + litellm_call_id="12345", + function_id="1245", + ), + ) + + finish_reason: Optional[str] = None + for chunk in response: + if chunk.choices[0].finish_reason is not None: + finish_reason = chunk.choices[0].finish_reason + assert finish_reason == "tool_calls" diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 64dee3420..6fc0593b9 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -300,7 +300,7 @@ class ListBatchRequest(TypedDict, total=False): timeout: Optional[float] -class ChatCompletionToolCallFunctionChunk(TypedDict): +class ChatCompletionToolCallFunctionChunk(TypedDict, total=False): name: Optional[str] arguments: str @@ -312,7 +312,7 @@ class ChatCompletionToolCallChunk(TypedDict): 
     index: int
 
 
-class ChatCompletionDeltaToolCallChunk(TypedDict):
+class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):
     id: str
     type: Literal["function"]
     function: ChatCompletionToolCallFunctionChunk
diff --git a/litellm/utils.py b/litellm/utils.py
index a28531b6c..51236245a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7950,6 +7950,7 @@ class CustomStreamWrapper:
         )
         self.messages = getattr(logging_obj, "messages", None)
         self.sent_stream_usage = False
+        self.tool_call = False
        self.chunks: List = (
             []
         )  # keep track of the returned chunks - used for calculating the input/output tokens for stream options
@@ -9192,9 +9193,16 @@
                     "is_finished": True,
                     "finish_reason": chunk.choices[0].finish_reason,
                     "original_chunk": chunk,
+                    "tool_calls": (
+                        chunk.choices[0].delta.tool_calls
+                        if hasattr(chunk.choices[0].delta, "tool_calls")
+                        else None
+                    ),
                 }
 
                 completion_obj["content"] = response_obj["text"]
+                if response_obj["tool_calls"] is not None:
+                    completion_obj["tool_calls"] = response_obj["tool_calls"]
                 print_verbose(f"completion obj content: {completion_obj['content']}")
                 if hasattr(chunk, "id"):
                     model_response.id = chunk.id
@@ -9352,6 +9360,10 @@
                 )
                 print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")
 
+            ## CHECK FOR TOOL USE
+            if "tool_calls" in completion_obj and len(completion_obj["tool_calls"]) > 0:
+                self.tool_call = True
+
             ## RETURN ARG
             if (
                 "content" in completion_obj
@@ -9530,6 +9542,12 @@
                             )
                         else:
                             model_response.choices[0].finish_reason = "stop"
+
+                        ## if tool use
+                        if (
+                            model_response.choices[0].finish_reason == "stop" and self.tool_call
+                        ):  # don't overwrite for other - potential error finish reasons
+                            model_response.choices[0].finish_reason = "tool_calls"
                         return model_response
 
    def __next__(self):
@@ -9583,7 +9601,7 @@
 
             return response
        except StopIteration:
-            if self.sent_last_chunk == True:
+            if self.sent_last_chunk is True:
                 if (
                     self.sent_stream_usage == False
                     and self.stream_options is not None