diff --git a/fastrepl-proxy b/fastrepl-proxy
deleted file mode 160000
index 21400f5028..0000000000
--- a/fastrepl-proxy
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 21400f5028e5b357b9115ebfb8ed34fbd09d503c
diff --git a/litellm/__init__.py b/litellm/__init__.py
index df6b8717d3..880491080a 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -30,13 +30,13 @@ togetherai_api_key: Optional[str] = None
 baseten_key: Optional[str] = None
 aleph_alpha_key: Optional[str] = None
 nlp_cloud_key: Optional[str] = None
-use_client = False
-logging = True
-caching = False # deprecated son
-caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
+use_client: bool = False
+logging: bool = True
+caching: bool = False # deprecated soon
+caching_with_models: bool = False # if you want the caching key to be model + prompt # deprecated soon
 cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
-max_budget = None # set the max budget across all providers
+max_budget: Optional[float] = None # set the max budget across all providers
 _current_cost = 0 # private variable, used if max budget is set
 
 #############################################
@@ -78,7 +78,7 @@ config_path = None
 ####### Secret Manager #####################
 secret_manager_client = None
 ####### COMPLETION MODELS ###################
-open_ai_chat_completion_models = [
+open_ai_chat_completion_models: list = [
     "gpt-4",
     "gpt-4-0613",
     "gpt-4-0314",
@@ -92,7 +92,7 @@ open_ai_chat_completion_models = [
     "gpt-3.5-turbo-16k",
     "gpt-3.5-turbo-16k-0613",
 ]
-open_ai_text_completion_models = [
+open_ai_text_completion_models: list = [
     "text-davinci-003",
     "text-curie-001",
     "text-babbage-001",
@@ -101,7 +101,7 @@ open_ai_text_completion_models = [
     "text-davinci-002",
 ]
 
-cohere_models = [
+cohere_models: list = [
     "command-nightly",
     "command",
     "command-light",
@@ -109,10 +109,10 @@ cohere_models = [
     "command-xlarge-beta",
 ]
 
-anthropic_models = ["claude-2", "claude-instant-1", "claude-instant-1.2"]
+anthropic_models: list = ["claude-2", "claude-instant-1", "claude-instant-1.2"]
 
 # well supported replicate llms
-replicate_models = [
+replicate_models: list = [
     # llama replicate supported LLMs
     "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
     "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
@@ -127,7 +127,7 @@ replicate_models = [
     "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
 ]
 
-openrouter_models = [
+openrouter_models: list = [
     "google/palm-2-codechat-bison",
     "google/palm-2-chat-bison",
     "openai/gpt-3.5-turbo",
@@ -139,25 +139,25 @@ openrouter_models = [
     "meta-llama/llama-2-70b-chat",
 ]
 
-vertex_chat_models = [
+vertex_chat_models: list = [
     "chat-bison-32k",
     "chat-bison",
     "chat-bison@001",
 ]
 
-vertex_code_chat_models = [
+vertex_code_chat_models: list = [
     "codechat-bison",
     "codechat-bison-32k",
     "codechat-bison@001",
 ]
 
-vertex_text_models = [
+vertex_text_models: list = [
     "text-bison",
     "text-bison@001",
     # "text-bison-32k",
 ]
 
-vertex_code_text_models = [
+vertex_code_text_models: list = [
     "code-bison",
     # "code-bison-32K",
     "code-bison@001",
@@ -165,7 +165,7 @@ vertex_code_text_models = [
     "code-gecko@latest",
 ]
 
-huggingface_models = [
+huggingface_models: list = [
     "meta-llama/Llama-2-7b-hf",
     "meta-llama/Llama-2-7b-chat-hf",
     "meta-llama/Llama-2-13b-hf",
@@ -180,11 +180,11 @@ huggingface_models = [
     "meta-llama/Llama-2-70b-chat",
 ] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers
 
-ai21_models = ["j2-ultra", "j2-mid", "j2-light"]
+ai21_models: list = ["j2-ultra", "j2-mid", "j2-light"]
 
-nlp_cloud_models = ["dolphin", "chatdolphin"]
+nlp_cloud_models: list = ["dolphin", "chatdolphin"]
 
-together_ai_models = [
+together_ai_models: list = [
 
     # llama llms - chat
     "togethercomputer/llama-2-70b-chat",
@@ -221,7 +221,7 @@ together_ai_models = [
 ] # supports all together ai models, just pass in the model id e.g. completion(model="together_computer/replit_code_3b",...)
 
 
-aleph_alpha_models = [
+aleph_alpha_models: list = [
     "luminous-base",
     "luminous-base-control",
     "luminous-extended",
@@ -230,9 +230,9 @@ aleph_alpha_models = [
     "luminous-supreme-control"
 ]
 
-baseten_models = ["qvv0xeq", "q841o8w", "31dxrj3"] # FALCON 7B # WizardLM # Mosaic ML
+baseten_models: list = ["qvv0xeq", "q841o8w", "31dxrj3"] # FALCON 7B # WizardLM # Mosaic ML
 
-bedrock_models = [
+bedrock_models: list = [
     "amazon.titan-tg1-large",
     "ai21.j2-grande-instruct"
 ]
@@ -254,7 +254,7 @@ model_list = (
     + nlp_cloud_models
 )
 
-provider_list = [
+provider_list: list = [
     "openai",
     "cohere",
     "anthropic",
@@ -274,7 +274,7 @@ provider_list = [
     "custom", # custom apis
 ]
 
-models_by_provider = {
+models_by_provider: dict = {
     "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
     "cohere": cohere_models,
     "anthropic": anthropic_models,
@@ -289,7 +289,7 @@ models_by_provider = {
 }
 
 ####### EMBEDDING MODELS ###################
-open_ai_embedding_models = ["text-embedding-ada-002"]
+open_ai_embedding_models: list = ["text-embedding-ada-002"]
 
 from .timeout import timeout
 from .testing import *
diff --git a/litellm/main.py b/litellm/main.py
index 9d98d40074..74604e7622 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1193,3 +1193,82 @@ def config_completion(**kwargs):
         raise ValueError(
             "No config path set, please set a config path using `litellm.config_path = 'path/to/config.json'`"
         )
+
+def stream_chunk_builder(chunks: list):
+    id = chunks[0]["id"]
+    object = chunks[0]["object"]
+    created = chunks[0]["created"]
+    model = chunks[0]["model"]
+    role = chunks[0]["choices"][0]["delta"]["role"]
+    finish_reason = chunks[-1]["choices"][0]["finish_reason"]
+
+    # Initialize the response dictionary
+    response = {
+        "id": id,
+        "object": object,
+        "created": created,
+        "model": model,
+        "choices": [
+            {
+                "index": 0,
+                "message": {
+                    "role": role,
+                    "content": ""
+                },
+                "finish_reason": finish_reason,
+            }
+        ],
+        # "usage": {
+        #     "prompt_tokens": 0, # Modify as needed
+        #     "completion_tokens": 0, # Modify as needed
+        #     "total_tokens": 0 # Modify as needed
+        # }
+    }
+
+    # Extract the "content" strings from the nested dictionaries within "choices"
+    content_list = []
+
+    if "function_call" in chunks[0]["choices"][0]["delta"]:
+        argument_list = []
+        delta = chunks[0]["choices"][0]["delta"]
+        function_call = delta.get("function_call", "")
+        function_call_name = function_call.get("name", "")
+
+        message = response["choices"][0]["message"]
+        message["function_call"] = {}
+        message["function_call"]["name"] = function_call_name
+
+        for chunk in chunks:
+            choices = chunk["choices"]
+            for choice in choices:
+                delta = choice.get("delta", {})
+                function_call = delta.get("function_call", "")
+
+                # Check if a function call is present
+                if function_call:
+                    # Now, function_call is expected to be a dictionary
+                    arguments = function_call.get("arguments", "")
+                    argument_list.append(arguments)
+
+        combined_arguments = "".join(argument_list)
"".join(argument_list) + response["choices"][0]["message"]["content"] = None + response["choices"][0]["message"]["function_call"]["arguments"] = combined_arguments + else: + for chunk in chunks: + choices = chunk["choices"] + for choice in choices: + delta = choice.get("delta", {}) + content = delta.get("content", "") + content_list.append(content) + + # Combine the "content" strings into a single string + combined_content = "".join(content_list) + + # Update the "content" field within the response dictionary + response["choices"][0]["message"]["content"] = combined_content + + + # # Update usage information if needed + # response["usage"]["completion_tokens"] = token + + return response diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py new file mode 100644 index 0000000000..23dcf57a8e --- /dev/null +++ b/litellm/tests/test_stream_chunk_builder.py @@ -0,0 +1,56 @@ +from litellm import completion, stream_chunk_builder +import litellm +import os + +user_message = "What is the current weather in Boston?" +messages = [{"content": user_message, "role": "user"}] + +function_schema = { + "name": "get_weather", + "description": + "gets the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": + "The city and state, e.g. San Francisco, CA" + }, + }, + "required": ["location"] + }, +} + +def test_stream_chunk_builder(): + litellm.api_key = os.environ["OPENAI_API_KEY"] + response = completion( + model="gpt-3.5-turbo", + messages=messages, + functions=[function_schema], + stream=True, + ) + + chunks = [] + + for chunk in response: + chunks.append(chunk) + + try: + rebuilt_response = stream_chunk_builder(chunks) + + # exract the response from the rebuilt response + rebuilt_response["id"] + rebuilt_response["object"] + rebuilt_response["created"] + rebuilt_response["model"] + rebuilt_response["choices"] + rebuilt_response["choices"][0]["index"] + choices = rebuilt_response["choices"][0] + message = choices["message"] + role = message["role"] + content = message["content"] + finnish_reason = choices["finish_reason"] + except: + raise Exception("stream_chunk_builder failed to rebuild response") +test_stream_chunk_builder()