diff --git a/litellm/tests/test_config.yaml b/litellm/tests/test_config.yaml
index 0e678d2d3b..a38dc76156 100644
--- a/litellm/tests/test_config.yaml
+++ b/litellm/tests/test_config.yaml
@@ -1,3 +1,20 @@
+model_list:
+  - model_name: "azure-model"
+    litellm_params:
+      model: "azure/gpt-35-turbo"
+      api_key: "os.environ/AZURE_EUROPE_API_KEY"
+      api_base: "https://my-endpoint-europe-berri-992.openai.azure.com/"
+  - model_name: "azure-model"
+    litellm_params:
+      model: "azure/gpt-35-turbo"
+      api_key: "os.environ/AZURE_CANADA_API_KEY"
+      api_base: "https://my-endpoint-canada-berri992.openai.azure.com"
+  - model_name: "azure-model"
+    litellm_params:
+      model: "azure/gpt-turbo"
+      api_key: "os.environ/AZURE-FRANCE-API-KEY"
+      api_base: "https://openai-france-1234.openai.azure.com"
+
 litellm_settings:
   drop_params: True
   set_verbose: True
diff --git a/litellm/tests/test_proxy_server_cost.py b/litellm/tests/test_proxy_server_cost.py
new file mode 100644
index 0000000000..7688e58995
--- /dev/null
+++ b/litellm/tests/test_proxy_server_cost.py
@@ -0,0 +1,27 @@
+# #### What this tests ####
+# # This tests that the cost tracking function works across consecutive calls (~10 consecutive calls)
+
+# import sys, os
+# import traceback
+# import pytest
+# sys.path.insert(
+#     0, os.path.abspath("../..")
+# )  # Adds the parent directory to the system path
+# import litellm
+
+# async def test_proxy_cost_tracking():
+#     """
+#     Get expected cost.
+#     Create new key.
+#     Run 10 parallel calls.
+#     Check cost for key at the end.
+#     Assert it equals the expected cost.
+#     """
+#     model = "gpt-3.5-turbo"
+#     messages = [{"role": "user", "content": "Hey, how's it going?"}]
+#     number_of_calls = 10
+#     expected_cost = litellm.completion_cost(model=model, messages=messages) * number_of_calls
+#     async def litellm_acompletion():
+
+
+
diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py
index 807e74cfbe..23f67a2e8b 100644
--- a/litellm/tests/test_stream_chunk_builder.py
+++ b/litellm/tests/test_stream_chunk_builder.py
@@ -110,4 +110,26 @@ def test_stream_chunk_builder_litellm_tool_call():
     except Exception as e:
         pytest.fail(f"An exception occurred - {str(e)}")
 
-test_stream_chunk_builder_litellm_tool_call()
+# test_stream_chunk_builder_litellm_tool_call()
+
+def test_stream_chunk_builder_litellm_tool_call_regular_message():
+    try:
+        messages = [{"role": "user", "content": "Hey, how's it going?"}]
+        litellm.set_verbose = False
+        response = litellm.completion(
+            model="azure/gpt-4-nov-release",
+            messages=messages,
+            tools=tools_schema,
+            stream=True,
+            api_key="os.environ/AZURE_FRANCE_API_KEY",
+            api_base="https://openai-france-1234.openai.azure.com",
+            complete_response=True,
+        )
+
+        print(f"complete response: {response}")
+        print(f"complete response usage: {response.usage}")
+        assert response.system_fingerprint is not None
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {str(e)}")
+
+test_stream_chunk_builder_litellm_tool_call_regular_message()
diff --git a/litellm/utils.py b/litellm/utils.py
index dce2592373..3756337b66 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5266,14 +5266,32 @@ class CustomStreamWrapper:
             print_verbose(f"model_response: {model_response}; completion_obj: {completion_obj}")
             print_verbose(f"model_response finish reason 3: {model_response.choices[0].finish_reason}")
             if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
-                hold, model_response_str = self.check_special_tokens(chunk=completion_obj["content"], finish_reason=model_response.choices[0].finish_reason)
+                hold, model_response_str = self.check_special_tokens(chunk=completion_obj["content"], finish_reason=model_response.choices[0].finish_reason) # filter out bos/eos tokens from openai-compatible hf endpoints
                 print_verbose(f"hold - {hold}, model_response_str - {model_response_str}")
                 if hold is False:
-                    completion_obj["content"] = model_response_str
-                    if self.sent_first_chunk == False:
-                        completion_obj["role"] = "assistant"
-                        self.sent_first_chunk = True
-                    model_response.choices[0].delta = Delta(**completion_obj)
+                    ## check if openai/azure chunk
+                    original_chunk = response_obj.get("original_chunk", None)
+                    if original_chunk:
+                        model_response.id = original_chunk.id
+                        if len(original_chunk.choices) > 0:
+                            try:
+                                delta = dict(original_chunk.choices[0].delta)
+                                model_response.choices[0].delta = Delta(**delta)
+                            except Exception as e:
+                                model_response.choices[0].delta = Delta()
+                        else:
+                            return
+                        model_response.system_fingerprint = original_chunk.system_fingerprint
+                        if self.sent_first_chunk == False:
+                            model_response.choices[0].delta["role"] = "assistant"
+                            self.sent_first_chunk = True
+                    else:
+                        ## else
+                        completion_obj["content"] = model_response_str
+                        if self.sent_first_chunk == False:
+                            completion_obj["role"] = "assistant"
+                            self.sent_first_chunk = True
+                        model_response.choices[0].delta = Delta(**completion_obj)
                     # LOGGING
                     threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                     print_verbose(f"model_response: {model_response}")
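For reference, below is a minimal standalone sketch of the pass-through behavior the litellm/utils.py hunk introduces: when the upstream response carries the provider's original OpenAI/Azure chunk, its id, delta, and system_fingerprint are copied onto the streamed response (which is what lets the new test_stream_chunk_builder_litellm_tool_call_regular_message assert a non-null system_fingerprint); otherwise the delta is rebuilt from the locally assembled content as before. The Provider* classes and merge_chunk helper are hypothetical stand-ins for illustration only, not litellm or OpenAI types.

from dataclasses import dataclass
from typing import Any, Dict, List, Optional

# Hypothetical stand-ins for the provider's streaming objects (not litellm types).
@dataclass
class ProviderDelta:
    role: Optional[str] = None
    content: Optional[str] = None

@dataclass
class ProviderChoice:
    delta: ProviderDelta

@dataclass
class ProviderChunk:
    id: str
    system_fingerprint: Optional[str]
    choices: List[ProviderChoice]

def merge_chunk(out: Dict[str, Any], original_chunk: Optional[ProviderChunk],
                fallback_content: str, sent_first_chunk: bool) -> bool:
    """Prefer the provider's own chunk when present; otherwise fall back to the
    locally assembled content string, mirroring the new if/else in the diff."""
    if original_chunk is not None:
        if not original_chunk.choices:
            return sent_first_chunk  # nothing to forward for this chunk
        out["id"] = original_chunk.id
        out["delta"] = dict(vars(original_chunk.choices[0].delta))
        out["system_fingerprint"] = original_chunk.system_fingerprint
    else:
        out["delta"] = {"content": fallback_content}
    if not sent_first_chunk:  # the first streamed delta carries the assistant role
        out["delta"]["role"] = "assistant"
        sent_first_chunk = True
    return sent_first_chunk

# Example: a chunk that came straight from an OpenAI/Azure endpoint.
response: Dict[str, Any] = {}
chunk = ProviderChunk(id="chatcmpl-123", system_fingerprint="fp_abc",
                      choices=[ProviderChoice(ProviderDelta(content="Hello"))])
print(merge_chunk(response, chunk, fallback_content="Hello", sent_first_chunk=False))
print(response)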