diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index b78bc9b00..9a3c21972 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index e95fb8744..f71c34263 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index 06d938ac8..7d39afab4 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -132,6 +132,7 @@ def completion(
     # model specific optional params
     top_k=40,# used by text-bison only
     task: Optional[str]="text-generation-inference", # used by huggingface inference endpoints
+    return_full_text: bool = False, # used by huggingface TGI
     remove_input: bool = True, # used by nlp cloud models - prevents input text from being returned as part of output
     request_timeout=0, # unused var for old version of OpenAI API
     fallbacks=[],
@@ -181,7 +182,8 @@ def completion(
             custom_llm_provider=custom_llm_provider,
             top_k=top_k,
             task=task,
-            remove_input=remove_input
+            remove_input=remove_input,
+            return_full_text=return_full_text
         )
         # For logging - save the values of the litellm-specific params passed in
         litellm_params = get_litellm_params(
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index e46f6dbdf..26522a354 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -92,25 +92,6 @@ def test_completion_with_litellm_call_id():
         pytest.fail(f"Error occurred: {e}")
 
 
-def test_completion_claude_stream():
-    try:
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
-            },
-        ]
-        response = completion(model="claude-2", messages=messages, stream=True)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk["choices"][0]["delta"]) # same as openai format
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_claude_stream()
-
 def test_completion_nlp_cloud():
     try:
         messages = [
@@ -125,26 +106,6 @@ def test_completion_nlp_cloud():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-def test_completion_nlp_cloud_streaming():
-    try:
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant."},
-            {
-                "role": "user",
-                "content": "how does a court case get to the Supreme Court?",
-            },
-        ]
-        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk["choices"][0]["delta"]["content"]) # same as openai format
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_nlp_cloud_streaming()
-
-# test_completion_nlp_cloud_streaming()
 # def test_completion_hf_api():
 #     try:
 #         user_message = "write some code to find the sum of two numbers"
@@ -327,69 +288,6 @@ def test_completion_openai_with_more_optional_params():
         pytest.fail(f"Error occurred: {e}")
 
 
-def test_completion_openai_with_stream():
-    try:
-        response = completion(
-            model="gpt-3.5-turbo",
-            messages=messages,
-            temperature=0.5,
-            top_p=0.1,
-            n=2,
-            max_tokens=150,
-            presence_penalty=0.5,
-            stream=True,
-            frequency_penalty=-0.5,
-            logit_bias={27000: 5},
-            user="ishaan_dev@berri.ai",
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-            if chunk["choices"][0]["finish_reason"] == "stop" or chunk["choices"][0]["finish_reason"] == "length":
-                break
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_openai_with_stream()
-
-def test_completion_openai_with_functions():
-    function1 = [
-        {
-            "name": "get_current_weather",
-            "description": "Get the current weather in a given location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city and state, e.g. San Francisco, CA",
-                    },
-                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-                },
-                "required": ["location"],
-            },
-        }
-    ]
-    try:
-        response = completion(
-            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-            if chunk["choices"][0]["finish_reason"] == "stop":
-                break
-            print(chunk["choices"][0]["finish_reason"])
-            print(chunk["choices"][0]["delta"]["content"])
-
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_openai_with_functions()
-
-
 # def test_completion_openai_azure_with_functions():
 #     function1 = [
 #         {
@@ -544,20 +442,6 @@ def test_completion_replicate_vicuna():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-# test_completion_replicate_vicuna()
-
-def test_completion_replicate_llama_stream():
-    model_name = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
-    try:
-        response = completion(model=model_name, messages=messages, stream=True)
-        # Add any assertions here to check the response
-        for chunk in response:
-            print(chunk)
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_replicate_llama_stream()
-
 # def test_completion_replicate_stability_stream():
 #     model_name = "stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb"
 #     try:
@@ -653,26 +537,7 @@ def test_completion_bedrock_ai21():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-def test_completion_bedrock_ai21_stream():
-    try:
-        litellm.set_verbose = False
-        response = completion(
-            model="bedrock/amazon.titan-tg1-large",
-            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
-            temperature=1,
-            max_tokens=4096,
-            stream=True,
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# test_completion_bedrock_ai21_stream()
-
-# test_completion_sagemaker()
 
 ######## Test VLLM ########
 # def test_completion_vllm():
 #     try:
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index f51efb55c..b7a82356c 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -213,7 +213,32 @@ def test_completion_cohere_stream():
         print(f"completion_response: {complete_response}")
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-    
+
+
+def test_completion_bedrock_ai21_stream():
+    try:
+        litellm.set_verbose = False
+        response = completion(
+            model="bedrock/amazon.titan-tg1-large",
+            messages=[{"role": "user", "content": "Be as verbose as possible and give as many details as possible, how does a court case get to the Supreme Court?"}],
+            temperature=1,
+            max_tokens=4096,
+            stream=True,
+        )
+        # Add any assertions here to check the response
+        print(response)
+        complete_response = ""
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 # test_completion_cohere_stream()
 
 # test on openai completion call
@@ -301,34 +326,67 @@ def test_together_ai_completion_call_starcoder():
     except:
         print(f"error occurred: {traceback.format_exc()}")
         pass
-# test_together_ai_completion_call_starcoder()
-# test on aleph alpha completion call - commented out as it's expensive to run this on circle ci for every build
-# def test_aleph_alpha_call():
-#     try:
-#         start_time = time.time()
-#         response = completion(
-#             model="luminous-base",
-#             messages=messages,
-#             logger_fn=logger_fn,
-#             stream=True,
-#         )
-#         complete_response = ""
-#         print(f"returned response object: {response}")
-#         for chunk in response:
-#             chunk_time = time.time()
-#             complete_response += (
-#                 chunk["choices"][0]["delta"]["content"]
-#                 if len(chunk["choices"][0]["delta"].keys()) > 0
-#                 else ""
-#             )
-#             if len(complete_response) > 0:
-#                 print(complete_response)
-#             if complete_response == "":
-#                 raise Exception("Empty response received")
-#     except:
-#         print(f"error occurred: {traceback.format_exc()}")
-#         pass
-#### Test Async streaming
+
+def test_completion_nlp_cloud_streaming():
+    try:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": "how does a court case get to the Supreme Court?",
+            },
+        ]
+        response = completion(model="dolphin", messages=messages, stream=True, logger_fn=logger_fn)
+        # Add any assertions here to check the response
+        complete_response = ""
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            if finished:
+                break
+            complete_response += chunk
+        if complete_response == "":
+            raise Exception("Empty response received")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
+#### Test Function calling + streaming ####
+
+def test_completion_openai_with_functions():
+    function1 = [
+        {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA",
+                    },
+                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                },
+                "required": ["location"],
+            },
+        }
+    ]
+    try:
+        response = completion(
+            model="gpt-3.5-turbo", messages=messages, functions=function1, stream=True
+        )
+        # Add any assertions here to check the response
+        print(response)
+        for chunk in response:
+            print(chunk)
+            if chunk["choices"][0]["finish_reason"] == "stop":
+                break
+            print(chunk["choices"][0]["finish_reason"])
+            print(chunk["choices"][0]["delta"]["content"])
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+test_completion_openai_with_functions()
+
+#### Test Async streaming ####
 
 # # test on ai21 completion call
 async def ai21_async_completion_call():
diff --git a/litellm/utils.py b/litellm/utils.py
index c9a9a33f1..5865557da 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -828,6 +828,7 @@ def get_optional_params( # use the openai defaults
     model=None,
     custom_llm_provider="",
     top_k=40,
+    return_full_text=False,
     task=None
 ):
     optional_params = {}
@@ -885,6 +886,7 @@ def get_optional_params( # use the openai defaults
             optional_params["max_new_tokens"] = max_tokens
         if presence_penalty != 0:
             optional_params["repetition_penalty"] = presence_penalty
+        optional_params["return_full_text"] = return_full_text
         optional_params["details"] = True
         optional_params["task"] = task
     elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
@@ -2507,7 +2509,6 @@ class CustomStreamWrapper:
         model_response = ModelResponse(stream=True, model=self.model)
         try:
             # return this for all models
-            print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")
             if self.sent_first_chunk == False:
                 model_response.choices[0].delta.role = "assistant"
                 self.sent_first_chunk = True
diff --git a/pyproject.toml b/pyproject.toml
index 32d4bec96..ffd51a9df 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.677"
+version = "0.1.678"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
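Usage sketch (illustrative only): the hunks above thread a new return_full_text flag from completion() through get_optional_params() into the Hugging Face text-generation-inference (TGI) request. The snippet below shows how a caller might exercise it. The model id, api_base URL, and prompt are placeholder assumptions, not values taken from this patch; the provider/endpoint arguments follow the usual litellm Hugging Face setup for this era and may differ by installed version. It assumes HUGGINGFACE_API_KEY is set in the environment.

    import litellm

    response = litellm.completion(
        model="bigcode/starcoder",                          # placeholder model id served by a TGI deployment
        messages=[{"role": "user", "content": "Write a one-line hello world in Python."}],
        custom_llm_provider="huggingface",                  # route through the Hugging Face/TGI code path
        api_base="https://my-tgi-endpoint.example.com",     # placeholder inference endpoint URL (assumption)
        task="text-generation-inference",                   # default task, as set in the diff
        return_full_text=False,                             # new flag: ask TGI not to echo the prompt back
    )
    print(response["choices"][0]["message"]["content"])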