diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index e2293109d..d8a3221f4 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 93adb354b..7e2174fb4 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 0ff9280c4..f71218ef0 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -245,9 +245,8 @@ def deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, dep
 
 # for streaming
 def data_generator(response):
-    print("inside generator")
+    print_verbose("inside generator")
     for chunk in response:
-        print(f"chunk: {chunk}")
         print_verbose(f"returned chunk: {chunk}")
         yield f"data: {json.dumps(chunk)}\n\n"
 
@@ -302,26 +301,6 @@ def litellm_completion(data, type):
         data["max_tokens"] = user_max_tokens
     if user_api_base:
         data["api_base"] = user_api_base
-    ## CUSTOM PROMPT TEMPLATE ## - run `litellm --config` to set this
-    litellm.register_prompt_template(
-        model=user_model,
-        roles={
-            "system": {
-                "pre_message": os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
-                "post_message": os.getenv("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
-            },
-            "assistant": {
-                "pre_message": os.getenv("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
-                "post_message": os.getenv("MODEL_ASSISTANT_MESSAGE_END_TOKEN", "")
-            },
-            "user": {
-                "pre_message": os.getenv("MODEL_USER_MESSAGE_START_TOKEN", ""),
-                "post_message": os.getenv("MODEL_USER_MESSAGE_END_TOKEN", "")
-            }
-        },
-        initial_prompt_value=os.getenv("MODEL_PRE_PROMPT", ""),
-        final_prompt_value=os.getenv("MODEL_POST_PROMPT", "")
-    )
     if type == "completion":
         response = litellm.text_completion(**data)
     elif type == "chat_completion":
diff --git a/litellm/tests/test_prompt_factory.py b/litellm/tests/test_prompt_factory.py
new file mode 100644
index 000000000..11ebbb424
--- /dev/null
+++ b/litellm/tests/test_prompt_factory.py
@@ -0,0 +1,23 @@
+#### What this tests ####
+# This tests if prompts are being correctly formatted
+import sys
+import os
+import io
+
+sys.path.insert(0, os.path.abspath('../..'))
+
+# from litellm.llms.prompt_templates.factory import prompt_factory
+from litellm import completion
+
+def codellama_prompt_format():
+    model = "huggingface/codellama/CodeLlama-7b-Instruct-hf"
+    messages = [{"role": "system", "content": "You are a good bot"}, {"role": "user", "content": "Hey, how's it going?"}]
+    expected_response = """[INST] <<SYS>>
+You are a good bot
+<</SYS>>
+ [/INST]
+[INST] Hey, how's it going? [/INST]"""
+    response = completion(model=model, messages=messages)
+    print(response)
+
+# codellama_prompt_format()
\ No newline at end of file
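
Note: the env-var-driven prompt template block deleted from litellm_completion() above is still reproducible by callers, since litellm.register_prompt_template (with exactly the keyword arguments shown in the removed lines) remains a public helper. A minimal sketch of doing the same registration outside the proxy follows; the model id "huggingface/my-org/my-model" is a hypothetical placeholder, and the environment variable names simply mirror the ones the removed code read.

import os
import litellm

# Sketch only: register the same env-var-driven template the proxy used to set up
# inside litellm_completion(); "huggingface/my-org/my-model" is a placeholder id.
litellm.register_prompt_template(
    model="huggingface/my-org/my-model",
    roles={
        "system": {
            "pre_message": os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
            "post_message": os.getenv("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
        },
        "assistant": {
            "pre_message": os.getenv("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
            "post_message": os.getenv("MODEL_ASSISTANT_MESSAGE_END_TOKEN", ""),
        },
        "user": {
            "pre_message": os.getenv("MODEL_USER_MESSAGE_START_TOKEN", ""),
            "post_message": os.getenv("MODEL_USER_MESSAGE_END_TOKEN", ""),
        },
    },
    initial_prompt_value=os.getenv("MODEL_PRE_PROMPT", ""),
    final_prompt_value=os.getenv("MODEL_POST_PROMPT", ""),
)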