diff --git a/litellm/proxy/api_log.json b/litellm/proxy/api_log.json
deleted file mode 100644
index 62020b025..000000000
--- a/litellm/proxy/api_log.json
+++ /dev/null
@@ -1,87 +0,0 @@
-{
-  "20231014160921359878": {
-    "pre_api_call": {
-      "model": "codellama/CodeLlama-7b-Instruct-hf",
-      "messages": [
-        {
-          "role": "user",
-          "content": "hey"
-        }
-      ],
-      "optional_params": {
-        "temperature": 0.5,
-        "stream": true,
-        "max_new_tokens": 1024,
-        "details": true,
-        "return_full_text": false
-      },
-      "litellm_params": {
-        "return_async": false,
-        "api_key": null,
-        "force_timeout": 600,
-        "logger_fn": null,
-        "verbose": false,
-        "custom_llm_provider": "huggingface",
-        "api_base": "https://app.baseten.co/models/pP8JeaB/predict",
-        "litellm_call_id": "d75891a0-d567-470a-a6cd-137e698da092",
-        "model_alias_map": {},
-        "completion_call_id": null,
-        "metadata": null,
-        "stream_response": {}
-      },
-      "input": "[INST] hey [/INST]\n",
-      "api_key": "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",
-      "additional_args": {
-        "complete_input_dict": {
-          "inputs": "[INST] hey [/INST]\n",
-          "parameters": {
-            "temperature": 0.5,
-            "stream": true,
-            "max_new_tokens": 1024,
-            "details": true,
-            "return_full_text": false
-          },
-          "stream": true
-        },
-        "task": "text-generation-inference",
-        "headers": {
-          "Authorization": "Api-Key SQqH1uZg.SSN79Bq997k4TRdzW9HBCghx9KyL0EJA"
-        }
-      },
-      "log_event_type": "pre_api_call"
-    },
-    "post_api_call": {
-      "model": "codellama/CodeLlama-7b-Instruct-hf",
-      "messages": [
-        {
-          "role": "user",
-          "content": "hey"
-        }
-      ],
-      "optional_params": {
-        "temperature": 0.5,
-        "stream": true,
-        "max_new_tokens": 1024,
-        "details": true,
-        "return_full_text": false
-      },
-      "litellm_params": {
-        "return_async": false,
-        "api_key": null,
-        "force_timeout": 600,
-        "logger_fn": null,
-        "verbose": false,
-        "custom_llm_provider": "huggingface",
-        "api_base": "https://app.baseten.co/models/pP8JeaB/predict",
-        "litellm_call_id": "d75891a0-d567-470a-a6cd-137e698da092",
-        "model_alias_map": {},
-        "completion_call_id": null,
-        "metadata": null,
-        "stream_response": {}
-      },
-      "input": null,
-      "api_key": null,
-      "additional_args": {},
-      "log_event_type": "post_api_call",
-      "original_response": "",
-      "end_time": 
\ No newline at end of file
diff --git a/litellm/proxy/cost.log b/litellm/proxy/cost.log
deleted file mode 100644
index e69de29bb..000000000
diff --git a/litellm/proxy/costs.json b/litellm/proxy/costs.json
deleted file mode 100644
index 8211cec22..000000000
--- a/litellm/proxy/costs.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "Oct-12-2023": {
-    "claude-2": {
-      "cost": 0.02365918,
-      "num_requests": 1
-    }
-  }
-}
\ No newline at end of file
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index b4965dc64..a6cf52a6c 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -120,6 +120,7 @@ config_dir = appdirs.user_config_dir("litellm")
 user_config_path = os.getenv(
     "LITELLM_CONFIG_PATH", os.path.join(config_dir, config_filename)
 )
+experimental = False
 #### GLOBAL VARIABLES ####
 llm_router: Optional[litellm.Router] = None
 llm_model_list: Optional[list] = None
@@ -354,7 +355,7 @@ def initialize(
     save,
     config
 ):
-    global user_model, user_api_base, user_debug, user_max_tokens, user_request_timeout, user_temperature, user_telemetry, user_headers, llm_model_list, llm_router, server_settings
+    global user_model, user_api_base, user_debug, user_max_tokens, user_request_timeout, user_temperature, user_telemetry, user_headers, experimental, llm_model_list, llm_router, server_settings
     generate_feedback_box()
     user_model = model
     user_debug = debug
@@ -393,6 +394,8 @@ def initialize(
         dynamic_config["general"]["max_budget"] = max_budget
     if debug: # litellm-specific param
         litellm.set_verbose = True
+    if experimental:
+        pass
     if save:
         save_params_to_config(dynamic_config)
         with open(user_config_path) as f:
@@ -537,6 +540,22 @@ async def chat_completion(request: Request, model: Optional[str] = None):
         return {"error": error_msg}
 
 
+@router.post("/router/chat/completions")
+async def router_completion(request: Request):
+    try:
+        body = await request.body()
+        body_str = body.decode()
+        try:
+            data = ast.literal_eval(body_str)
+        except:
+            data = json.loads(body_str)
+        return {"data": data}
+    except Exception as e:
+        print(f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`")
+        error_traceback = traceback.format_exc()
+        error_msg = f"{str(e)}\n\n{error_traceback}"
+        return {"error": error_msg}
+
 @router.get("/ollama_logs")
 async def retrieve_server_log(request: Request):
     filepath = os.path.expanduser("~/.ollama/logs/server.log")
diff --git a/litellm/proxy/start.sh b/litellm/proxy/start.sh
index 586366108..44df50aaa 100755
--- a/litellm/proxy/start.sh
+++ b/litellm/proxy/start.sh
@@ -1,3 +1,2 @@
 #!/bin/bash
-python3 proxy_cli.py --config -f ../../secrets_template.toml
 python3 proxy_cli.py
\ No newline at end of file
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
new file mode 100644
index 000000000..348dd19f0
--- /dev/null
+++ b/litellm/proxy/utils.py
@@ -0,0 +1,98 @@
+# import threading, time, litellm
+# import concurrent.futures
+# """
+# v1:
+
+# 1. `--experimental_async` starts 2 background threads:
+#     - 1. to check the redis queue:
+#         - if job available
+#             - it dequeues as many jobs as healthy endpoints
+#             - calls llm api -> saves response in redis cache
+#     - 2. to check the llm apis:
+#         - check if endpoints are healthy (unhealthy = 4xx / 5xx call or >1min. queue)
+#         - which one is least busy
+# 2. /router/chat/completions: receives request -> adds to redis queue -> returns {run_id, started_at, request_obj}
+# 3. /router/chat/completions/runs/{run_id}: returns {status: _, [optional] response_obj: _}
+# """
+
+# def _start_health_check_thread():
+#     """
+#     Starts a separate thread to perform health checks periodically.
+#     """
+#     health_check_thread = threading.Thread(target=_perform_health_checks, daemon=True)
+#     health_check_thread.start()
+#     llm_call_thread = threading.Thread(target=_llm_call_thread, daemon=True)
+#     llm_call_thread.start()
+
+
+# def _llm_call_thread():
+#     """
+#     Periodically performs job checks on the redis queue.
+#     If available, make llm api calls.
+#     Write result to redis cache (1 min ttl)
+#     """
+#     with concurrent.futures.ThreadPoolExecutor() as executor:
+#         while True:
+#             job_checks = _job_check()
+#             future_to_job = {executor.submit(_llm_api_call, job): job for job in job_checks}
+#             for future in concurrent.futures.as_completed(future_to_job):
+#                 job = future_to_job[future]
+#                 try:
+#                     result = future.result()
+#                 except Exception as exc:
+#                     print(f'{job} generated an exception: {exc}')
+#                 else:
+#                     _write_to_cache(job, result, ttl=1*60)
+#             time.sleep(1) # sleep 1 second to avoid overloading the server
+
+
+
+# def _perform_health_checks():
+#     """
+#     Periodically performs health checks on the servers.
+#     Updates the list of healthy servers accordingly.
+#     """
+#     while True:
+#         healthy_deployments = _health_check()
+#         # Adjust the time interval based on your needs
+#         time.sleep(15)
+
+# def _job_check():
+#     """
+#     Periodically performs job checks on the redis queue.
+#     Returns the list of available jobs - len(available_jobs) == len(healthy_endpoints),
+#     e.g. don't dequeue a gpt-3.5-turbo job if there's no healthy deployments left
+#     """
+#     pass
+
+# def _llm_api_call(**data):
+#     """
+#     Makes the litellm.completion() call with 3 retries
+#     """
+#     return litellm.completion(num_retries=3, **data)
+
+# def _write_to_cache():
+#     """
+#     Writes the result to a redis cache in the form (key:job_id, value: )
+#     """
+#     pass
+
+# def _health_check():
+#     """
+#     Performs a health check on the deployments
+#     Returns the list of healthy deployments
+#     """
+#     healthy_deployments = []
+#     for deployment in model_list:
+#         litellm_args = deployment["litellm_params"]
+#         try:
+#             start_time = time.time()
+#             litellm.completion(messages=[{"role": "user", "content": ""}], max_tokens=1, **litellm_args) # hit the server with a blank message to see how long it takes to respond
+#             end_time = time.time()
+#             response_time = end_time - start_time
+#             logging.debug(f"response_time: {response_time}")
+#             healthy_deployments.append((deployment, response_time))
+#             healthy_deployments.sort(key=lambda x: x[1])
+#         except Exception as e:
+#             pass
+#     return healthy_deployments