diff --git a/litellm/deprecated_litellm_server/.env.template b/litellm/deprecated_litellm_server/.env.template
index a87ae9cf39..a1c32a4549 100644
--- a/litellm/deprecated_litellm_server/.env.template
+++ b/litellm/deprecated_litellm_server/.env.template
@@ -1,43 +1,43 @@
-# set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
-AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys
+# # Set the auth strategy for LLM APIs - defaults to using environment variables
+# AUTH_STRATEGY = "ENV" # ENV or DYNAMIC: ENV always reads from environment variables; DYNAMIC reads request headers to set LLM API keys
 
-OPENAI_API_KEY = ""
+# OPENAI_API_KEY = ""
 
-HUGGINGFACE_API_KEY="" 
+# HUGGINGFACE_API_KEY="" 
 
-TOGETHERAI_API_KEY=""
+# TOGETHERAI_API_KEY=""
 
-REPLICATE_API_KEY="" 
+# REPLICATE_API_KEY="" 
 
-## bedrock / sagemaker
-AWS_ACCESS_KEY_ID = "" 
-AWS_SECRET_ACCESS_KEY = ""
+# ## bedrock / sagemaker
+# AWS_ACCESS_KEY_ID = "" 
+# AWS_SECRET_ACCESS_KEY = ""
 
-AZURE_API_KEY = ""
-AZURE_API_BASE = ""
-AZURE_API_VERSION = ""
+# AZURE_API_KEY = ""
+# AZURE_API_BASE = ""
+# AZURE_API_VERSION = ""
 
-ANTHROPIC_API_KEY = ""
+# ANTHROPIC_API_KEY = ""
 
-COHERE_API_KEY = ""
+# COHERE_API_KEY = ""
 
-## CONFIG FILE ## 
-# CONFIG_FILE_PATH = ""  # uncomment to point to config file  
+# ## CONFIG FILE ## 
+# # CONFIG_FILE_PATH = ""  # uncomment to point to config file  
 
-## LOGGING ## 
+# ## LOGGING ## 
 
-SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs
+# SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs
 
-### LANGFUSE
-LANGFUSE_PUBLIC_KEY = ""
-LANGFUSE_SECRET_KEY = ""
-# Optional, defaults to https://cloud.langfuse.com
-LANGFUSE_HOST = "" # optional
+# ### LANGFUSE
+# LANGFUSE_PUBLIC_KEY = ""
+# LANGFUSE_SECRET_KEY = ""
+# # Optional, defaults to https://cloud.langfuse.com
+# LANGFUSE_HOST = "" # optional
 
 
-## CACHING ## 
+# ## CACHING ## 
 
-### REDIS
-REDIS_HOST = "" 
-REDIS_PORT = "" 
-REDIS_PASSWORD = "" 
+# ### REDIS
+# REDIS_HOST = "" 
+# REDIS_PORT = "" 
+# REDIS_PASSWORD = "" 
diff --git a/litellm/deprecated_litellm_server/Dockerfile b/litellm/deprecated_litellm_server/Dockerfile
index 7be7ba4c91..9b3b314c4b 100644
--- a/litellm/deprecated_litellm_server/Dockerfile
+++ b/litellm/deprecated_litellm_server/Dockerfile
@@ -1,10 +1,10 @@
-FROM python:3.10
+# FROM python:3.10
 
-ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
-COPY . /app
-WORKDIR /app
-RUN pip install -r requirements.txt
+# ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
+# COPY . /app
+# WORKDIR /app
+# RUN pip install -r requirements.txt
 
-EXPOSE $PORT 
+# EXPOSE $PORT 
 
-CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10 
\ No newline at end of file
+# CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10 
\ No newline at end of file
diff --git a/litellm/deprecated_litellm_server/README.md b/litellm/deprecated_litellm_server/README.md
index 4c2442aa7c..142bad1850 100644
--- a/litellm/deprecated_litellm_server/README.md
+++ b/litellm/deprecated_litellm_server/README.md
@@ -1,124 +1,3 @@
 # litellm-server [experimental]
 
-Load balancer for multiple API Deployments (eg. Azure/OpenAI)
-
-<img width="1036" alt="Screenshot 2023-11-06 at 6 54 16 PM" src="https://github.com/BerriAI/litellm/assets/17561003/d32da338-1d72-45bb-bca8-ac70f1d3e980">
-
-LiteLLM Server supports: 
-- LLM API Calls in the OpenAI ChatCompletions format 
-- Caching + Logging capabilities (Redis and Langfuse, respectively)
-- Setting API keys in the request headers or in the .env 
-
-## Usage 
-
-```shell
-docker run -e PORT=8000 -e OPENAI_API_KEY=<your-openai-key> -p 8000:8000 ghcr.io/berriai/litellm:latest
-```
-OpenAI Proxy running on http://0.0.0.0:8000
-
-```shell
-curl http://0.0.0.0:8000/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-     "model": "gpt-3.5-turbo",
-     "messages": [{"role": "user", "content": "Say this is a test!"}],
-     "temperature": 0.7
-   }'
-```
-
-[**See how to call Huggingface,Bedrock,TogetherAI,Anthropic, etc.**](https://docs.litellm.ai/docs/providers)
-## Endpoints:
-- `/chat/completions` - chat completions endpoint to call 100+ LLMs
-- `/models` - available models on server
-
-## Save Model-specific params (API Base, API Keys, Temperature, etc.)
-Use the [router_config_template.yaml](https://github.com/BerriAI/litellm/blob/main/router_config_template.yaml) to save model-specific information like api_base, api_key, temperature, max_tokens, etc. 
-
-1. Create a `config.yaml` file
-```shell
-model_list:
-  - model_name: gpt-3.5-turbo # set model alias 
-    litellm_params: # params for litellm.completion() - https://docs.litellm.ai/docs/completion/input#input---request-body
-      model: azure/chatgpt-v-2 # azure/<your-deployment-name> <- actual name used for litellm.completion()
-      api_key: your_azure_api_key
-      api_version: your_azure_api_version
-      api_base: your_azure_api_base
-  - model_name: mistral-7b
-    litellm_params:
-      model: ollama/mistral
-      api_base: your_ollama_api_base
-```
-
-2. Start the server
-
-```shell
-docker run -e PORT=8000 -p 8000:8000 -v $(pwd)/config.yaml:/app/config.yaml ghcr.io/berriai/litellm:latest
-```
-## Caching 
-
-Add Redis Caching to your server via environment variables  
-
-```env
-### REDIS
-REDIS_HOST = "" 
-REDIS_PORT = "" 
-REDIS_PASSWORD = "" 
-```
-
-Docker command: 
-
-```shell
-docker run -e REDIST_HOST=<your-redis-host> -e REDIS_PORT=<your-redis-port> -e REDIS_PASSWORD=<your-redis-password> -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
-```
-
-## Logging 
-
-1. Debug Logs
-Print the input/output params by setting `SET_VERBOSE = "True"`.
-
-Docker command:
-
-```shell
-docker run -e SET_VERBOSE="True" -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
-```
-
-Add Langfuse Logging to your server via environment variables  
-
-```env
-### LANGFUSE
-LANGFUSE_PUBLIC_KEY = ""
-LANGFUSE_SECRET_KEY = ""
-# Optional, defaults to https://cloud.langfuse.com
-LANGFUSE_HOST = "" # optional
-```
-
-Docker command: 
-
-```shell
-docker run -e LANGFUSE_PUBLIC_KEY=<your-public-key> -e LANGFUSE_SECRET_KEY=<your-secret-key> -e LANGFUSE_HOST=<your-langfuse-host> -e PORT=8000 -p 8000:8000 ghcr.io/berriai/litellm:latest
-```
-
-## Running Locally
-```shell 
-$ git clone https://github.com/BerriAI/litellm.git
-```
-```shell
-$ cd ./litellm/litellm_server
-```
-
-```shell
-$ uvicorn main:app --host 0.0.0.0 --port 8000
-```
-### Custom Config 
-1. Create + Modify [router_config.yaml](https://github.com/BerriAI/litellm/blob/main/router_config_template.yaml) (save your azure/openai/etc. deployment info)
-```shell
-cp ./router_config_template.yaml ./router_config.yaml
-```
-2. Build Docker Image
-```shell
-docker build -t litellm_server . --build-arg CONFIG_FILE=./router_config.yaml 
-```
-3. Run Docker Image
-```shell
-docker run --name litellm_server -e PORT=8000 -p 8000:8000 litellm_server
-```
+This server is deprecated. See `litellm/proxy` instead.
\ No newline at end of file
diff --git a/litellm/deprecated_litellm_server/__init__.py b/litellm/deprecated_litellm_server/__init__.py
index 8335f23396..019bc5a117 100644
--- a/litellm/deprecated_litellm_server/__init__.py
+++ b/litellm/deprecated_litellm_server/__init__.py
@@ -1,2 +1,2 @@
-from .main import *
-from .server_utils import *
\ No newline at end of file
+# from .main import *
+# from .server_utils import *
\ No newline at end of file
diff --git a/litellm/deprecated_litellm_server/main.py b/litellm/deprecated_litellm_server/main.py
index 3bfc40f91e..11f011db3c 100644
--- a/litellm/deprecated_litellm_server/main.py
+++ b/litellm/deprecated_litellm_server/main.py
@@ -1,193 +1,193 @@
-import os, traceback
-from fastapi import FastAPI, Request, HTTPException
-from fastapi.routing import APIRouter
-from fastapi.responses import StreamingResponse, FileResponse
-from fastapi.middleware.cors import CORSMiddleware
-import json, sys
-from typing import Optional
-sys.path.insert(
-    0, os.path.abspath("../")
-)  # Adds the parent directory to the system path - for litellm local dev
-import litellm
+# import os, traceback
+# from fastapi import FastAPI, Request, HTTPException
+# from fastapi.routing import APIRouter
+# from fastapi.responses import StreamingResponse, FileResponse
+# from fastapi.middleware.cors import CORSMiddleware
+# import json, sys
+# from typing import Optional
+# sys.path.insert(
+#     0, os.path.abspath("../")
+# )  # Adds the parent directory to the system path - for litellm local dev
+# import litellm
 
-try:
-    from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
-except ImportError:
-    from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
-import dotenv
-dotenv.load_dotenv() # load env variables
+# try:
+#     from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
+# except ImportError:
+#     from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
+# import dotenv
+# dotenv.load_dotenv() # load env variables
 
-app = FastAPI(docs_url="/", title="LiteLLM API")
-router = APIRouter()
-origins = ["*"]
+# app = FastAPI(docs_url="/", title="LiteLLM API")
+# router = APIRouter()
+# origins = ["*"]
 
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-#### GLOBAL VARIABLES ####
-llm_router: Optional[litellm.Router] = None
-llm_model_list: Optional[list] = None
-server_settings: Optional[dict] = None
+# app.add_middleware(
+#     CORSMiddleware,
+#     allow_origins=origins,
+#     allow_credentials=True,
+#     allow_methods=["*"],
+#     allow_headers=["*"],
+# )
+# #### GLOBAL VARIABLES ####
+# llm_router: Optional[litellm.Router] = None
+# llm_model_list: Optional[list] = None
+# server_settings: Optional[dict] = None
 
-set_callbacks() # sets litellm callbacks for logging if they exist in the environment 
+# set_callbacks() # sets litellm callbacks for logging if they exist in the environment 
 
-if "CONFIG_FILE_PATH" in os.environ:
-    llm_router, llm_model_list, server_settings = load_router_config(router=llm_router, config_file_path=os.getenv("CONFIG_FILE_PATH"))
-else:
-    llm_router, llm_model_list, server_settings = load_router_config(router=llm_router)
-#### API ENDPOINTS ####
-@router.get("/v1/models")
-@router.get("/models")  # if project requires model list
-def model_list():
-    all_models = litellm.utils.get_valid_models()
-    if llm_model_list: 
-        all_models += llm_model_list
-    return dict(
-        data=[
-            {
-                "id": model,
-                "object": "model",
-                "created": 1677610602,
-                "owned_by": "openai",
-            }
-            for model in all_models
-        ],
-        object="list",
-    )
-# for streaming
-def data_generator(response):
+# if "CONFIG_FILE_PATH" in os.environ:
+#     llm_router, llm_model_list, server_settings = load_router_config(router=llm_router, config_file_path=os.getenv("CONFIG_FILE_PATH"))
+# else:
+#     llm_router, llm_model_list, server_settings = load_router_config(router=llm_router)
+# #### API ENDPOINTS ####
+# @router.get("/v1/models")
+# @router.get("/models")  # if project requires model list
+# def model_list():
+#     all_models = litellm.utils.get_valid_models()
+#     if llm_model_list: 
+#         all_models += llm_model_list
+#     return dict(
+#         data=[
+#             {
+#                 "id": model,
+#                 "object": "model",
+#                 "created": 1677610602,
+#                 "owned_by": "openai",
+#             }
+#             for model in all_models
+#         ],
+#         object="list",
+#     )
+# # for streaming
+# def data_generator(response):
 
-    for chunk in response:
+#     for chunk in response:
 
-        yield f"data: {json.dumps(chunk)}\n\n"
+#         yield f"data: {json.dumps(chunk)}\n\n"
 
-@router.post("/v1/completions")
-@router.post("/completions")
-async def completion(request: Request):
-    data = await request.json()
-    response = litellm.completion(
-        **data
-    )
-    if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
-            return StreamingResponse(data_generator(response), media_type='text/event-stream')
-    return response
+# @router.post("/v1/completions")
+# @router.post("/completions")
+# async def completion(request: Request):
+#     data = await request.json()
+#     response = litellm.completion(
+#         **data
+#     )
+#     if 'stream' in data and data['stream'] == True: # use data_generator to stream responses
+#             return StreamingResponse(data_generator(response), media_type='text/event-stream')
+#     return response
 
-@router.post("/v1/embeddings")
-@router.post("/embeddings")
-async def embedding(request: Request):
-    try: 
-        data = await request.json() 
-        # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
-        if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # if users pass LLM api keys as part of header
-            api_key = request.headers.get("authorization")
-            api_key = api_key.replace("Bearer", "").strip() # type: ignore
-            if len(api_key.strip()) > 0:
-                api_key = api_key
-                data["api_key"] = api_key
-        response = litellm.embedding(
-            **data
-        )
-        return response
-    except Exception as e:
-        error_traceback = traceback.format_exc()
-        error_msg = f"{str(e)}\n\n{error_traceback}"
-        return {"error": error_msg}
+# @router.post("/v1/embeddings")
+# @router.post("/embeddings")
+# async def embedding(request: Request):
+#     try: 
+#         data = await request.json() 
+#         # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
+#         if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # if users pass LLM api keys as part of header
+#             api_key = request.headers.get("authorization")
+#             api_key = api_key.replace("Bearer", "").strip() # type: ignore
+#             if len(api_key.strip()) > 0:
+#                 api_key = api_key
+#                 data["api_key"] = api_key
+#         response = litellm.embedding(
+#             **data
+#         )
+#         return response
+#     except Exception as e:
+#         error_traceback = traceback.format_exc()
+#         error_msg = f"{str(e)}\n\n{error_traceback}"
+#         return {"error": error_msg}
 
-@router.post("/v1/chat/completions")
-@router.post("/chat/completions")
-@router.post("/openai/deployments/{model:path}/chat/completions") # azure compatible endpoint
-async def chat_completion(request: Request, model: Optional[str] = None):
-    global llm_model_list, server_settings
-    try:
-        data = await request.json()
-        server_model = server_settings.get("completion_model", None) if server_settings else None
-        data["model"] = server_model or model or data["model"]
-        ## CHECK KEYS ## 
-        # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
-        # env_validation = litellm.validate_environment(model=data["model"])
-        # if (env_validation['keys_in_environment'] is False or os.getenv("AUTH_STRATEGY", None) == "DYNAMIC") and ("authorization" in request.headers or "api-key" in request.headers): # if users pass LLM api keys as part of header
-        #     if "authorization" in request.headers:
-        #         api_key = request.headers.get("authorization")
-        #     elif "api-key" in request.headers: 
-        #         api_key = request.headers.get("api-key")
-        #     print(f"api_key in headers: {api_key}")
-        #     if " " in api_key:
-        #         api_key = api_key.split(" ")[1]
-        #     print(f"api_key split: {api_key}")
-        #     if len(api_key) > 0:
-        #         api_key = api_key
-        #         data["api_key"] = api_key
-        #         print(f"api_key in data: {api_key}")
-        ## CHECK CONFIG ## 
-        if llm_model_list and data["model"] in [m["model_name"] for m in llm_model_list]:
-            for m in llm_model_list: 
-                if data["model"] == m["model_name"]: 
-                    for key, value in m["litellm_params"].items(): 
-                        data[key] = value
-                    break
-        response = litellm.completion(
-            **data
-        )
-        if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
-                return StreamingResponse(data_generator(response), media_type='text/event-stream')
-        return response
-    except Exception as e:
-        error_traceback = traceback.format_exc()
+# @router.post("/v1/chat/completions")
+# @router.post("/chat/completions")
+# @router.post("/openai/deployments/{model:path}/chat/completions") # azure compatible endpoint
+# async def chat_completion(request: Request, model: Optional[str] = None):
+#     global llm_model_list, server_settings
+#     try:
+#         data = await request.json()
+#         server_model = server_settings.get("completion_model", None) if server_settings else None
+#         data["model"] = server_model or model or data["model"]
+#         ## CHECK KEYS ## 
+#         # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
+#         # env_validation = litellm.validate_environment(model=data["model"])
+#         # if (env_validation['keys_in_environment'] is False or os.getenv("AUTH_STRATEGY", None) == "DYNAMIC") and ("authorization" in request.headers or "api-key" in request.headers): # if users pass LLM api keys as part of header
+#         #     if "authorization" in request.headers:
+#         #         api_key = request.headers.get("authorization")
+#         #     elif "api-key" in request.headers: 
+#         #         api_key = request.headers.get("api-key")
+#         #     print(f"api_key in headers: {api_key}")
+#         #     if " " in api_key:
+#         #         api_key = api_key.split(" ")[1]
+#         #     print(f"api_key split: {api_key}")
+#         #     if len(api_key) > 0:
+#         #         api_key = api_key
+#         #         data["api_key"] = api_key
+#         #         print(f"api_key in data: {api_key}")
+#         ## CHECK CONFIG ## 
+#         if llm_model_list and data["model"] in [m["model_name"] for m in llm_model_list]:
+#             for m in llm_model_list: 
+#                 if data["model"] == m["model_name"]: 
+#                     for key, value in m["litellm_params"].items(): 
+#                         data[key] = value
+#                     break
+#         response = litellm.completion(
+#             **data
+#         )
+#         if 'stream' in data and data['stream'] == True: # use data_generator to stream responses
+#                 return StreamingResponse(data_generator(response), media_type='text/event-stream')
+#         return response
+#     except Exception as e:
+#         error_traceback = traceback.format_exc()
 
-        error_msg = f"{str(e)}\n\n{error_traceback}"
-        # return {"error": error_msg}
-        raise HTTPException(status_code=500, detail=error_msg)
+#         error_msg = f"{str(e)}\n\n{error_traceback}"
+#         # return {"error": error_msg}
+#         raise HTTPException(status_code=500, detail=error_msg)
 
-@router.post("/router/completions")
-async def router_completion(request: Request):
-    global llm_router
-    try: 
-        data = await request.json()
-        if "model_list" in data: 
-            llm_router = litellm.Router(model_list=data.pop("model_list"))
-        if llm_router is None: 
-            raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
+# @router.post("/router/completions")
+# async def router_completion(request: Request):
+#     global llm_router
+#     try: 
+#         data = await request.json()
+#         if "model_list" in data: 
+#             llm_router = litellm.Router(model_list=data.pop("model_list"))
+#         if llm_router is None: 
+#             raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
         
-        # openai.ChatCompletion.create replacement
-        response = await llm_router.acompletion(model="gpt-3.5-turbo", 
-                        messages=[{"role": "user", "content": "Hey, how's it going?"}])
+#         # openai.ChatCompletion.create replacement
+#         response = await llm_router.acompletion(model="gpt-3.5-turbo", 
+#                         messages=[{"role": "user", "content": "Hey, how's it going?"}])
 
-        if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
-                return StreamingResponse(data_generator(response), media_type='text/event-stream')
-        return response
-    except Exception as e: 
-        error_traceback = traceback.format_exc()
-        error_msg = f"{str(e)}\n\n{error_traceback}"
-        return {"error": error_msg}
+#         if 'stream' in data and data['stream'] == True: # use data_generator to stream responses
+#                 return StreamingResponse(data_generator(response), media_type='text/event-stream')
+#         return response
+#     except Exception as e: 
+#         error_traceback = traceback.format_exc()
+#         error_msg = f"{str(e)}\n\n{error_traceback}"
+#         return {"error": error_msg}
 
-@router.post("/router/embedding")
-async def router_embedding(request: Request):
-    global llm_router
-    try: 
-        data = await request.json()
-        if "model_list" in data: 
-            llm_router = litellm.Router(model_list=data.pop("model_list"))
-        if llm_router is None: 
-            raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
+# @router.post("/router/embedding")
+# async def router_embedding(request: Request):
+#     global llm_router
+#     try: 
+#         data = await request.json()
+#         if "model_list" in data: 
+#             llm_router = litellm.Router(model_list=data.pop("model_list"))
+#         if llm_router is None: 
+#             raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
 
-        response = await llm_router.aembedding(model="gpt-3.5-turbo",  # type: ignore
-                        messages=[{"role": "user", "content": "Hey, how's it going?"}])
+#         response = await llm_router.aembedding(model="gpt-3.5-turbo",  # type: ignore
+#                         messages=[{"role": "user", "content": "Hey, how's it going?"}])
 
-        if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
-                return StreamingResponse(data_generator(response), media_type='text/event-stream')
-        return response
-    except Exception as e: 
-        error_traceback = traceback.format_exc()
-        error_msg = f"{str(e)}\n\n{error_traceback}"
-        return {"error": error_msg}
+#         if 'stream' in data and data['stream'] == True: # use data_generator to stream responses
+#                 return StreamingResponse(data_generator(response), media_type='text/event-stream')
+#         return response
+#     except Exception as e: 
+#         error_traceback = traceback.format_exc()
+#         error_msg = f"{str(e)}\n\n{error_traceback}"
+#         return {"error": error_msg}
 
-@router.get("/")
-async def home(request: Request):
-    return "LiteLLM: RUNNING"
+# @router.get("/")
+# async def home(request: Request):
+#     return "LiteLLM: RUNNING"
 
 
-app.include_router(router)
\ No newline at end of file
+# app.include_router(router)
\ No newline at end of file
diff --git a/litellm/deprecated_litellm_server/openapi.json b/litellm/deprecated_litellm_server/openapi.json
deleted file mode 100644
index 2879b331cc..0000000000
--- a/litellm/deprecated_litellm_server/openapi.json
+++ /dev/null
@@ -1,245 +0,0 @@
-{
-    "openapi": "3.0.0",
-    "info": {
-      "version": "1.0.0",
-      "title": "LiteLLM API",
-      "description": "API for LiteLLM"
-    },
-    "paths": {
-      "/chat/completions": {
-        "post": {
-          "summary": "Create chat completion for 100+ LLM APIs",
-          "requestBody": {
-            "description": "Input parameters for chat completions",
-            "required": true,
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/ChatCompletionsRequest"
-                },
-                "example": {
-                    "model": "gpt-3.5-turbo",
-                    "messages": [
-                      {
-                        "role": "system",
-                        "content": "You are a helpful assistant."
-                      },
-                      {
-                        "role": "user",
-                        "content": "this is a test message from litellm proxy, can you ack"
-                      }
-                    ],
-                    "frequency_penalty": 0.0,
-                    "max_tokens": 500,
-                    "n": 1,
-                    "presence_penalty": 0.0,
-                    "stop": "###",
-                    "stream": false,
-                    "temperature": 0.7,
-                    "top_p": 0.8,
-                    "user": "test-litellm"
-                  }
-  
-              }
-            }
-          },
-          "responses": {
-            "200": {
-              "description": "Successful operation",
-              "content": {
-                "application/json": {
-                  "schema": {
-                    "$ref": "#/components/schemas/ChatCompletionsResponse"
-                  },
-                  "example": {
-                  "object": "chat.completion",
-                  "id": "chatcmpl-92861fad-b36c-41a1-88db-139344819276",
-                  "choices": [
-                    {
-                      "finish_reason": "stop_sequence",
-                      "index": 0,
-                      "message": {
-                        "content": "I'm a large language model trained by OpenAI, ACK receiving this message",
-                        "role": "assistant"
-                      }
-                    }
-                  ],
-                  "created": 1698253693.169062,
-                  "model": "gpt-3.5-turbo",
-                  "usage": {
-                    "prompt_tokens": 14,
-                    "completion_tokens": 102,
-                    "total_tokens": 116
-                  }
-                }
-  
-                }
-              }
-            },
-            "500": {
-              "description": "Server error"
-            }
-          }
-        }
-      },
-      "/models": {
-          "get": {
-            "summary": "Get models",
-            "responses": {
-              "200": {
-                "description": "Successful operation"
-              }
-            }
-          }
-        },
-        "/": {
-          "get": {
-            "summary": "Swagger docs",
-            "responses": {
-              "200": {
-                "description": "Successful operation"
-              }
-            }
-          }
-        }
-    },
-    "components": {
-      "schemas": {
-        "ChatCompletionsRequest": {
-          "type": "object",
-          "properties": {
-            "messages": {
-              "type": "array",
-              "items": {
-                "type": "object",
-                "properties": {
-                  "role": {
-                    "type": "string"
-                  },
-                  "content": {
-                    "type": "string"
-                  }
-                },
-                "required": ["role", "content"]
-              }
-            },
-            "model": {
-              "type": "string"
-            },
-            "frequency_penalty": {
-              "type": "number"
-            },
-            "function_call": {
-              "type": ["string", "object"]
-            },
-            "functions": {
-              "type": "array"
-            },
-            "logit_bias": {
-              "type": "object"
-            },
-            "max_tokens": {
-              "type": "integer"
-            },
-            "n": {
-              "type": "integer"
-            },
-            "presence_penalty": {
-              "type": "number"
-            },
-            "stop": {
-              "oneOf": [
-                {
-                  "type": "string"
-                },
-                {
-                  "type": "array",
-                  "items": {
-                    "type": "string"
-                  }
-                }
-              ]
-            },
-            "stream": {
-              "type": "boolean"
-            },
-            "temperature": {
-              "type": "number"
-            },
-            "top_p": {
-              "type": "number"
-            },
-            "user": {
-              "type": "string"
-            },
-            "caching": {
-              "type": "boolean"
-            }
-          },
-          "required": ["messages", "model"]
-        },
-        "ChatCompletionsResponse": {
-        "type": "object",
-        "properties": {
-          "object": {
-            "type": "string"
-          },
-          "choices": {
-            "type": "array",
-            "items": {
-              "type": "object",
-              "properties": {
-                "finish_reason": {
-                  "type": "string"
-                },
-                "index": {
-                  "type": "integer"
-                },
-                "message": {
-                  "type": "object",
-                  "properties": {
-                    "content": {
-                      "type": "string"
-                    },
-                    "role": {
-                      "type": "string"
-                    }
-                  },
-                  "required": ["content", "role"]
-                },
-                "usage": {
-                  "type": "object",
-                  "properties": {
-                    "prompt_tokens": {
-                      "type": "integer"
-                    },
-                    "completion_tokens": {
-                      "type": "integer"
-                    },
-                    "total_tokens": {
-                      "type": "integer"
-                    }
-                  },
-                  "required": ["prompt_tokens", "completion_tokens", "total_tokens"]
-                }
-              },
-              "required": ["finish_reason", "index", "message", "usage"]
-            }
-          },
-          "id": {
-            "type": "string"
-          },
-          "created": {
-            "type": "number"
-          },
-          "model": {
-            "type": "string"
-          }
-        },
-        "required": ["object", "choices", "id", "created", "model"]
-      }
-  
-      }
-    }
-  }
-  
\ No newline at end of file
diff --git a/litellm/deprecated_litellm_server/requirements.txt b/litellm/deprecated_litellm_server/requirements.txt
index 3f473acd1e..09f6dba572 100644
--- a/litellm/deprecated_litellm_server/requirements.txt
+++ b/litellm/deprecated_litellm_server/requirements.txt
@@ -1,7 +1,7 @@
-openai
-fastapi
-uvicorn
-boto3
-litellm
-python-dotenv
-redis
\ No newline at end of file
+# openai
+# fastapi
+# uvicorn
+# boto3
+# litellm
+# python-dotenv
+# redis
\ No newline at end of file
diff --git a/litellm/deprecated_litellm_server/server_utils.py b/litellm/deprecated_litellm_server/server_utils.py
index 9f9096fb6e..209acc8b9a 100644
--- a/litellm/deprecated_litellm_server/server_utils.py
+++ b/litellm/deprecated_litellm_server/server_utils.py
@@ -1,86 +1,86 @@
-import os, litellm
-import pkg_resources
-import dotenv
-dotenv.load_dotenv() # load env variables
+# import os, litellm
+# import pkg_resources
+# import dotenv
+# dotenv.load_dotenv() # load env variables
 
-def print_verbose(print_statement): 
-    pass
+# def print_verbose(print_statement): 
+#     pass
 
-def get_package_version(package_name):
-    try:
-        package = pkg_resources.get_distribution(package_name)
-        return package.version
-    except pkg_resources.DistributionNotFound:
-        return None
+# def get_package_version(package_name):
+#     try:
+#         package = pkg_resources.get_distribution(package_name)
+#         return package.version
+#     except pkg_resources.DistributionNotFound:
+#         return None
 
-# Usage example
-package_name = "litellm"
-version = get_package_version(package_name)
-if version:
-    print_verbose(f"The version of {package_name} is {version}")
-else:
-    print_verbose(f"{package_name} is not installed")
-import yaml
-import dotenv
-from typing import Optional
-dotenv.load_dotenv() # load env variables
+# # Usage example
+# package_name = "litellm"
+# version = get_package_version(package_name)
+# if version:
+#     print_verbose(f"The version of {package_name} is {version}")
+# else:
+#     print_verbose(f"{package_name} is not installed")
+# import yaml
+# import dotenv
+# from typing import Optional
+# dotenv.load_dotenv() # load env variables
 
-def set_callbacks():
-    ## LOGGING
-    if len(os.getenv("SET_VERBOSE", "")) > 0: 
-        if os.getenv("SET_VERBOSE") == "True": 
-            litellm.set_verbose = True
-            print_verbose("\033[92mLiteLLM: Switched on verbose logging\033[0m")
-        else: 
-            litellm.set_verbose = False
+# def set_callbacks():
+#     ## LOGGING
+#     if len(os.getenv("SET_VERBOSE", "")) > 0: 
+#         if os.getenv("SET_VERBOSE") == "True": 
+#             litellm.set_verbose = True
+#             print_verbose("\033[92mLiteLLM: Switched on verbose logging\033[0m")
+#         else: 
+#             litellm.set_verbose = False
 
-    ### LANGFUSE
-    if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", ""))) > 0 or len(os.getenv("LANGFUSE_HOST", "")) > 0:
-        litellm.success_callback = ["langfuse"] 
-        print_verbose("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")
+#     ### LANGFUSE
+#     if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", ""))) > 0 or len(os.getenv("LANGFUSE_HOST", "")) > 0:
+#         litellm.success_callback = ["langfuse"] 
+#         print_verbose("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")
     
-    ## CACHING 
-    ### REDIS
-    # if len(os.getenv("REDIS_HOST", "")) >  0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0: 
-    #     print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
-    #     from litellm.caching import Cache
-    #     litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
-    #     print("\033[92mLiteLLM: Switched on Redis caching\033[0m")
+#     ## CACHING 
+#     ### REDIS
+#     # if len(os.getenv("REDIS_HOST", "")) >  0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0: 
+#     #     print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
+#     #     from litellm.caching import Cache
+#     #     litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
+#     #     print("\033[92mLiteLLM: Switched on Redis caching\033[0m")
 
 
 
-def load_router_config(router: Optional[litellm.Router], config_file_path: Optional[str]='/app/config.yaml'):
-    config = {}
-    server_settings  = {} 
-    try: 
-        if os.path.exists(config_file_path): # type: ignore
-            with open(config_file_path, 'r') as file: # type: ignore
-                config = yaml.safe_load(file)
-        else:
-            pass
-    except:
-        pass
+# def load_router_config(router: Optional[litellm.Router], config_file_path: Optional[str]='/app/config.yaml'):
+#     config = {}
+#     server_settings  = {} 
+#     try: 
+#         if os.path.exists(config_file_path): # type: ignore
+#             with open(config_file_path, 'r') as file: # type: ignore
+#                 config = yaml.safe_load(file)
+#         else:
+#             pass
+#     except:
+#         pass
 
-    ## SERVER SETTINGS (e.g. default completion model = 'ollama/mistral')
-    server_settings = config.get("server_settings", None)
-    if server_settings: 
-        server_settings = server_settings
+#     ## SERVER SETTINGS (e.g. default completion model = 'ollama/mistral')
+#     server_settings = config.get("server_settings", None)
+#     if server_settings: 
+#         server_settings = server_settings
 
-    ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
-    litellm_settings = config.get('litellm_settings', None)
-    if litellm_settings: 
-        for key, value in litellm_settings.items(): 
-            setattr(litellm, key, value)
+#     ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
+#     litellm_settings = config.get('litellm_settings', None)
+#     if litellm_settings: 
+#         for key, value in litellm_settings.items(): 
+#             setattr(litellm, key, value)
 
-    ## MODEL LIST
-    model_list = config.get('model_list', None)
-    if model_list: 
-        router = litellm.Router(model_list=model_list)
+#     ## MODEL LIST
+#     model_list = config.get('model_list', None)
+#     if model_list: 
+#         router = litellm.Router(model_list=model_list)
     
-    ## ENVIRONMENT VARIABLES
-    environment_variables = config.get('environment_variables', None)
-    if environment_variables: 
-        for key, value in environment_variables.items(): 
-            os.environ[key] = value
+#     ## ENVIRONMENT VARIABLES
+#     environment_variables = config.get('environment_variables', None)
+#     if environment_variables: 
+#         for key, value in environment_variables.items(): 
+#             os.environ[key] = value
 
-    return router, model_list, server_settings
+#     return router, model_list, server_settings