diff --git a/.gitignore b/.gitignore
index b31366a33..360bba798 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,6 @@ litellm/tests/langfuse.log
 litellm/tests/dynamo*.log
 .vscode/settings.json
 litellm/proxy/log.txt
+proxy_server_config_@.yaml
+.gitignore
+proxy_server_config_2.yaml
diff --git a/Dockerfile b/Dockerfile
index c9dba6615..b76aaf1d1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,35 +19,32 @@ RUN pip install --upgrade pip && \
     pip install build

 # Copy the current directory contents into the container at /app
-COPY requirements.txt .
+COPY . .

 # Build the package
 RUN rm -rf dist/* && python -m build

+# There should be only one wheel file now, assume the build only creates one
+RUN ls -1 dist/*.whl | head -1
+
 # Install the package
 RUN pip install dist/*.whl

-# Install any needed packages specified in requirements.txt
-RUN pip install wheel && \
-    pip wheel --no-cache-dir --wheel-dir=/app/wheels -r requirements.txt
-
-# Clear out any existing builds and build the package
-RUN rm -rf dist/* && python -m build
-
-# There should be only one wheel file now, assume the build only creates one
-RUN ls -1 dist/*.whl | head -1
+# install dependencies as wheels
+RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt

 # Runtime stage
 FROM $LITELLM_RUNTIME_IMAGE as runtime

 WORKDIR /app

-# Depending on wheel naming patterns, use a wildcard if multiple versions are possible
 # Copy the built wheel from the builder stage to the runtime stage; assumes only one wheel file is present
 COPY --from=builder /app/dist/*.whl .
+COPY --from=builder /wheels/ /wheels/

 # Install the built wheel using pip; again using a wildcard if it's the only file
-RUN pip install *.whl && rm -f *.whl
+RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
+

 EXPOSE 4000/tcp
diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index b014667df..70036ae74 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -153,29 +153,29 @@ class AzureChatCompletion(BaseLLM):
                 "messages": messages,
                 **optional_params
             }
-            ## LOGGING
-            logging_obj.pre_call(
-                input=messages,
-                api_key=api_key,
-                additional_args={
-                    "headers": {
-                        "api_key": api_key,
-                        "azure_ad_token": azure_ad_token
-                    },
-                    "api_version": api_version,
-                    "api_base": api_base,
-                    "complete_input_dict": data,
-                },
-            )
             if acompletion is True:
                 if optional_params.get("stream", False):
                     return self.async_streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
                 else:
-                    return self.acompletion(api_base=api_base, data=data, model_response=model_response, api_key=api_key, api_version=api_version, model=model, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
+                    return self.acompletion(api_base=api_base, data=data, model_response=model_response, api_key=api_key, api_version=api_version, model=model, azure_ad_token=azure_ad_token, timeout=timeout, client=client, logging_obj=logging_obj)
             elif "stream" in optional_params and optional_params["stream"] == True:
                 return self.streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
             else:
+                ## LOGGING
+                logging_obj.pre_call(
+                    input=messages,
+                    api_key=api_key,
+                    additional_args={
+                        "headers": {
+                            "api_key": api_key,
+                            "azure_ad_token": azure_ad_token
+                        },
+                        "api_version": api_version,
+                        "api_base": api_base,
+                        "complete_input_dict": data,
+                    },
+                )
                 if not isinstance(max_retries, int):
                     raise AzureOpenAIError(status_code=422, message="max retries must be an int")
                 # init AzureOpenAI Client
@@ -225,6 +225,7 @@ class AzureChatCompletion(BaseLLM):
                           model_response: ModelResponse,
                           azure_ad_token: Optional[str]=None,
                           client = None, # this is the AsyncAzureOpenAI
+                          logging_obj=None,
                           ):
         response = None
         try:
@@ -248,13 +249,19 @@ class AzureChatCompletion(BaseLLM):
                 azure_client = AsyncAzureOpenAI(**azure_client_params)
             else:
                 azure_client = client
+            ## LOGGING
+            logging_obj.pre_call(
+                input=data['messages'],
+                api_key=azure_client.api_key,
+                additional_args={"headers": {"Authorization": f"Bearer {azure_client.api_key}"}, "api_base": azure_client._base_url._uri_reference, "acompletion": True, "complete_input_dict": data},
+            )
             response = await azure_client.chat.completions.create(**data)
             return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
         except AzureOpenAIError as e:
             exception_mapping_worked = True
             raise e
         except Exception as e:
-            raise e
+            raise AzureOpenAIError(status_code=500, message=str(e))

     def streaming(self,
                   logging_obj,
@@ -319,6 +326,12 @@ class AzureChatCompletion(BaseLLM):
             azure_client = AsyncAzureOpenAI(**azure_client_params)
         else:
             azure_client = client
+        ## LOGGING
+        logging_obj.pre_call(
+            input=data['messages'],
+            api_key=azure_client.api_key,
+            additional_args={"headers": {"Authorization": f"Bearer {azure_client.api_key}"}, "api_base": azure_client._base_url._uri_reference, "acompletion": True, "complete_input_dict": data},
+        )
         response = await azure_client.chat.completions.create(**data)
         streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
         async for transformed_chunk in streamwrapper:
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index e942d4d41..13cdc2d02 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -18,7 +18,6 @@ try:
     import appdirs
     import backoff
     import yaml
-    import rq
     import orjson
 except ImportError:
     import sys
@@ -34,7 +33,6 @@ except ImportError:
             "appdirs",
             "backoff",
             "pyyaml",
-            "rq",
             "orjson"
         ]
     )
@@ -111,7 +109,6 @@ from fastapi.security.api_key import APIKeyHeader
 import json
 import logging
 from typing import Union
-# from litellm.proxy.queue import start_rq_worker_in_background

 app = FastAPI(docs_url="/", title="LiteLLM API")
 router = APIRouter()
diff --git a/requirements.txt b/requirements.txt
index 986580d7a..3cf315935 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,21 +1,26 @@
 # LITELLM PROXY DEPENDENCIES #
-openai>=1.0.0
-fastapi
-tomli
-pydantic>=2.5
-appdirs
-tomli_w
-backoff
-pyyaml
-uvicorn
-boto3
-redis
-pyyaml
-rq
-prisma
-celery
-psutil
-mangum
-google-generativeai
-traceloop-sdk==0.5.3
-langfuse==1.14.0
+anyio==4.2.0 # openai + http req.
+openai>=1.0.0 # openai req.
+fastapi # server dep
+pydantic>=2.5 # openai req.
+appdirs # server dep
+backoff # server dep
+pyyaml # server dep
+uvicorn # server dep
+boto3 # aws bedrock/sagemaker calls
+redis # caching
+prisma # for db
+mangum # for aws lambda functions
+google-generativeai # for vertex ai calls
+traceloop-sdk==0.5.3 # for open telemetry logging
+langfuse==1.14.0 # for langfuse self-hosted logging
+### LITELLM PACKAGE DEPENDENCIES
+python-dotenv>=0.2.0 # for env
+tiktoken>=0.4.0 # for calculating usage
+importlib-metadata>=6.8.0 # for random utils
+tokenizers # for calculating usage
+click # for proxy cli
+jinja2==3.1.2 # for prompt templates
+certifi>=2023.7.22 # [TODO] clean up
+aiohttp # for network calls
+####
\ No newline at end of file
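
For context on the azure.py hunks above: the `pre_call` logging hook now fires inside `acompletion` and `async_streaming` only after the `AsyncAzureOpenAI` client has been resolved, so the logged key and base URL match the client actually used, and unexpected failures are re-raised as `AzureOpenAIError`. Below is a minimal sketch of that pattern, not litellm's actual implementation: the `Logging` stub, the placeholder credentials/endpoint, and the simplified `acompletion` signature are assumptions made for illustration.

```python
# Minimal sketch of the pattern from litellm/llms/azure.py (illustrative only):
# resolve the async client first, log via pre_call() with that client's
# credentials, then wrap unexpected errors in AzureOpenAIError.
import json
from typing import Optional

from openai import AsyncAzureOpenAI


class AzureOpenAIError(Exception):
    """Stand-in for litellm's AzureOpenAIError."""
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        super().__init__(message)


class Logging:
    """Placeholder logging object with the pre_call() shape used in the diff."""
    def pre_call(self, input, api_key, additional_args=None):
        print("PRE-CALL", input, additional_args or {})


async def acompletion(data: dict, logging_obj: Logging, client: Optional[AsyncAzureOpenAI] = None):
    azure_client = client or AsyncAzureOpenAI(
        api_key="my-azure-key",                             # placeholder credentials
        api_version="2023-07-01-preview",
        azure_endpoint="https://example.openai.azure.com",  # placeholder endpoint
    )
    # Log with the resolved client's key, as the relocated pre_call() does
    logging_obj.pre_call(
        input=data["messages"],
        api_key=azure_client.api_key,
        additional_args={"acompletion": True, "complete_input_dict": data},
    )
    try:
        response = await azure_client.chat.completions.create(**data)
        return json.loads(response.model_dump_json())
    except AzureOpenAIError:
        raise
    except Exception as e:
        # Unexpected failures surface as AzureOpenAIError, mirroring the diff
        raise AzureOpenAIError(status_code=500, message=str(e))
```

Logging after client resolution means the Authorization header and api_base recorded by the logger reflect whichever client (caller-supplied or newly constructed) actually serves the request.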