diff --git a/README.md b/README.md index 1c4e148443..93f013ea92 100644 --- a/README.md +++ b/README.md @@ -402,7 +402,7 @@ If you have suggestions on how to improve the code quality feel free to open an 1. (In root) create virtual environment `python -m venv .venv` 2. Activate virtual environment `source .venv/bin/activate` 3. Install dependencies `pip install -e ".[all]"` -4. Start proxy backend `uvicorn litellm.proxy.proxy_server:app --host localhost --port 4000 --reload` +4. Start proxy backend `uvicorn litellm_proxy.proxy_server:app --host localhost --port 4000 --reload` ### Frontend 1. Navigate to `ui/litellm-dashboard` diff --git a/docs/my-website/docs/proxy/call_hooks.md b/docs/my-website/docs/proxy/call_hooks.md index a7b0afcc18..d93d1971e4 100644 --- a/docs/my-website/docs/proxy/call_hooks.md +++ b/docs/my-website/docs/proxy/call_hooks.md @@ -17,7 +17,7 @@ This function is called just before a litellm completion call is made, and allow ```python from litellm.integrations.custom_logger import CustomLogger import litellm -from litellm.proxy.proxy_server import UserAPIKeyAuth, DualCache +from litellm_proxy.proxy_server import UserAPIKeyAuth, DualCache from typing import Optional, Literal # This file includes the custom callbacks for LiteLLM Proxy diff --git a/docs/my-website/docs/proxy/custom_auth.md b/docs/my-website/docs/proxy/custom_auth.md index c98ad8e09d..07d308496d 100644 --- a/docs/my-website/docs/proxy/custom_auth.md +++ b/docs/my-website/docs/proxy/custom_auth.md @@ -9,7 +9,7 @@ Here's how: Make sure the response type follows the `UserAPIKeyAuth` pydantic object. This is used for logging usage specific to that user key. ```python -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: try: diff --git a/docs/my-website/docs/proxy/custom_sso.md b/docs/my-website/docs/proxy/custom_sso.md index a89de0f324..57ee0e0995 100644 --- a/docs/my-website/docs/proxy/custom_sso.md +++ b/docs/my-website/docs/proxy/custom_sso.md @@ -20,12 +20,12 @@ Make sure the response type follows the `SSOUserDefinedValues` pydantic object. 
from fastapi import Request from fastapi_sso.sso.base import OpenID -from litellm.proxy._types import LitellmUserRoles, SSOUserDefinedValues -from litellm.proxy.management_endpoints.internal_user_endpoints import ( +from litellm_proxy._types import LitellmUserRoles, SSOUserDefinedValues +from litellm_proxy.management_endpoints.internal_user_endpoints import ( new_user, user_info, ) -from litellm.proxy.management_endpoints.team_endpoints import add_new_member +from litellm_proxy.management_endpoints.team_endpoints import add_new_member async def custom_sso_handler(userIDPInfo: OpenID) -> SSOUserDefinedValues: diff --git a/docs/my-website/docs/proxy/guardrails/custom_guardrail.md b/docs/my-website/docs/proxy/guardrails/custom_guardrail.md index 657ccab68e..c77a6fcab2 100644 --- a/docs/my-website/docs/proxy/guardrails/custom_guardrail.md +++ b/docs/my-website/docs/proxy/guardrails/custom_guardrail.md @@ -29,8 +29,8 @@ import litellm from litellm._logging import verbose_proxy_logger from litellm.caching.caching import DualCache from litellm.integrations.custom_guardrail import CustomGuardrail -from litellm.proxy._types import UserAPIKeyAuth -from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata +from litellm_proxy._types import UserAPIKeyAuth +from litellm_proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata from litellm.types.guardrails import GuardrailEventHooks @@ -449,7 +449,7 @@ import litellm from litellm._logging import verbose_proxy_logger from litellm.caching.caching import DualCache from litellm.integrations.custom_guardrail import CustomGuardrail -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth class myCustomGuardrail(CustomGuardrail): def __init__(self, **kwargs): diff --git a/enterprise/enterprise_hooks/aporia_ai.py b/enterprise/enterprise_hooks/aporia_ai.py index 2b427bea5c..0e32d62ec3 100644 --- a/enterprise/enterprise_hooks/aporia_ai.py +++ b/enterprise/enterprise_hooks/aporia_ai.py @@ -14,11 +14,11 @@ sys.path.insert( from typing import Optional, Literal, Any import litellm import sys -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth from litellm.integrations.custom_guardrail import CustomGuardrail from fastapi import HTTPException from litellm._logging import verbose_proxy_logger -from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata +from litellm_proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata from litellm.litellm_core_utils.logging_utils import ( convert_litellm_response_object_to_str, ) @@ -140,7 +140,7 @@ class AporiaGuardrail(CustomGuardrail): user_api_key_dict: UserAPIKeyAuth, response, ): - from litellm.proxy.common_utils.callback_utils import ( + from litellm_proxy.common_utils.callback_utils import ( add_guardrail_to_applied_guardrails_header, ) @@ -176,7 +176,7 @@ class AporiaGuardrail(CustomGuardrail): "responses", ], ): - from litellm.proxy.common_utils.callback_utils import ( + from litellm_proxy.common_utils.callback_utils import ( add_guardrail_to_applied_guardrails_header, ) diff --git a/enterprise/enterprise_hooks/banned_keywords.py b/enterprise/enterprise_hooks/banned_keywords.py index 4df138939a..46d7a49e5b 100644 --- a/enterprise/enterprise_hooks/banned_keywords.py +++ b/enterprise/enterprise_hooks/banned_keywords.py @@ -10,7 +10,7 @@ from typing import Literal import litellm from litellm.caching.caching import DualCache -from 
litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth from litellm.integrations.custom_logger import CustomLogger from litellm._logging import verbose_proxy_logger from fastapi import HTTPException diff --git a/enterprise/enterprise_hooks/blocked_user_list.py b/enterprise/enterprise_hooks/blocked_user_list.py index 09fb1735a0..22077e8c0f 100644 --- a/enterprise/enterprise_hooks/blocked_user_list.py +++ b/enterprise/enterprise_hooks/blocked_user_list.py @@ -9,9 +9,9 @@ from typing import Optional, Literal import litellm -from litellm.proxy.utils import PrismaClient +from litellm_proxy.utils import PrismaClient from litellm.caching.caching import DualCache -from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable +from litellm_proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable from litellm.integrations.custom_logger import CustomLogger from litellm._logging import verbose_proxy_logger from fastapi import HTTPException diff --git a/enterprise/enterprise_hooks/google_text_moderation.py b/enterprise/enterprise_hooks/google_text_moderation.py index fe26a03207..41507cef9b 100644 --- a/enterprise/enterprise_hooks/google_text_moderation.py +++ b/enterprise/enterprise_hooks/google_text_moderation.py @@ -9,7 +9,7 @@ from typing import Literal import litellm -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth from litellm.integrations.custom_logger import CustomLogger from fastapi import HTTPException from litellm._logging import verbose_proxy_logger diff --git a/enterprise/enterprise_hooks/llama_guard.py b/enterprise/enterprise_hooks/llama_guard.py index 2c53fafa5b..e6ac25dceb 100644 --- a/enterprise/enterprise_hooks/llama_guard.py +++ b/enterprise/enterprise_hooks/llama_guard.py @@ -17,7 +17,7 @@ sys.path.insert( from typing import Optional, Literal import litellm import sys -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth from litellm.integrations.custom_logger import CustomLogger from fastapi import HTTPException from litellm._logging import verbose_proxy_logger diff --git a/enterprise/enterprise_hooks/llm_guard.py b/enterprise/enterprise_hooks/llm_guard.py index 078b8e216e..92f5e5ae3f 100644 --- a/enterprise/enterprise_hooks/llm_guard.py +++ b/enterprise/enterprise_hooks/llm_guard.py @@ -9,7 +9,7 @@ from typing import Optional, Literal import litellm -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth from litellm.integrations.custom_logger import CustomLogger from fastapi import HTTPException from litellm._logging import verbose_proxy_logger diff --git a/enterprise/enterprise_hooks/openai_moderation.py b/enterprise/enterprise_hooks/openai_moderation.py index 1db932c853..39c66944cf 100644 --- a/enterprise/enterprise_hooks/openai_moderation.py +++ b/enterprise/enterprise_hooks/openai_moderation.py @@ -14,7 +14,7 @@ sys.path.insert( from typing import Literal import litellm import sys -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth from litellm.integrations.custom_logger import CustomLogger from fastapi import HTTPException from litellm._logging import verbose_proxy_logger @@ -50,7 +50,7 @@ class _ENTERPRISE_OpenAI_Moderation(CustomLogger): if "content" in m and isinstance(m["content"], str): text += m["content"] - from litellm.proxy.proxy_server import llm_router + from litellm_proxy.proxy_server import llm_router if llm_router is None: return diff --git 
a/enterprise/enterprise_hooks/secret_detection.py b/enterprise/enterprise_hooks/secret_detection.py index 158f26efa3..0dabbcd882 100644 --- a/enterprise/enterprise_hooks/secret_detection.py +++ b/enterprise/enterprise_hooks/secret_detection.py @@ -11,13 +11,18 @@ import os sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path -from typing import Optional +from typing import Any, Optional from litellm.caching.caching import DualCache -from litellm.proxy._types import UserAPIKeyAuth +from typing import TYPE_CHECKING from litellm._logging import verbose_proxy_logger import tempfile from litellm.integrations.custom_guardrail import CustomGuardrail +if TYPE_CHECKING: + from litellm_proxy._types import UserAPIKeyAuth +else: + UserAPIKeyAuth = Any + GUARDRAIL_NAME = "hide_secrets" _custom_plugins_path = "file://" + os.path.join( diff --git a/litellm/__init__.py b/litellm/__init__.py index 59c8c78eb9..5ff730464a 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -61,13 +61,14 @@ from litellm.constants import ( DEFAULT_ALLOWED_FAILS, ) from litellm.types.guardrails import GuardrailItem -from litellm.proxy._types import ( +from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams +from litellm.types.utils import ( + StandardKeyGenerationConfig, + LlmProviders, KeyManagementSystem, KeyManagementSettings, LiteLLM_UpperboundKeyGenerateParams, ) -from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams -from litellm.types.utils import StandardKeyGenerationConfig, LlmProviders from litellm.integrations.custom_logger import CustomLogger from litellm.litellm_core_utils.logging_callback_manager import LoggingCallbackManager import httpx @@ -1048,7 +1049,6 @@ from .exceptions import ( MockException, ) from .budget_manager import BudgetManager -from .proxy.proxy_cli import run_server from .router import Router from .assistants.main import * from .batches.main import * diff --git a/litellm/_service_logger.py b/litellm/_service_logger.py index 7a60359d54..5debc5108d 100644 --- a/litellm/_service_logger.py +++ b/litellm/_service_logger.py @@ -4,7 +4,6 @@ from typing import TYPE_CHECKING, Any, Optional, Union import litellm from litellm._logging import verbose_logger -from litellm.proxy._types import UserAPIKeyAuth from .integrations.custom_logger import CustomLogger from .integrations.datadog.datadog import DataDogLogger @@ -15,11 +14,14 @@ from .types.services import ServiceLoggerPayload, ServiceTypes if TYPE_CHECKING: from opentelemetry.trace import Span as _Span + from litellm_proxy._types import UserAPIKeyAuth + Span = Union[_Span, Any] OTELClass = OpenTelemetry else: Span = Any OTELClass = Any + UserAPIKeyAuth = Any class ServiceLogging(CustomLogger): @@ -143,7 +145,7 @@ class ServiceLogging(CustomLogger): event_metadata=event_metadata, ) elif callback == "otel" or isinstance(callback, OpenTelemetry): - from litellm.proxy.proxy_server import open_telemetry_logger + from litellm_proxy.proxy_server import open_telemetry_logger await self.init_otel_logger_if_none() @@ -188,7 +190,7 @@ class ServiceLogging(CustomLogger): initializes otel_logger if it is None or no attribute exists on ServiceLogging Object """ - from litellm.proxy.proxy_server import open_telemetry_logger + from litellm_proxy.proxy_server import open_telemetry_logger if not hasattr(self, "otel_logger"): if open_telemetry_logger is not None and isinstance( @@ -251,7 +253,7 @@ class ServiceLogging(CustomLogger): event_metadata=event_metadata, ) elif 
callback == "otel" or isinstance(callback, OpenTelemetry): - from litellm.proxy.proxy_server import open_telemetry_logger + from litellm_proxy.proxy_server import open_telemetry_logger await self.init_otel_logger_if_none() diff --git a/litellm/caching/qdrant_semantic_cache.py b/litellm/caching/qdrant_semantic_cache.py index 32d4d8b0fd..da816240ea 100644 --- a/litellm/caching/qdrant_semantic_cache.py +++ b/litellm/caching/qdrant_semantic_cache.py @@ -281,7 +281,7 @@ class QdrantSemanticCache(BaseCache): async def async_set_cache(self, key, value, **kwargs): import uuid - from litellm.proxy.proxy_server import llm_model_list, llm_router + from litellm_proxy.proxy_server import llm_model_list, llm_router print_verbose(f"async qdrant semantic-cache set_cache, kwargs: {kwargs}") @@ -344,7 +344,7 @@ class QdrantSemanticCache(BaseCache): async def async_get_cache(self, key, **kwargs): print_verbose(f"async qdrant semantic-cache get_cache, kwargs: {kwargs}") - from litellm.proxy.proxy_server import llm_model_list, llm_router + from litellm_proxy.proxy_server import llm_model_list, llm_router # get the messages messages = kwargs["messages"] diff --git a/litellm/caching/redis_semantic_cache.py b/litellm/caching/redis_semantic_cache.py index c76f27377d..13a4990ddf 100644 --- a/litellm/caching/redis_semantic_cache.py +++ b/litellm/caching/redis_semantic_cache.py @@ -279,7 +279,7 @@ class RedisSemanticCache(BaseCache): Returns: List[float]: The embedding vector """ - from litellm.proxy.proxy_server import llm_model_list, llm_router + from litellm_proxy.proxy_server import llm_model_list, llm_router # Route the embedding request through the proxy if appropriate router_model_names = ( diff --git a/litellm/integrations/SlackAlerting/slack_alerting.py b/litellm/integrations/SlackAlerting/slack_alerting.py index 9fde042ae7..c1ad590792 100644 --- a/litellm/integrations/SlackAlerting/slack_alerting.py +++ b/litellm/integrations/SlackAlerting/slack_alerting.py @@ -26,8 +26,8 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, httpxSpecialProvider, ) -from litellm.proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent from litellm.types.integrations.slack_alerting import * +from litellm_proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent from ..email_templates.templates import * from .batching_handler import send_to_webhook, squash_payloads @@ -823,9 +823,9 @@ class SlackAlerting(CustomBatchLogger): ### UNIQUE CACHE KEY ### cache_key = provider + region_name - outage_value: Optional[ - ProviderRegionOutageModel - ] = await self.internal_usage_cache.async_get_cache(key=cache_key) + outage_value: Optional[ProviderRegionOutageModel] = ( + await self.internal_usage_cache.async_get_cache(key=cache_key) + ) if ( getattr(exception, "status_code", None) is None @@ -1148,7 +1148,7 @@ Model Info: email_logo_url: Optional[str] = None, email_support_contact: Optional[str] = None, ): - from litellm.proxy.proxy_server import CommonProxyErrors, premium_user + from litellm_proxy.proxy_server import CommonProxyErrors, premium_user if premium_user is not True: if email_logo_url is not None or email_support_contact is not None: @@ -1161,7 +1161,7 @@ Model Info: self, webhook_event: WebhookEvent ) -> bool: try: - from litellm.proxy.utils import send_email + from litellm_proxy.utils import send_email if self.alerting is None or "email" not in self.alerting: # do nothing if user does not want email alerts @@ -1170,7 +1170,7 @@ Model Info: self.alerting, ) return False - 
from litellm.proxy.proxy_server import premium_user, prisma_client + from litellm_proxy.proxy_server import premium_user, prisma_client email_logo_url = os.getenv( "SMTP_SENDER_LOGO", os.getenv("EMAIL_LOGO_URL", None) @@ -1271,8 +1271,8 @@ Model Info: Returns -> True if sent, False if not. """ - from litellm.proxy.proxy_server import premium_user - from litellm.proxy.utils import send_email + from litellm_proxy.proxy_server import premium_user + from litellm_proxy.utils import send_email email_logo_url = os.getenv( "SMTP_SENDER_LOGO", os.getenv("EMAIL_LOGO_URL", None) @@ -1406,9 +1406,9 @@ Model Info: self.alert_to_webhook_url is not None and alert_type in self.alert_to_webhook_url ): - slack_webhook_url: Optional[ - Union[str, List[str]] - ] = self.alert_to_webhook_url[alert_type] + slack_webhook_url: Optional[Union[str, List[str]]] = ( + self.alert_to_webhook_url[alert_type] + ) elif self.default_webhook_url is not None: slack_webhook_url = self.default_webhook_url else: @@ -1598,7 +1598,7 @@ Model Info: return try: - from litellm.proxy.spend_tracking.spend_management_endpoints import ( + from litellm_proxy.spend_tracking.spend_management_endpoints import ( _get_spend_report_for_time_range, ) @@ -1662,7 +1662,7 @@ Model Info: try: from calendar import monthrange - from litellm.proxy.spend_tracking.spend_management_endpoints import ( + from litellm_proxy.spend_tracking.spend_management_endpoints import ( _get_spend_report_for_time_range, ) diff --git a/litellm/integrations/SlackAlerting/utils.py b/litellm/integrations/SlackAlerting/utils.py index 0dc8bae5a6..ee4952cf9b 100644 --- a/litellm/integrations/SlackAlerting/utils.py +++ b/litellm/integrations/SlackAlerting/utils.py @@ -5,8 +5,8 @@ Utils used for slack alerting import asyncio from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union -from litellm.proxy._types import AlertType from litellm.secret_managers.main import get_secret +from litellm_proxy._types import AlertType if TYPE_CHECKING: from litellm.litellm_core_utils.litellm_logging import Logging as _Logging @@ -17,7 +17,7 @@ else: def process_slack_alerting_variables( - alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]] + alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]], ) -> Optional[Dict[AlertType, Union[List[str], str]]]: """ process alert_to_webhook_url diff --git a/litellm/integrations/azure_storage/azure_storage.py b/litellm/integrations/azure_storage/azure_storage.py index 6ffb1e542f..ba454d65f5 100644 --- a/litellm/integrations/azure_storage/azure_storage.py +++ b/litellm/integrations/azure_storage/azure_storage.py @@ -321,7 +321,7 @@ class AzureBlobStorageLogger(CustomBatchLogger): """ Checks if the user is a premium user, raises an error if not """ - from litellm.proxy.proxy_server import CommonProxyErrors, premium_user + from litellm_proxy.proxy_server import CommonProxyErrors, premium_user if premium_user is not True: raise ValueError( diff --git a/litellm/integrations/custom_guardrail.py b/litellm/integrations/custom_guardrail.py index 41a3800116..d95b9d17e3 100644 --- a/litellm/integrations/custom_guardrail.py +++ b/litellm/integrations/custom_guardrail.py @@ -165,7 +165,7 @@ class CustomGuardrail(CustomLogger): """ Returns True if the user is a premium user """ - from litellm.proxy.proxy_server import CommonProxyErrors, premium_user + from litellm_proxy.proxy_server import CommonProxyErrors, premium_user if premium_user is not True: verbose_logger.warning( @@ -183,7 +183,7 @@ class CustomGuardrail(CustomLogger): 
""" Builds `StandardLoggingGuardrailInformation` and adds it to the request metadata so it can be used for logging to DataDog, Langfuse, etc. """ - from litellm.proxy.proxy_server import premium_user + from litellm_proxy.proxy_server import premium_user if premium_user is not True: verbose_logger.warning( diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index 18cb8e8d7f..977a408c26 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -15,7 +15,6 @@ from typing import ( from pydantic import BaseModel from litellm.caching.caching import DualCache -from litellm.proxy._types import UserAPIKeyAuth from litellm.types.integrations.argilla import ArgillaItem from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest from litellm.types.utils import ( @@ -30,9 +29,12 @@ from litellm.types.utils import ( if TYPE_CHECKING: from opentelemetry.trace import Span as _Span + from litellm_proxy._types import UserAPIKeyAuth + Span = Union[_Span, Any] else: Span = Any + UserAPIKeyAuth = Any class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callback#callback-class diff --git a/litellm/integrations/email_alerting.py b/litellm/integrations/email_alerting.py index b45b9aa7f5..03bcf9560e 100644 --- a/litellm/integrations/email_alerting.py +++ b/litellm/integrations/email_alerting.py @@ -3,10 +3,14 @@ Functions for sending Email Alerts """ import os -from typing import List, Optional +from typing import TYPE_CHECKING, Any, List, Optional from litellm._logging import verbose_logger, verbose_proxy_logger -from litellm.proxy._types import WebhookEvent + +if TYPE_CHECKING: + from litellm_proxy._types import WebhookEvent +else: + WebhookEvent = Any # we use this for the email header, please send a test email if you change this. verify it looks good on email LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png" @@ -19,7 +23,7 @@ async def get_all_team_member_emails(team_id: Optional[str] = None) -> list: ) if team_id is None: return [] - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise Exception("Not connected to DB!") @@ -71,7 +75,7 @@ async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool: Send an Email Alert to All Team Members when the Team Budget is crossed Returns -> True if sent, False if not. 
""" - from litellm.proxy.utils import send_email + from litellm_proxy.utils import send_email _team_id = webhook_event.team_id team_alias = webhook_event.team_alias diff --git a/litellm/integrations/gcs_bucket/gcs_bucket.py b/litellm/integrations/gcs_bucket/gcs_bucket.py index 972a023666..ebec3e4837 100644 --- a/litellm/integrations/gcs_bucket/gcs_bucket.py +++ b/litellm/integrations/gcs_bucket/gcs_bucket.py @@ -9,10 +9,10 @@ from urllib.parse import quote from litellm._logging import verbose_logger from litellm.integrations.additional_logging_utils import AdditionalLoggingUtils from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase -from litellm.proxy._types import CommonProxyErrors from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus from litellm.types.integrations.gcs_bucket import * from litellm.types.utils import StandardLoggingPayload +from litellm_proxy._types import CommonProxyErrors if TYPE_CHECKING: from litellm.llms.vertex_ai.vertex_llm_base import VertexBase @@ -22,7 +22,7 @@ else: class GCSBucketLogger(GCSBucketBase, AdditionalLoggingUtils): def __init__(self, bucket_name: Optional[str] = None) -> None: - from litellm.proxy.proxy_server import premium_user + from litellm_proxy.proxy_server import premium_user super().__init__(bucket_name=bucket_name) @@ -48,7 +48,7 @@ class GCSBucketLogger(GCSBucketBase, AdditionalLoggingUtils): #### ASYNC #### async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): - from litellm.proxy.proxy_server import premium_user + from litellm_proxy.proxy_server import premium_user if premium_user is not True: raise ValueError( diff --git a/litellm/integrations/gcs_pubsub/pub_sub.py b/litellm/integrations/gcs_pubsub/pub_sub.py index db7f9bb4d0..d3270daaca 100644 --- a/litellm/integrations/gcs_pubsub/pub_sub.py +++ b/litellm/integrations/gcs_pubsub/pub_sub.py @@ -15,7 +15,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union from litellm.types.utils import StandardLoggingPayload if TYPE_CHECKING: - from litellm.proxy._types import SpendLogsPayload + from litellm_proxy._types import SpendLogsPayload else: SpendLogsPayload = Any @@ -44,7 +44,7 @@ class GcsPubSubLogger(CustomBatchLogger): topic_id (str): Pub/Sub topic ID credentials_path (str, optional): Path to Google Cloud credentials JSON file """ - from litellm.proxy.utils import _premium_user_check + from litellm_proxy.utils import _premium_user_check _premium_user_check() @@ -108,10 +108,10 @@ class GcsPubSubLogger(CustomBatchLogger): Raises: Raises a NON Blocking verbose_logger.exception if an error occurs """ - from litellm.proxy.spend_tracking.spend_tracking_utils import ( + from litellm_proxy.spend_tracking.spend_tracking_utils import ( get_logging_payload, ) - from litellm.proxy.utils import _premium_user_check + from litellm_proxy.utils import _premium_user_check _premium_user_check() diff --git a/litellm/integrations/langtrace.py b/litellm/integrations/langtrace.py index ac1069f440..81ce87d870 100644 --- a/litellm/integrations/langtrace.py +++ b/litellm/integrations/langtrace.py @@ -1,7 +1,7 @@ import json from typing import TYPE_CHECKING, Any, Union -from litellm.proxy._types import SpanAttributes +from litellm.types.integrations.opentelemetry import SpanAttributes if TYPE_CHECKING: from opentelemetry.trace import Span as _Span diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index f4fe40738b..191a46ffeb 100644 --- a/litellm/integrations/opentelemetry.py 
+++ b/litellm/integrations/opentelemetry.py @@ -18,10 +18,10 @@ if TYPE_CHECKING: from opentelemetry.sdk.trace.export import SpanExporter as _SpanExporter from opentelemetry.trace import Span as _Span - from litellm.proxy._types import ( + from litellm_proxy._types import ( ManagementEndpointLoggingPayload as _ManagementEndpointLoggingPayload, ) - from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth + from litellm_proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth Span = Union[_Span, Any] SpanExporter = Union[_SpanExporter, Any] @@ -126,7 +126,7 @@ class OpenTelemetry(CustomLogger): - Adds Otel as a service callback - Sets `proxy_server.open_telemetry_logger` to self """ - from litellm.proxy import proxy_server + from litellm_proxy import proxy_server # Add Otel as a service callback if "otel" not in litellm.service_callback: @@ -350,9 +350,9 @@ class OpenTelemetry(CustomLogger): """ from opentelemetry import trace - standard_callback_dynamic_params: Optional[ - StandardCallbackDynamicParams - ] = kwargs.get("standard_callback_dynamic_params") + standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = ( + kwargs.get("standard_callback_dynamic_params") + ) if not standard_callback_dynamic_params: return @@ -406,7 +406,7 @@ class OpenTelemetry(CustomLogger): def set_tools_attributes(self, span: Span, tools): import json - from litellm.proxy._types import SpanAttributes + from litellm.types.integrations.opentelemetry import SpanAttributes if not tools: return @@ -460,7 +460,7 @@ class OpenTelemetry(CustomLogger): def _tool_calls_kv_pair( tool_calls: List[ChatCompletionMessageToolCall], ) -> Dict[str, Any]: - from litellm.proxy._types import SpanAttributes + from litellm.types.integrations.opentelemetry import SpanAttributes kv_pairs: Dict[str, Any] = {} for idx, tool_call in enumerate(tool_calls): @@ -496,7 +496,7 @@ class OpenTelemetry(CustomLogger): span, kwargs, response_obj ) return - from litellm.proxy._types import SpanAttributes + from litellm.types.integrations.opentelemetry import SpanAttributes optional_params = kwargs.get("optional_params", {}) litellm_params = kwargs.get("litellm_params", {}) or {} diff --git a/litellm/integrations/pagerduty/pagerduty.py b/litellm/integrations/pagerduty/pagerduty.py index 6085bc237a..4140f6ed58 100644 --- a/litellm/integrations/pagerduty/pagerduty.py +++ b/litellm/integrations/pagerduty/pagerduty.py @@ -9,7 +9,7 @@ Handles two types of alerts: import asyncio import os from datetime import datetime, timedelta, timezone -from typing import List, Literal, Optional, Union +from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union from litellm._logging import verbose_logger from litellm.caching import DualCache @@ -19,7 +19,6 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, httpxSpecialProvider, ) -from litellm.proxy._types import UserAPIKeyAuth from litellm.types.integrations.pagerduty import ( AlertingConfig, PagerDutyInternalEvent, @@ -31,6 +30,12 @@ from litellm.types.utils import ( StandardLoggingPayloadErrorInformation, ) +if TYPE_CHECKING: + from litellm_proxy._types import UserAPIKeyAuth +else: + UserAPIKeyAuth = Any + + PAGERDUTY_DEFAULT_FAILURE_THRESHOLD = 60 PAGERDUTY_DEFAULT_FAILURE_THRESHOLD_WINDOW_SECONDS = 60 PAGERDUTY_DEFAULT_HANGING_THRESHOLD_SECONDS = 60 @@ -46,7 +51,7 @@ class PagerDutyAlerting(SlackAlerting): def __init__( self, alerting_args: Optional[Union[AlertingConfig, dict]] = None, **kwargs ): - from litellm.proxy.proxy_server import 
CommonProxyErrors, premium_user + from litellm_proxy.proxy_server import CommonProxyErrors, premium_user super().__init__() _api_key = os.getenv("PAGERDUTY_API_KEY") diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index f61321e53d..8edf1cbd39 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -18,10 +18,10 @@ from typing import ( import litellm from litellm._logging import print_verbose, verbose_logger from litellm.integrations.custom_logger import CustomLogger -from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth from litellm.types.integrations.prometheus import * from litellm.types.utils import StandardLoggingPayload from litellm.utils import get_end_user_id_for_cost_tracking +from litellm_proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth if TYPE_CHECKING: from apscheduler.schedulers.asyncio import AsyncIOScheduler @@ -38,7 +38,7 @@ class PrometheusLogger(CustomLogger): try: from prometheus_client import Counter, Gauge, Histogram - from litellm.proxy.proxy_server import CommonProxyErrors, premium_user + from litellm_proxy.proxy_server import CommonProxyErrors, premium_user if premium_user is not True: verbose_logger.warning( @@ -456,7 +456,7 @@ class PrometheusLogger(CustomLogger): and isinstance(user_api_key, str) and user_api_key.startswith("sk-") ): - from litellm.proxy.utils import hash_token + from litellm_proxy.utils import hash_token user_api_key = hash_token(user_api_key) @@ -661,7 +661,7 @@ class PrometheusLogger(CustomLogger): kwargs: dict, metadata: dict, ): - from litellm.proxy.common_utils.callback_utils import ( + from litellm_proxy.common_utils.callback_utils import ( get_model_group_from_litellm_kwargs, ) @@ -1363,7 +1363,7 @@ class PrometheusLogger(CustomLogger): set_metrics_function: Function to set metrics for the fetched data. data_type: String representing the type of data ("teams" or "keys") for logging purposes. """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: return @@ -1398,10 +1398,10 @@ class PrometheusLogger(CustomLogger): """ Initialize team budget metrics by reusing the generic pagination logic. """ - from litellm.proxy.management_endpoints.team_endpoints import ( + from litellm_proxy.management_endpoints.team_endpoints import ( get_paginated_teams, ) - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: verbose_logger.debug( @@ -1432,10 +1432,10 @@ class PrometheusLogger(CustomLogger): from typing import Union from litellm.constants import UI_SESSION_TOKEN_TEAM_ID - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( _list_key_helper, ) - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: verbose_logger.debug( @@ -1480,7 +1480,7 @@ class PrometheusLogger(CustomLogger): - If redis cache is not available, we initialize the metrics directly. 
""" from litellm.constants import PROMETHEUS_EMIT_BUDGET_METRICS_JOB_NAME - from litellm.proxy.proxy_server import proxy_logging_obj + from litellm_proxy.proxy_server import proxy_logging_obj pod_lock_manager = proxy_logging_obj.db_spend_update_writer.pod_lock_manager @@ -1561,8 +1561,8 @@ class PrometheusLogger(CustomLogger): Fields not available in metadata: - `budget_reset_at` """ - from litellm.proxy.auth.auth_checks import get_team_object - from litellm.proxy.proxy_server import prisma_client, user_api_key_cache + from litellm_proxy.auth.auth_checks import get_team_object + from litellm_proxy.proxy_server import prisma_client, user_api_key_cache _total_team_spend = (spend or 0) + response_cost team_object = LiteLLM_TeamTable( @@ -1711,8 +1711,8 @@ class PrometheusLogger(CustomLogger): """ Assemble a UserAPIKeyAuth object """ - from litellm.proxy.auth.auth_checks import get_key_object - from litellm.proxy.proxy_server import prisma_client, user_api_key_cache + from litellm_proxy.auth.auth_checks import get_key_object + from litellm_proxy.proxy_server import prisma_client, user_api_key_cache _total_key_spend = (key_spend or 0) + response_cost user_api_key_dict = UserAPIKeyAuth( @@ -1803,8 +1803,8 @@ class PrometheusLogger(CustomLogger): from prometheus_client import make_asgi_app from litellm._logging import verbose_proxy_logger - from litellm.proxy._types import CommonProxyErrors - from litellm.proxy.proxy_server import app + from litellm_proxy._types import CommonProxyErrors + from litellm_proxy.proxy_server import app if premium_user is not True: verbose_proxy_logger.warning( diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 77d4fd7d5d..6613c73963 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -28,7 +28,6 @@ from litellm._logging import _is_debugging_on, verbose_logger from litellm.batches.batch_utils import _handle_completed_batch from litellm.caching.caching import DualCache, InMemoryCache from litellm.caching.caching_handler import LLMCachingHandler - from litellm.constants import ( DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT, DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT, @@ -2886,7 +2885,7 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915 _in_memory_loggers.append(_otel_logger) return _otel_logger # type: ignore elif logging_integration == "dynamic_rate_limiter": - from litellm.proxy.hooks.dynamic_rate_limiter import ( + from litellm_proxy.hooks.dynamic_rate_limiter import ( _PROXY_DynamicRateLimitHandler, ) @@ -3074,7 +3073,7 @@ def get_custom_logger_compatible_class( # noqa: PLR0915 return callback # type: ignore elif logging_integration == "dynamic_rate_limiter": - from litellm.proxy.hooks.dynamic_rate_limiter import ( + from litellm_proxy.hooks.dynamic_rate_limiter import ( _PROXY_DynamicRateLimitHandler, ) @@ -3130,7 +3129,7 @@ def _get_custom_logger_settings_from_proxy_server(callback_name: str) -> Dict: otel: message_logging: False """ - from litellm.proxy.proxy_server import callback_settings + from litellm_proxy.proxy_server import callback_settings if callback_settings: return dict(callback_settings.get(callback_name, {})) diff --git a/litellm/litellm_core_utils/prompt_templates/common_utils.py b/litellm/litellm_core_utils/prompt_templates/common_utils.py index 963ab33f52..94a447a98d 100644 --- a/litellm/litellm_core_utils/prompt_templates/common_utils.py +++ b/litellm/litellm_core_utils/prompt_templates/common_utils.py @@ -342,7 
+342,7 @@ def get_format_from_file_id(file_id: Optional[str]) -> Optional[str]: unified_file_id = litellm_proxy:{};unified_id,{} If not a unified file id, returns 'file' as default format """ - from litellm.proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles + from litellm_proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles if not file_id: return None diff --git a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py index ab335ca7c1..aa01990e02 100644 --- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py +++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py @@ -37,15 +37,15 @@ class AnthropicMessagesHandler: """Helper function to handle Anthropic streaming responses using the existing logging handlers""" from datetime import datetime - from litellm.proxy.pass_through_endpoints.streaming_handler import ( - PassThroughStreamingHandler, - ) - from litellm.proxy.pass_through_endpoints.success_handler import ( - PassThroughEndpointLogging, - ) from litellm.types.passthrough_endpoints.pass_through_endpoints import ( EndpointType, ) + from litellm_proxy.pass_through_endpoints.streaming_handler import ( + PassThroughStreamingHandler, + ) + from litellm_proxy.pass_through_endpoints.success_handler import ( + PassThroughEndpointLogging, + ) # Create success handler object passthrough_success_handler_obj = PassThroughEndpointLogging() diff --git a/litellm/proxy/.gitignore b/litellm/proxy/.gitignore deleted file mode 100644 index caa4783d90..0000000000 --- a/litellm/proxy/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -.env -secrets.toml \ No newline at end of file diff --git a/litellm/router.py b/litellm/router.py index dba886b856..269ac4556a 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -4550,7 +4550,7 @@ class Router: Each provider uses diff .env vars for pass-through endpoints, this helper uses the deployment credentials to set the .env vars for pass-through endpoints """ if deployment.litellm_params.use_in_pass_through is True: - from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import ( passthrough_endpoint_router, ) diff --git a/litellm/router_strategy/budget_limiter.py b/litellm/router_strategy/budget_limiter.py index 9e4001b67b..b82eeefbdc 100644 --- a/litellm/router_strategy/budget_limiter.py +++ b/litellm/router_strategy/budget_limiter.py @@ -10,11 +10,11 @@ This means you can use this with weighted-pick, lowest-latency, simple-shuffle, Example: ``` openai: - budget_limit: 0.000000000001 - time_period: 1d + budget_limit: 0.000000000001 + time_period: 1d anthropic: - budget_limit: 100 - time_period: 7d + budget_limit: 100 + time_period: 7d ``` """ @@ -53,9 +53,9 @@ class RouterBudgetLimiting(CustomLogger): self.dual_cache = dual_cache self.redis_increment_operation_queue: List[RedisPipelineIncrementOperation] = [] asyncio.create_task(self.periodic_sync_in_memory_spend_with_redis()) - self.provider_budget_config: Optional[ - GenericBudgetConfigType - ] = provider_budget_config + self.provider_budget_config: Optional[GenericBudgetConfigType] = ( + provider_budget_config + ) self.deployment_budget_config: Optional[GenericBudgetConfigType] = None self.tag_budget_config: Optional[GenericBudgetConfigType] = None self._init_provider_budgets() @@ -797,7 +797,7 @@ class RouterBudgetLimiting(CustomLogger): def _init_tag_budgets(self): if 
litellm.tag_budget_config is None: return - from litellm.proxy.proxy_server import CommonProxyErrors, premium_user + from litellm_proxy.proxy_server import CommonProxyErrors, premium_user if premium_user is not True: raise ValueError( diff --git a/litellm/router_strategy/lowest_cost.py b/litellm/router_strategy/lowest_cost.py index bd28f6dc5a..0f127e2f97 100644 --- a/litellm/router_strategy/lowest_cost.py +++ b/litellm/router_strategy/lowest_cost.py @@ -187,7 +187,7 @@ class LowestCostLoggingHandler(CustomLogger): self.logged_success += 1 except Exception as e: verbose_logger.exception( - "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format( + "litellm_proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format( str(e) ) ) diff --git a/litellm/router_strategy/lowest_latency.py b/litellm/router_strategy/lowest_latency.py index 55ca98843d..418ca1d18f 100644 --- a/litellm/router_strategy/lowest_latency.py +++ b/litellm/router_strategy/lowest_latency.py @@ -170,7 +170,7 @@ class LowestLatencyLoggingHandler(CustomLogger): self.logged_success += 1 except Exception as e: verbose_logger.exception( - "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format( + "litellm_proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format( str(e) ) ) @@ -238,7 +238,7 @@ class LowestLatencyLoggingHandler(CustomLogger): return except Exception as e: verbose_logger.exception( - "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format( + "litellm_proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format( str(e) ) ) diff --git a/litellm/router_strategy/lowest_tpm_rpm_v2.py b/litellm/router_strategy/lowest_tpm_rpm_v2.py index 9e6c139314..a3a4e6945d 100644 --- a/litellm/router_strategy/lowest_tpm_rpm_v2.py +++ b/litellm/router_strategy/lowest_tpm_rpm_v2.py @@ -270,7 +270,7 @@ class LowestTPMLoggingHandler_v2(BaseRoutingStrategy, CustomLogger): self.logged_success += 1 except Exception as e: verbose_logger.exception( - "litellm.proxy.hooks.lowest_tpm_rpm_v2.py::log_success_event(): Exception occured - {}".format( + "litellm_proxy.hooks.lowest_tpm_rpm_v2.py::log_success_event(): Exception occured - {}".format( str(e) ) ) @@ -321,7 +321,7 @@ class LowestTPMLoggingHandler_v2(BaseRoutingStrategy, CustomLogger): self.logged_success += 1 except Exception as e: verbose_logger.exception( - "litellm.proxy.hooks.lowest_tpm_rpm_v2.py::async_log_success_event(): Exception occured - {}".format( + "litellm_proxy.hooks.lowest_tpm_rpm_v2.py::async_log_success_event(): Exception occured - {}".format( str(e) ) ) diff --git a/litellm/secret_managers/aws_secret_manager.py b/litellm/secret_managers/aws_secret_manager.py index fbe951e649..38a0d90c6a 100644 --- a/litellm/secret_managers/aws_secret_manager.py +++ b/litellm/secret_managers/aws_secret_manager.py @@ -4,7 +4,7 @@ This is a file for the AWS Secret Manager Integration Relevant issue: https://github.com/BerriAI/litellm/issues/1883 Requires: -* `os.environ["AWS_REGION_NAME"], +* `os.environ["AWS_REGION_NAME"], * `pip install boto3>=1.28.57` """ @@ -15,7 +15,7 @@ import re from typing import Any, Dict, Optional import litellm -from litellm.proxy._types import KeyManagementSystem +from litellm_proxy._types import KeyManagementSystem def validate_environment(): diff --git a/litellm/secret_managers/aws_secret_manager_v2.py 
b/litellm/secret_managers/aws_secret_manager_v2.py index 327dbf3d19..9c5ddcec9f 100644 --- a/litellm/secret_managers/aws_secret_manager_v2.py +++ b/litellm/secret_managers/aws_secret_manager_v2.py @@ -9,7 +9,7 @@ Handles Async Operations for: Relevant issue: https://github.com/BerriAI/litellm/issues/1883 Requires: -* `os.environ["AWS_REGION_NAME"], +* `os.environ["AWS_REGION_NAME"], * `pip install boto3>=1.28.57` """ @@ -26,8 +26,8 @@ from litellm.llms.custom_httpx.http_handler import ( _get_httpx_client, get_async_httpx_client, ) -from litellm.proxy._types import KeyManagementSystem from litellm.types.llms.custom_http import httpxSpecialProvider +from litellm_proxy._types import KeyManagementSystem from .base_secret_manager import BaseSecretManager diff --git a/litellm/secret_managers/google_kms.py b/litellm/secret_managers/google_kms.py index 18e25abeb2..419e0eb419 100644 --- a/litellm/secret_managers/google_kms.py +++ b/litellm/secret_managers/google_kms.py @@ -12,7 +12,7 @@ import os from typing import Optional import litellm -from litellm.proxy._types import KeyManagementSystem +from litellm_proxy._types import KeyManagementSystem def validate_environment(): diff --git a/litellm/secret_managers/google_secret_manager.py b/litellm/secret_managers/google_secret_manager.py index 2fd35ced6e..bedd2f201c 100644 --- a/litellm/secret_managers/google_secret_manager.py +++ b/litellm/secret_managers/google_secret_manager.py @@ -8,7 +8,7 @@ from litellm.caching.caching import InMemoryCache from litellm.constants import SECRET_MANAGER_REFRESH_INTERVAL from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase from litellm.llms.custom_httpx.http_handler import _get_httpx_client -from litellm.proxy._types import CommonProxyErrors, KeyManagementSystem +from litellm_proxy._types import CommonProxyErrors, KeyManagementSystem class GoogleSecretManager(GCSBucketBase): @@ -22,7 +22,7 @@ class GoogleSecretManager(GCSBucketBase): refresh_interval (int, optional): The refresh interval in seconds. Defaults to 86400. (24 hours) always_read_secret_manager (bool, optional): Whether to always read from the secret manager. Defaults to False. 
Since we do want to cache values """ - from litellm.proxy.proxy_server import premium_user + from litellm_proxy.proxy_server import premium_user if premium_user is not True: raise ValueError( diff --git a/litellm/secret_managers/hashicorp_secret_manager.py b/litellm/secret_managers/hashicorp_secret_manager.py index e5911ffa9b..dd93eeb34e 100644 --- a/litellm/secret_managers/hashicorp_secret_manager.py +++ b/litellm/secret_managers/hashicorp_secret_manager.py @@ -12,14 +12,14 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, httpxSpecialProvider, ) -from litellm.proxy._types import KeyManagementSystem +from litellm_proxy._types import KeyManagementSystem from .base_secret_manager import BaseSecretManager class HashicorpSecretManager(BaseSecretManager): def __init__(self): - from litellm.proxy.proxy_server import CommonProxyErrors, premium_user + from litellm_proxy.proxy_server import CommonProxyErrors, premium_user # Vault-specific config self.vault_addr = os.getenv("HCP_VAULT_ADDR", "http://127.0.0.1:8200") diff --git a/litellm/secret_managers/main.py b/litellm/secret_managers/main.py index e505484b4b..649671522b 100644 --- a/litellm/secret_managers/main.py +++ b/litellm/secret_managers/main.py @@ -11,7 +11,7 @@ import litellm from litellm._logging import print_verbose, verbose_logger from litellm.caching.caching import DualCache from litellm.llms.custom_httpx.http_handler import HTTPHandler -from litellm.proxy._types import KeyManagementSystem +from litellm.types.utils import KeyManagementSystem oidc_cache = DualCache() diff --git a/litellm/types/integrations/opentelemetry.py b/litellm/types/integrations/opentelemetry.py new file mode 100644 index 0000000000..28205b82dc --- /dev/null +++ b/litellm/types/integrations/opentelemetry.py @@ -0,0 +1,49 @@ +import enum + + +class SpanAttributes(str, enum.Enum): + # Note: We've taken this from opentelemetry-semantic-conventions-ai + # I chose to not add a new dependency to litellm for this + + # Semantic Conventions for LLM requests, this needs to be removed after + # OpenTelemetry Semantic Conventions support Gen AI. 
+ # Issue at https://github.com/open-telemetry/opentelemetry-python/issues/3868 + # Refer to https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/llm-spans.md + + LLM_SYSTEM = "gen_ai.system" + LLM_REQUEST_MODEL = "gen_ai.request.model" + LLM_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens" + LLM_REQUEST_TEMPERATURE = "gen_ai.request.temperature" + LLM_REQUEST_TOP_P = "gen_ai.request.top_p" + LLM_PROMPTS = "gen_ai.prompt" + LLM_COMPLETIONS = "gen_ai.completion" + LLM_RESPONSE_MODEL = "gen_ai.response.model" + LLM_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens" + LLM_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens" + LLM_TOKEN_TYPE = "gen_ai.token.type" + # To be added + # LLM_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reasons" + # LLM_RESPONSE_ID = "gen_ai.response.id" + + # LLM + LLM_REQUEST_TYPE = "llm.request.type" + LLM_USAGE_TOTAL_TOKENS = "llm.usage.total_tokens" + LLM_USAGE_TOKEN_TYPE = "llm.usage.token_type" + LLM_USER = "llm.user" + LLM_HEADERS = "llm.headers" + LLM_TOP_K = "llm.top_k" + LLM_IS_STREAMING = "llm.is_streaming" + LLM_FREQUENCY_PENALTY = "llm.frequency_penalty" + LLM_PRESENCE_PENALTY = "llm.presence_penalty" + LLM_CHAT_STOP_SEQUENCES = "llm.chat.stop_sequences" + LLM_REQUEST_FUNCTIONS = "llm.request.functions" + LLM_REQUEST_REPETITION_PENALTY = "llm.request.repetition_penalty" + LLM_RESPONSE_FINISH_REASON = "llm.response.finish_reason" + LLM_RESPONSE_STOP_REASON = "llm.response.stop_reason" + LLM_CONTENT_COMPLETION_CHUNK = "llm.content.completion.chunk" + + # OpenAI + LLM_OPENAI_RESPONSE_SYSTEM_FINGERPRINT = "gen_ai.openai.system_fingerprint" + LLM_OPENAI_API_BASE = "gen_ai.openai.api_base" + LLM_OPENAI_API_VERSION = "gen_ai.openai.api_version" + LLM_OPENAI_API_TYPE = "gen_ai.openai.api_type" diff --git a/litellm/types/proxy/management_endpoints/internal_user_endpoints.py b/litellm/types/proxy/management_endpoints/internal_user_endpoints.py index 5c2c5bf371..bacb1456c7 100644 --- a/litellm/types/proxy/management_endpoints/internal_user_endpoints.py +++ b/litellm/types/proxy/management_endpoints/internal_user_endpoints.py @@ -3,7 +3,7 @@ from typing import Any, Dict, List, Literal, Optional, Union from fastapi import HTTPException from pydantic import BaseModel, EmailStr -from litellm.proxy._types import LiteLLM_UserTableWithKeyCount +from litellm_proxy._types import LiteLLM_UserTableWithKeyCount class UserListResponse(BaseModel): diff --git a/litellm/types/proxy/management_endpoints/ui_sso.py b/litellm/types/proxy/management_endpoints/ui_sso.py index 0e6f8739fa..d0adbb663f 100644 --- a/litellm/types/proxy/management_endpoints/ui_sso.py +++ b/litellm/types/proxy/management_endpoints/ui_sso.py @@ -2,7 +2,7 @@ from typing import List, Literal, Optional, TypedDict from pydantic import Field -from litellm.proxy._types import LiteLLMPydanticObjectBase, LitellmUserRoles +from litellm.types.utils import LiteLLMPydanticObjectBase class MicrosoftGraphAPIUserGroupDirectoryObject(TypedDict, total=False): diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 532162e60f..526a097606 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -1,3 +1,4 @@ +import enum import json import time import uuid @@ -2262,3 +2263,61 @@ class SpecialEnums(Enum): LLMResponseTypes = Union[ ModelResponse, EmbeddingResponse, ImageResponse, OpenAIFileObject ] + + +AllowedModelRegion = Literal["eu", "us"] + + +class KeyManagementSystem(enum.Enum): + GOOGLE_KMS = "google_kms" + AZURE_KEY_VAULT = "azure_key_vault" + AWS_SECRET_MANAGER = 
"aws_secret_manager" + GOOGLE_SECRET_MANAGER = "google_secret_manager" + HASHICORP_VAULT = "hashicorp_vault" + LOCAL = "local" + AWS_KMS = "aws_kms" + + +class KeyManagementSettings(LiteLLMPydanticObjectBase): + hosted_keys: Optional[List] = None + store_virtual_keys: Optional[bool] = False + """ + If True, virtual keys created by litellm will be stored in the secret manager + """ + prefix_for_stored_virtual_keys: str = "litellm/" + """ + If set, this prefix will be used for stored virtual keys in the secret manager + """ + + access_mode: Literal["read_only", "write_only", "read_and_write"] = "read_only" + """ + Access mode for the secret manager, when write_only will only use for writing secrets + """ + + primary_secret_name: Optional[str] = None + """ + If set, will read secrets from this primary secret in the secret manager + + eg. on AWS you can store multiple secret values as K/V pairs in a single secret + """ + + +class LiteLLM_UpperboundKeyGenerateParams(LiteLLMPydanticObjectBase): + """ + Set default upperbound to max budget a key called via `/key/generate` can be. + + Args: + max_budget (Optional[float], optional): Max budget a key can be. Defaults to None. + budget_duration (Optional[str], optional): Duration of the budget. Defaults to None. + duration (Optional[str], optional): Duration of the key. Defaults to None. + max_parallel_requests (Optional[int], optional): Max number of requests that can be made in parallel. Defaults to None. + tpm_limit (Optional[int], optional): Tpm limit. Defaults to None. + rpm_limit (Optional[int], optional): Rpm limit. Defaults to None. + """ + + max_budget: Optional[float] = None + budget_duration: Optional[str] = None + duration: Optional[str] = None + max_parallel_requests: Optional[int] = None + tpm_limit: Optional[int] = None + rpm_limit: Optional[int] = None diff --git a/litellm/utils.py b/litellm/utils.py index 98a9c34b47..b4b338f52a 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -259,13 +259,13 @@ from .exceptions import ( UnprocessableEntityError, UnsupportedParamsError, ) -from .proxy._types import AllowedModelRegion, KeyManagementSystem from .types.llms.openai import ( ChatCompletionDeltaToolCallChunk, ChatCompletionToolCallChunk, ChatCompletionToolCallFunctionChunk, ) from .types.router import LiteLLM_Params +from .types.utils import AllowedModelRegion, KeyManagementSystem ####### ENVIRONMENT VARIABLES #################### # Adjust to your specific application needs / system capabilities. 
diff --git a/litellm/proxy/README.md b/litellm_proxy/README.md
similarity index 100%
rename from litellm/proxy/README.md
rename to litellm_proxy/README.md
diff --git a/litellm/proxy/__init__.py b/litellm_proxy/__init__.py
similarity index 100%
rename from litellm/proxy/__init__.py
rename to litellm_proxy/__init__.py
diff --git a/litellm/proxy/_experimental/mcp_server/mcp_server_manager.py b/litellm_proxy/_experimental/mcp_server/mcp_server_manager.py
similarity index 100%
rename from litellm/proxy/_experimental/mcp_server/mcp_server_manager.py
rename to litellm_proxy/_experimental/mcp_server/mcp_server_manager.py
diff --git a/litellm/proxy/_experimental/mcp_server/server.py b/litellm_proxy/_experimental/mcp_server/server.py
similarity index 98%
rename from litellm/proxy/_experimental/mcp_server/server.py
rename to litellm_proxy/_experimental/mcp_server/server.py
index fe1eccb048..cd90731857 100644
--- a/litellm/proxy/_experimental/mcp_server/server.py
+++ b/litellm_proxy/_experimental/mcp_server/server.py
@@ -13,8 +13,8 @@ from pydantic import ConfigDict, ValidationError
 from litellm._logging import verbose_logger
 from litellm.constants import MCP_TOOL_NAME_PREFIX
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
 from litellm.types.mcp_server.mcp_server_manager import MCPInfo
 from litellm.types.utils import StandardLoggingMCPToolCall
 from litellm.utils import client
@@ -288,7 +288,7 @@ if MCP_AVAILABLE:
         """
         REST API to call a specific MCP tool with the provided arguments
         """
-        from litellm.proxy.proxy_server import add_litellm_data_to_request, proxy_config
+        from litellm_proxy.proxy_server import add_litellm_data_to_request, proxy_config

         data = await request.json()
         data = await add_litellm_data_to_request(
diff --git a/litellm/proxy/_experimental/mcp_server/sse_transport.py b/litellm_proxy/_experimental/mcp_server/sse_transport.py
similarity index 100%
rename from litellm/proxy/_experimental/mcp_server/sse_transport.py
rename to litellm_proxy/_experimental/mcp_server/sse_transport.py
diff --git a/litellm/proxy/_experimental/mcp_server/tool_registry.py b/litellm_proxy/_experimental/mcp_server/tool_registry.py
similarity index 98%
rename from litellm/proxy/_experimental/mcp_server/tool_registry.py
rename to litellm_proxy/_experimental/mcp_server/tool_registry.py
index c08b797968..e612953c17 100644
--- a/litellm/proxy/_experimental/mcp_server/tool_registry.py
+++ b/litellm_proxy/_experimental/mcp_server/tool_registry.py
@@ -2,7 +2,7 @@ import json
 from typing import Any, Callable, Dict, List, Optional

 from litellm._logging import verbose_logger
-from litellm.proxy.types_utils.utils import get_instance_fn
+from litellm_proxy.types_utils.utils import get_instance_fn
 from litellm.types.mcp_server.tool_registry import MCPTool
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/117-1c5bfc45bfc4237d.js b/litellm_proxy/_experimental/out/_next/static/chunks/117-1c5bfc45bfc4237d.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/117-1c5bfc45bfc4237d.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/117-1c5bfc45bfc4237d.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/13b76428-ebdf3012af0e4489.js b/litellm_proxy/_experimental/out/_next/static/chunks/13b76428-ebdf3012af0e4489.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/13b76428-ebdf3012af0e4489.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/13b76428-ebdf3012af0e4489.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/250-7d480872c0e251dc.js b/litellm_proxy/_experimental/out/_next/static/chunks/250-7d480872c0e251dc.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/250-7d480872c0e251dc.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/250-7d480872c0e251dc.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/261-ee7f0f1f1c8c22a0.js b/litellm_proxy/_experimental/out/_next/static/chunks/261-ee7f0f1f1c8c22a0.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/261-ee7f0f1f1c8c22a0.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/261-ee7f0f1f1c8c22a0.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/3014691f-b7b79b78e27792f3.js b/litellm_proxy/_experimental/out/_next/static/chunks/3014691f-b7b79b78e27792f3.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/3014691f-b7b79b78e27792f3.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/3014691f-b7b79b78e27792f3.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/42-69f5b4e6a9942a9f.js b/litellm_proxy/_experimental/out/_next/static/chunks/42-69f5b4e6a9942a9f.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/42-69f5b4e6a9942a9f.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/42-69f5b4e6a9942a9f.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/466-65538e7f331af98e.js b/litellm_proxy/_experimental/out/_next/static/chunks/466-65538e7f331af98e.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/466-65538e7f331af98e.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/466-65538e7f331af98e.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/699-2176ba2273e4676d.js b/litellm_proxy/_experimental/out/_next/static/chunks/699-2176ba2273e4676d.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/699-2176ba2273e4676d.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/699-2176ba2273e4676d.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/899-57685cedd1dcbc78.js b/litellm_proxy/_experimental/out/_next/static/chunks/899-57685cedd1dcbc78.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/899-57685cedd1dcbc78.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/899-57685cedd1dcbc78.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/app/_not-found/page-3b0daafcbe368586.js b/litellm_proxy/_experimental/out/_next/static/chunks/app/_not-found/page-3b0daafcbe368586.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/app/_not-found/page-3b0daafcbe368586.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/app/_not-found/page-3b0daafcbe368586.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/app/layout-311f9b6ff79980ae.js b/litellm_proxy/_experimental/out/_next/static/chunks/app/layout-311f9b6ff79980ae.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/app/layout-311f9b6ff79980ae.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/app/layout-311f9b6ff79980ae.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/app/model_hub/page-a965e43ba9638156.js b/litellm_proxy/_experimental/out/_next/static/chunks/app/model_hub/page-a965e43ba9638156.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/app/model_hub/page-a965e43ba9638156.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/app/model_hub/page-a965e43ba9638156.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/app/onboarding/page-9598003bc1e91371.js b/litellm_proxy/_experimental/out/_next/static/chunks/app/onboarding/page-9598003bc1e91371.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/app/onboarding/page-9598003bc1e91371.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/app/onboarding/page-9598003bc1e91371.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/app/page-36914b80c40b5032.js b/litellm_proxy/_experimental/out/_next/static/chunks/app/page-36914b80c40b5032.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/app/page-36914b80c40b5032.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/app/page-36914b80c40b5032.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/fd9d1056-205af899b895cbac.js b/litellm_proxy/_experimental/out/_next/static/chunks/fd9d1056-205af899b895cbac.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/fd9d1056-205af899b895cbac.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/fd9d1056-205af899b895cbac.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/framework-b370f160bb96059c.js b/litellm_proxy/_experimental/out/_next/static/chunks/framework-b370f160bb96059c.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/framework-b370f160bb96059c.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/framework-b370f160bb96059c.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/main-8ee698634884314e.js b/litellm_proxy/_experimental/out/_next/static/chunks/main-8ee698634884314e.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/main-8ee698634884314e.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/main-8ee698634884314e.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/main-app-2b16cdb7ff4e1af7.js b/litellm_proxy/_experimental/out/_next/static/chunks/main-app-2b16cdb7ff4e1af7.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/main-app-2b16cdb7ff4e1af7.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/main-app-2b16cdb7ff4e1af7.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/pages/_app-15e2daefa259f0b5.js b/litellm_proxy/_experimental/out/_next/static/chunks/pages/_app-15e2daefa259f0b5.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/pages/_app-15e2daefa259f0b5.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/pages/_app-15e2daefa259f0b5.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/pages/_error-28b803cb2479b966.js b/litellm_proxy/_experimental/out/_next/static/chunks/pages/_error-28b803cb2479b966.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/pages/_error-28b803cb2479b966.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/pages/_error-28b803cb2479b966.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/polyfills-42372ed130431b0a.js b/litellm_proxy/_experimental/out/_next/static/chunks/polyfills-42372ed130431b0a.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/polyfills-42372ed130431b0a.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/polyfills-42372ed130431b0a.js
diff --git a/litellm/proxy/_experimental/out/_next/static/chunks/webpack-75a5453f51d60261.js b/litellm_proxy/_experimental/out/_next/static/chunks/webpack-75a5453f51d60261.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/chunks/webpack-75a5453f51d60261.js
rename to litellm_proxy/_experimental/out/_next/static/chunks/webpack-75a5453f51d60261.js
diff --git a/litellm/proxy/_experimental/out/_next/static/css/005c96178151b9fd.css b/litellm_proxy/_experimental/out/_next/static/css/005c96178151b9fd.css
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/css/005c96178151b9fd.css
rename to litellm_proxy/_experimental/out/_next/static/css/005c96178151b9fd.css
diff --git a/litellm/proxy/_experimental/out/_next/static/css/86f6cc749f6b8493.css b/litellm_proxy/_experimental/out/_next/static/css/86f6cc749f6b8493.css
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/css/86f6cc749f6b8493.css
rename to litellm_proxy/_experimental/out/_next/static/css/86f6cc749f6b8493.css
diff --git a/litellm/proxy/_experimental/out/_next/static/fzhvjOFL6KeNsWYrLD4ya/_buildManifest.js b/litellm_proxy/_experimental/out/_next/static/fzhvjOFL6KeNsWYrLD4ya/_buildManifest.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/fzhvjOFL6KeNsWYrLD4ya/_buildManifest.js
rename to litellm_proxy/_experimental/out/_next/static/fzhvjOFL6KeNsWYrLD4ya/_buildManifest.js
diff --git a/litellm/proxy/_experimental/out/_next/static/fzhvjOFL6KeNsWYrLD4ya/_ssgManifest.js b/litellm_proxy/_experimental/out/_next/static/fzhvjOFL6KeNsWYrLD4ya/_ssgManifest.js
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/fzhvjOFL6KeNsWYrLD4ya/_ssgManifest.js
rename to litellm_proxy/_experimental/out/_next/static/fzhvjOFL6KeNsWYrLD4ya/_ssgManifest.js
diff --git a/litellm/proxy/_experimental/out/_next/static/media/26a46d62cd723877-s.woff2 b/litellm_proxy/_experimental/out/_next/static/media/26a46d62cd723877-s.woff2
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/media/26a46d62cd723877-s.woff2
rename to litellm_proxy/_experimental/out/_next/static/media/26a46d62cd723877-s.woff2
diff --git a/litellm/proxy/_experimental/out/_next/static/media/55c55f0601d81cf3-s.woff2 b/litellm_proxy/_experimental/out/_next/static/media/55c55f0601d81cf3-s.woff2
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/media/55c55f0601d81cf3-s.woff2
rename to litellm_proxy/_experimental/out/_next/static/media/55c55f0601d81cf3-s.woff2
diff --git a/litellm/proxy/_experimental/out/_next/static/media/581909926a08bbc8-s.woff2 b/litellm_proxy/_experimental/out/_next/static/media/581909926a08bbc8-s.woff2
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/media/581909926a08bbc8-s.woff2
rename to litellm_proxy/_experimental/out/_next/static/media/581909926a08bbc8-s.woff2
diff --git a/litellm/proxy/_experimental/out/_next/static/media/6d93bde91c0c2823-s.woff2 b/litellm_proxy/_experimental/out/_next/static/media/6d93bde91c0c2823-s.woff2
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/media/6d93bde91c0c2823-s.woff2
rename to litellm_proxy/_experimental/out/_next/static/media/6d93bde91c0c2823-s.woff2
diff --git a/litellm/proxy/_experimental/out/_next/static/media/97e0cb1ae144a2a9-s.woff2 b/litellm_proxy/_experimental/out/_next/static/media/97e0cb1ae144a2a9-s.woff2
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/media/97e0cb1ae144a2a9-s.woff2
rename to litellm_proxy/_experimental/out/_next/static/media/97e0cb1ae144a2a9-s.woff2
diff --git a/litellm/proxy/_experimental/out/_next/static/media/a34f9d1faa5f3315-s.p.woff2 b/litellm_proxy/_experimental/out/_next/static/media/a34f9d1faa5f3315-s.p.woff2
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/media/a34f9d1faa5f3315-s.p.woff2
rename to litellm_proxy/_experimental/out/_next/static/media/a34f9d1faa5f3315-s.p.woff2
diff --git a/litellm/proxy/_experimental/out/_next/static/media/df0a9ae256c0569c-s.woff2 b/litellm_proxy/_experimental/out/_next/static/media/df0a9ae256c0569c-s.woff2
similarity index 100%
rename from litellm/proxy/_experimental/out/_next/static/media/df0a9ae256c0569c-s.woff2
rename to litellm_proxy/_experimental/out/_next/static/media/df0a9ae256c0569c-s.woff2
diff --git a/litellm/proxy/_experimental/out/assets/logos/anthropic.svg b/litellm_proxy/_experimental/out/assets/logos/anthropic.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/anthropic.svg
rename to litellm_proxy/_experimental/out/assets/logos/anthropic.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/assemblyai_small.png b/litellm_proxy/_experimental/out/assets/logos/assemblyai_small.png
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/assemblyai_small.png
rename to litellm_proxy/_experimental/out/assets/logos/assemblyai_small.png
diff --git a/litellm/proxy/_experimental/out/assets/logos/aws.svg b/litellm_proxy/_experimental/out/assets/logos/aws.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/aws.svg
rename to litellm_proxy/_experimental/out/assets/logos/aws.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/bedrock.svg b/litellm_proxy/_experimental/out/assets/logos/bedrock.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/bedrock.svg
rename to litellm_proxy/_experimental/out/assets/logos/bedrock.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/cerebras.svg b/litellm_proxy/_experimental/out/assets/logos/cerebras.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/cerebras.svg
rename to litellm_proxy/_experimental/out/assets/logos/cerebras.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/cohere.svg b/litellm_proxy/_experimental/out/assets/logos/cohere.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/cohere.svg
rename to litellm_proxy/_experimental/out/assets/logos/cohere.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/databricks.svg b/litellm_proxy/_experimental/out/assets/logos/databricks.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/databricks.svg
rename to litellm_proxy/_experimental/out/assets/logos/databricks.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/deepseek.svg b/litellm_proxy/_experimental/out/assets/logos/deepseek.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/deepseek.svg
rename to litellm_proxy/_experimental/out/assets/logos/deepseek.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/fireworks.svg b/litellm_proxy/_experimental/out/assets/logos/fireworks.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/fireworks.svg
rename to litellm_proxy/_experimental/out/assets/logos/fireworks.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/google.svg b/litellm_proxy/_experimental/out/assets/logos/google.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/google.svg
rename to litellm_proxy/_experimental/out/assets/logos/google.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/groq.svg b/litellm_proxy/_experimental/out/assets/logos/groq.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/groq.svg
rename to litellm_proxy/_experimental/out/assets/logos/groq.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/microsoft_azure.svg b/litellm_proxy/_experimental/out/assets/logos/microsoft_azure.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/microsoft_azure.svg
rename to litellm_proxy/_experimental/out/assets/logos/microsoft_azure.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/mistral.svg b/litellm_proxy/_experimental/out/assets/logos/mistral.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/mistral.svg
rename to litellm_proxy/_experimental/out/assets/logos/mistral.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/ollama.svg b/litellm_proxy/_experimental/out/assets/logos/ollama.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/ollama.svg
rename to litellm_proxy/_experimental/out/assets/logos/ollama.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/openai_small.svg b/litellm_proxy/_experimental/out/assets/logos/openai_small.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/openai_small.svg
rename to litellm_proxy/_experimental/out/assets/logos/openai_small.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/openrouter.svg b/litellm_proxy/_experimental/out/assets/logos/openrouter.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/openrouter.svg
rename to litellm_proxy/_experimental/out/assets/logos/openrouter.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/perplexity-ai.svg b/litellm_proxy/_experimental/out/assets/logos/perplexity-ai.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/perplexity-ai.svg
rename to litellm_proxy/_experimental/out/assets/logos/perplexity-ai.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/sambanova.svg b/litellm_proxy/_experimental/out/assets/logos/sambanova.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/sambanova.svg
rename to litellm_proxy/_experimental/out/assets/logos/sambanova.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/togetherai.svg b/litellm_proxy/_experimental/out/assets/logos/togetherai.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/togetherai.svg
rename to litellm_proxy/_experimental/out/assets/logos/togetherai.svg
diff --git a/litellm/proxy/_experimental/out/assets/logos/xai.svg b/litellm_proxy/_experimental/out/assets/logos/xai.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/assets/logos/xai.svg
rename to litellm_proxy/_experimental/out/assets/logos/xai.svg
diff --git a/litellm/proxy/_experimental/out/favicon.ico b/litellm_proxy/_experimental/out/favicon.ico
similarity index 100%
rename from litellm/proxy/_experimental/out/favicon.ico
rename to litellm_proxy/_experimental/out/favicon.ico
diff --git a/litellm/proxy/_experimental/out/index.html b/litellm_proxy/_experimental/out/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/index.html
rename to litellm_proxy/_experimental/out/index.html
diff --git a/litellm/proxy/_experimental/out/index.txt b/litellm_proxy/_experimental/out/index.txt
similarity index 100%
rename from litellm/proxy/_experimental/out/index.txt
rename to litellm_proxy/_experimental/out/index.txt
diff --git a/litellm/proxy/_experimental/out/model_hub.txt b/litellm_proxy/_experimental/out/model_hub.txt
similarity index 100%
rename from litellm/proxy/_experimental/out/model_hub.txt
rename to litellm_proxy/_experimental/out/model_hub.txt
diff --git a/litellm/proxy/_experimental/out/next.svg b/litellm_proxy/_experimental/out/next.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/next.svg
rename to litellm_proxy/_experimental/out/next.svg
diff --git a/litellm/proxy/_experimental/out/onboarding.txt b/litellm_proxy/_experimental/out/onboarding.txt
similarity index 100%
rename from litellm/proxy/_experimental/out/onboarding.txt
rename to litellm_proxy/_experimental/out/onboarding.txt
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm_proxy/_experimental/out/onboarding/index.html
similarity index 100%
rename from litellm/proxy/_experimental/out/onboarding.html
rename to litellm_proxy/_experimental/out/onboarding/index.html
diff --git a/litellm/proxy/_experimental/out/vercel.svg b/litellm_proxy/_experimental/out/vercel.svg
similarity index 100%
rename from litellm/proxy/_experimental/out/vercel.svg
rename to litellm_proxy/_experimental/out/vercel.svg
diff --git a/litellm/proxy/_experimental/post_call_rules.py b/litellm_proxy/_experimental/post_call_rules.py
similarity index 100%
rename from litellm/proxy/_experimental/post_call_rules.py
rename to litellm_proxy/_experimental/post_call_rules.py
diff --git a/litellm/proxy/_logging.py b/litellm_proxy/_logging.py
similarity index 100%
rename from litellm/proxy/_logging.py
rename to litellm_proxy/_logging.py
diff --git a/litellm/proxy/_new_new_secret_config.yaml b/litellm_proxy/_new_new_secret_config.yaml
similarity index 100%
rename from litellm/proxy/_new_new_secret_config.yaml
rename to litellm_proxy/_new_new_secret_config.yaml
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm_proxy/_new_secret_config.yaml
similarity index 100%
rename from litellm/proxy/_new_secret_config.yaml
rename to litellm_proxy/_new_secret_config.yaml
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm_proxy/_super_secret_config.yaml
similarity index 100%
rename from litellm/proxy/_super_secret_config.yaml
rename to litellm_proxy/_super_secret_config.yaml
diff --git a/litellm/proxy/_types.py b/litellm_proxy/_types.py
similarity index 95%
rename from litellm/proxy/_types.py
rename to litellm_proxy/_types.py
index 354f6bb54c..11bec0d248 100644
--- a/litellm/proxy/_types.py
+++ b/litellm_proxy/_types.py
@@ -33,6 +33,10 @@ from litellm.types.utils import (
     StandardLoggingPayloadStatus,
     StandardPassThroughResponseObject,
     TextCompletionResponse,
+    AllowedModelRegion,
+    KeyManagementSystem,
+    KeyManagementSettings,  # noqa: F401
+    LiteLLM_UpperboundKeyGenerateParams  # noqa: F401
 )

 from .types_utils.utils import get_instance_fn, validate_custom_validate_return_type
@@ -171,27 +175,6 @@ def hash_token(token: str):
     return hashed_token


-class LiteLLM_UpperboundKeyGenerateParams(LiteLLMPydanticObjectBase):
-    """
-    Set default upperbound to max budget a key called via `/key/generate` can be.
-
-    Args:
-        max_budget (Optional[float], optional): Max budget a key can be. Defaults to None.
-        budget_duration (Optional[str], optional): Duration of the budget. Defaults to None.
-        duration (Optional[str], optional): Duration of the key. Defaults to None.
-        max_parallel_requests (Optional[int], optional): Max number of requests that can be made in parallel. Defaults to None.
-        tpm_limit (Optional[int], optional): Tpm limit. Defaults to None.
-        rpm_limit (Optional[int], optional): Rpm limit. Defaults to None.
-    """
-
-    max_budget: Optional[float] = None
-    budget_duration: Optional[str] = None
-    duration: Optional[str] = None
-    max_parallel_requests: Optional[int] = None
-    tpm_limit: Optional[int] = None
-    rpm_limit: Optional[int] = None
-
-
 class KeyManagementRoutes(str, enum.Enum):
     """
     Enum for key management routes
@@ -855,9 +838,6 @@ class DeleteUserRequest(LiteLLMPydanticObjectBase):
     user_ids: List[str]  # required


-AllowedModelRegion = Literal["eu", "us"]
-
-
 class BudgetNewRequest(LiteLLMPydanticObjectBase):
     budget_id: Optional[str] = Field(default=None, description="The unique budget id.")
     max_budget: Optional[float] = Field(
@@ -1238,41 +1218,6 @@ class OrganizationRequest(LiteLLMPydanticObjectBase):
 class DeleteOrganizationRequest(LiteLLMPydanticObjectBase):
     organization_ids: List[str]  # required

-
-class KeyManagementSystem(enum.Enum):
-    GOOGLE_KMS = "google_kms"
-    AZURE_KEY_VAULT = "azure_key_vault"
-    AWS_SECRET_MANAGER = "aws_secret_manager"
-    GOOGLE_SECRET_MANAGER = "google_secret_manager"
-    HASHICORP_VAULT = "hashicorp_vault"
-    LOCAL = "local"
-    AWS_KMS = "aws_kms"
-
-
-class KeyManagementSettings(LiteLLMPydanticObjectBase):
-    hosted_keys: Optional[List] = None
-    store_virtual_keys: Optional[bool] = False
-    """
-    If True, virtual keys created by litellm will be stored in the secret manager
-    """
-    prefix_for_stored_virtual_keys: str = "litellm/"
-    """
-    If set, this prefix will be used for stored virtual keys in the secret manager
-    """
-
-    access_mode: Literal["read_only", "write_only", "read_and_write"] = "read_only"
-    """
-    Access mode for the secret manager, when write_only will only use for writing secrets
-    """
-
-    primary_secret_name: Optional[str] = None
-    """
-    If set, will read secrets from this primary secret in the secret manager
-
-    eg. on AWS you can store multiple secret values as K/V pairs in a single secret
-    """
-
-
 class TeamDefaultSettings(LiteLLMPydanticObjectBase):
     team_id: str
@@ -2014,54 +1959,6 @@ class SpendLogsPayload(TypedDict):
     response: Optional[Union[str, list, dict]]


-class SpanAttributes(str, enum.Enum):
-    # Note: We've taken this from opentelemetry-semantic-conventions-ai
-    # I chose to not add a new dependency to litellm for this
-
-    # Semantic Conventions for LLM requests, this needs to be removed after
-    # OpenTelemetry Semantic Conventions support Gen AI.
-    # Issue at https://github.com/open-telemetry/opentelemetry-python/issues/3868
-    # Refer to https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/llm-spans.md
-
-    LLM_SYSTEM = "gen_ai.system"
-    LLM_REQUEST_MODEL = "gen_ai.request.model"
-    LLM_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
-    LLM_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
-    LLM_REQUEST_TOP_P = "gen_ai.request.top_p"
-    LLM_PROMPTS = "gen_ai.prompt"
-    LLM_COMPLETIONS = "gen_ai.completion"
-    LLM_RESPONSE_MODEL = "gen_ai.response.model"
-    LLM_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
-    LLM_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"
-    LLM_TOKEN_TYPE = "gen_ai.token.type"
-    # To be added
-    # LLM_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reasons"
-    # LLM_RESPONSE_ID = "gen_ai.response.id"
-
-    # LLM
-    LLM_REQUEST_TYPE = "llm.request.type"
-    LLM_USAGE_TOTAL_TOKENS = "llm.usage.total_tokens"
-    LLM_USAGE_TOKEN_TYPE = "llm.usage.token_type"
-    LLM_USER = "llm.user"
-    LLM_HEADERS = "llm.headers"
-    LLM_TOP_K = "llm.top_k"
-    LLM_IS_STREAMING = "llm.is_streaming"
-    LLM_FREQUENCY_PENALTY = "llm.frequency_penalty"
-    LLM_PRESENCE_PENALTY = "llm.presence_penalty"
-    LLM_CHAT_STOP_SEQUENCES = "llm.chat.stop_sequences"
-    LLM_REQUEST_FUNCTIONS = "llm.request.functions"
-    LLM_REQUEST_REPETITION_PENALTY = "llm.request.repetition_penalty"
-    LLM_RESPONSE_FINISH_REASON = "llm.response.finish_reason"
-    LLM_RESPONSE_STOP_REASON = "llm.response.stop_reason"
-    LLM_CONTENT_COMPLETION_CHUNK = "llm.content.completion.chunk"
-
-    # OpenAI
-    LLM_OPENAI_RESPONSE_SYSTEM_FINGERPRINT = "gen_ai.openai.system_fingerprint"
-    LLM_OPENAI_API_BASE = "gen_ai.openai.api_base"
-    LLM_OPENAI_API_VERSION = "gen_ai.openai.api_version"
-    LLM_OPENAI_API_TYPE = "gen_ai.openai.api_type"
-
-
 class ManagementEndpointLoggingPayload(LiteLLMPydanticObjectBase):
     route: str
     request_data: dict
diff --git a/litellm/proxy/analytics_endpoints/analytics_endpoints.py b/litellm_proxy/analytics_endpoints/analytics_endpoints.py
similarity index 95%
rename from litellm/proxy/analytics_endpoints/analytics_endpoints.py
rename to litellm_proxy/analytics_endpoints/analytics_endpoints.py
index f929cb74e4..5391dfe584 100644
--- a/litellm/proxy/analytics_endpoints/analytics_endpoints.py
+++ b/litellm_proxy/analytics_endpoints/analytics_endpoints.py
@@ -5,8 +5,8 @@ from typing import List, Optional
 import fastapi
 from fastapi import APIRouter, Depends, HTTPException, status

-from litellm.proxy._types import *
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy._types import *
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth

 router = APIRouter()
@@ -61,7 +61,7 @@ async def get_global_activity(
         start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
         end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")

-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     try:
         if prisma_client is None:
diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm_proxy/anthropic_endpoints/endpoints.py
similarity index 94%
rename from litellm/proxy/anthropic_endpoints/endpoints.py
rename to litellm_proxy/anthropic_endpoints/endpoints.py
index 78078b93f8..dd07f09ef0 100644
--- a/litellm/proxy/anthropic_endpoints/endpoints.py
+++ b/litellm_proxy/anthropic_endpoints/endpoints.py
@@ -12,12 +12,12 @@ from fastapi.responses import StreamingResponse
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import *
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
-from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
-from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
-from litellm.proxy.utils import ProxyLogging
+from litellm_proxy._types import *
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy.common_request_processing import ProxyBaseLLMRequestProcessing
+from litellm_proxy.common_utils.http_parsing_utils import _read_request_body
+from litellm_proxy.litellm_pre_call_utils import add_litellm_data_to_request
+from litellm_proxy.utils import ProxyLogging

 router = APIRouter()
@@ -43,7 +43,7 @@ async def async_data_generator_anthropic(
             yield chunk
     except Exception as e:
         verbose_proxy_logger.exception(
-            "litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format(
+            "litellm_proxy.proxy_server.async_data_generator(): Exception occured - {}".format(
                 str(e)
             )
         )
@@ -88,7 +88,7 @@ async def anthropic_response(  # noqa: PLR0915

     This was a BETA endpoint that calls 100+ LLMs in the anthropic format.
     """
-    from litellm.proxy.proxy_server import (
+    from litellm_proxy.proxy_server import (
         general_settings,
         llm_router,
         proxy_config,
@@ -239,7 +239,7 @@ async def anthropic_response(  # noqa: PLR0915
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
         verbose_proxy_logger.exception(
-            "litellm.proxy.proxy_server.anthropic_response(): Exception occured - {}".format(
+            "litellm_proxy.proxy_server.anthropic_response(): Exception occured - {}".format(
                 str(e)
             )
         )
diff --git a/litellm/proxy/auth/auth_checks.py b/litellm_proxy/auth/auth_checks.py
similarity index 99%
rename from litellm/proxy/auth/auth_checks.py
rename to litellm_proxy/auth/auth_checks.py
index ede3624da6..786aae79d2 100644
--- a/litellm/proxy/auth/auth_checks.py
+++ b/litellm_proxy/auth/auth_checks.py
@@ -22,7 +22,7 @@ from litellm.caching.caching import DualCache
 from litellm.caching.dual_cache import LimitedSizeOrderedDict
 from litellm.constants import DEFAULT_IN_MEMORY_TTL
 from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
-from litellm.proxy._types import (
+from litellm_proxy._types import (
     RBAC_ROLES,
     CallInfo,
     LiteLLM_EndUserTable,
@@ -40,9 +40,9 @@ from litellm.proxy._types import (
     SpecialModelNames,
     UserAPIKeyAuth,
 )
-from litellm.proxy.auth.route_checks import RouteChecks
-from litellm.proxy.route_llm_request import route_request
-from litellm.proxy.utils import PrismaClient, ProxyLogging, log_db_metrics
+from litellm_proxy.auth.route_checks import RouteChecks
+from litellm_proxy.route_llm_request import route_request
+from litellm_proxy.utils import PrismaClient, ProxyLogging, log_db_metrics
 from litellm.router import Router

 from .auth_checks_organization import organization_role_based_access_check
@@ -179,7 +179,7 @@ async def common_checks(
     _request_metadata: dict = request_body.get("metadata", {}) or {}
     if _request_metadata.get("guardrails"):
         # check if team allowed to modify guardrails
-        from litellm.proxy.guardrails.guardrail_helpers import can_modify_guardrails
+        from litellm_proxy.guardrails.guardrail_helpers import can_modify_guardrails

         can_modify: bool = can_modify_guardrails(team_object)
         if can_modify is False:
diff --git a/litellm/proxy/auth/auth_checks_organization.py b/litellm_proxy/auth/auth_checks_organization.py
similarity index 99%
rename from litellm/proxy/auth/auth_checks_organization.py
rename to litellm_proxy/auth/auth_checks_organization.py
index e96a5c61fc..f73cf6d486 100644
--- a/litellm/proxy/auth/auth_checks_organization.py
+++ b/litellm_proxy/auth/auth_checks_organization.py
@@ -6,7 +6,7 @@ from typing import Dict, List, Optional, Tuple

 from fastapi import status

-from litellm.proxy._types import *
+from litellm_proxy._types import *


 def organization_role_based_access_check(
diff --git a/litellm/proxy/auth/auth_exception_handler.py b/litellm_proxy/auth/auth_exception_handler.py
similarity index 93%
rename from litellm/proxy/auth/auth_exception_handler.py
rename to litellm_proxy/auth/auth_exception_handler.py
index 268e3bb1b2..7ca6f653fd 100644
--- a/litellm/proxy/auth/auth_exception_handler.py
+++ b/litellm_proxy/auth/auth_exception_handler.py
@@ -9,9 +9,9 @@ from fastapi import HTTPException, Request, status

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import ProxyErrorTypes, ProxyException, UserAPIKeyAuth
-from litellm.proxy.auth.auth_utils import _get_request_ip_address
-from litellm.proxy.db.exception_handler import PrismaDBExceptionHandler
+from litellm_proxy._types import ProxyErrorTypes, ProxyException, UserAPIKeyAuth
+from litellm_proxy.auth.auth_utils import _get_request_ip_address
+from litellm_proxy.db.exception_handler import PrismaDBExceptionHandler
 from litellm.types.services import ServiceTypes

 if TYPE_CHECKING:
@@ -46,7 +46,7 @@ class UserAPIKeyAuthExceptionHandler:
         Raises:
         - Orignal Exception in all other cases
         """
-        from litellm.proxy.proxy_server import (
+        from litellm_proxy.proxy_server import (
             general_settings,
             litellm_proxy_admin_name,
             proxy_logging_obj,
@@ -77,7 +77,7 @@ class UserAPIKeyAuthExceptionHandler:
                 use_x_forwarded_for=general_settings.get("use_x_forwarded_for", False),
             )
             verbose_proxy_logger.exception(
-                "litellm.proxy.proxy_server.user_api_key_auth(): Exception occured - {}\nRequester IP Address:{}".format(
+                "litellm_proxy.proxy_server.user_api_key_auth(): Exception occured - {}\nRequester IP Address:{}".format(
                     str(e),
                     requester_ip,
                 ),
diff --git a/litellm/proxy/auth/auth_utils.py b/litellm_proxy/auth/auth_utils.py
similarity index 97%
rename from litellm/proxy/auth/auth_utils.py
rename to litellm_proxy/auth/auth_utils.py
index 0200457ef9..65be463ee4 100644
--- a/litellm/proxy/auth/auth_utils.py
+++ b/litellm_proxy/auth/auth_utils.py
@@ -7,7 +7,7 @@ from fastapi import HTTPException, Request, status

 from litellm import Router, provider_list
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import *
+from litellm_proxy._types import *
 from litellm.types.router import CONFIGURABLE_CLIENTSIDE_AUTH_PARAMS
@@ -195,7 +195,7 @@ async def pre_db_read_auth_checks(
     Raises:
     - HTTPException if request fails initial auth checks
     """
-    from litellm.proxy.proxy_server import general_settings, llm_router, premium_user
+    from litellm_proxy.proxy_server import general_settings, llm_router, premium_user

     # Check 1. request size
     await check_if_request_size_is_safe(request=request)
@@ -262,7 +262,7 @@ def route_in_additonal_public_routes(current_route: str):
     """

     # check if user is premium_user - if not do nothing
-    from litellm.proxy.proxy_server import general_settings, premium_user
+    from litellm_proxy.proxy_server import general_settings, premium_user

     try:
         if premium_user is not True:
@@ -317,7 +317,7 @@ async def check_if_request_size_is_safe(request: Request) -> bool:
         ProxyException: If the request size is too large
     """
-    from litellm.proxy.proxy_server import general_settings, premium_user
+    from litellm_proxy.proxy_server import general_settings, premium_user

     max_request_size_mb = general_settings.get("max_request_size_mb", None)
@@ -382,7 +382,7 @@ async def check_response_size_is_safe(response: Any) -> bool:
     """
-    from litellm.proxy.proxy_server import general_settings, premium_user
+    from litellm_proxy.proxy_server import general_settings, premium_user

     max_response_size_mb = general_settings.get("max_response_size_mb", None)
     if max_response_size_mb is not None:
@@ -465,7 +465,7 @@ def should_run_auth_on_pass_through_provider_route(route: str) -> bool:
     - User is premium_user
     - User has enabled litellm_setting.use_client_credentials_pass_through_routes
     """
-    from litellm.proxy.proxy_server import general_settings, premium_user
+    from litellm_proxy.proxy_server import general_settings, premium_user

     if premium_user is not True:
         return False
diff --git a/litellm/proxy/auth/handle_jwt.py b/litellm_proxy/auth/handle_jwt.py
similarity index 99%
rename from litellm/proxy/auth/handle_jwt.py
rename to litellm_proxy/auth/handle_jwt.py
index 783c2f1553..4d5bc4530c 100644
--- a/litellm/proxy/auth/handle_jwt.py
+++ b/litellm_proxy/auth/handle_jwt.py
@@ -19,7 +19,7 @@ from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
 from litellm.litellm_core_utils.dot_notation_indexing import get_nested_value
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
-from litellm.proxy._types import (
+from litellm_proxy._types import (
     RBAC_ROLES,
     JWKKeyValue,
     JWTAuthBuilderResult,
@@ -33,8 +33,8 @@ from litellm.proxy._types import (
     ScopeMapping,
     Span,
 )
-from litellm.proxy.auth.auth_checks import can_team_access_model
-from litellm.proxy.utils import PrismaClient, ProxyLogging
+from litellm_proxy.auth.auth_checks import can_team_access_model
+from litellm_proxy.utils import PrismaClient, ProxyLogging

 from .auth_checks import (
     _allowed_routes_check,
diff --git a/litellm/proxy/auth/litellm_license.py b/litellm_proxy/auth/litellm_license.py
similarity index 93%
rename from litellm/proxy/auth/litellm_license.py
rename to litellm_proxy/auth/litellm_license.py
index 936f372181..d4af64b6f1 100644
--- a/litellm/proxy/auth/litellm_license.py
+++ b/litellm_proxy/auth/litellm_license.py
@@ -47,7 +47,7 @@ class LicenseCheck:

     def _verify(self, license_str: str) -> bool:
         verbose_proxy_logger.debug(
-            "litellm.proxy.auth.litellm_license.py::_verify - Checking license against {}/verify_license - {}".format(
+            "litellm_proxy.auth.litellm_license.py::_verify - Checking license against {}/verify_license - {}".format(
                 self.base_url, license_str
             )
         )
@@ -76,14 +76,14 @@ class LicenseCheck:
             assert isinstance(premium, bool)

             verbose_proxy_logger.debug(
-                "litellm.proxy.auth.litellm_license.py::_verify - License={} is premium={}".format(
+                "litellm_proxy.auth.litellm_license.py::_verify - License={} is premium={}".format(
                     license_str, premium
                 )
             )
             return premium

         except Exception as e:
             verbose_proxy_logger.exception(
-                "litellm.proxy.auth.litellm_license.py::_verify - Unable to verify License={} via api. - {}".format(
+                "litellm_proxy.auth.litellm_license.py::_verify - Unable to verify License={} via api. - {}".format(
                     license_str, str(e)
                 )
             )
@@ -96,7 +96,7 @@ class LicenseCheck:
         """
         try:
             verbose_proxy_logger.debug(
-                "litellm.proxy.auth.litellm_license.py::is_premium() - ENTERING 'IS_PREMIUM' - LiteLLM License={}".format(
+                "litellm_proxy.auth.litellm_license.py::is_premium() - ENTERING 'IS_PREMIUM' - LiteLLM License={}".format(
                     self.license_str
                 )
             )
@@ -105,7 +105,7 @@ class LicenseCheck:
                 self.license_str = os.getenv("LITELLM_LICENSE", None)

             verbose_proxy_logger.debug(
-                "litellm.proxy.auth.litellm_license.py::is_premium() - Updated 'self.license_str' - {}".format(
+                "litellm_proxy.auth.litellm_license.py::is_premium() - Updated 'self.license_str' - {}".format(
                     self.license_str
                 )
             )
@@ -162,7 +162,7 @@ class LicenseCheck:

         except Exception as e:
             verbose_proxy_logger.debug(
-                "litellm.proxy.auth.litellm_license.py::verify_license_without_api_request - Unable to verify License locally. - {}".format(
+                "litellm_proxy.auth.litellm_license.py::verify_license_without_api_request - Unable to verify License locally. - {}".format(
                     str(e)
                 )
             )
diff --git a/litellm/proxy/auth/model_checks.py b/litellm_proxy/auth/model_checks.py
similarity index 99%
rename from litellm/proxy/auth/model_checks.py
rename to litellm_proxy/auth/model_checks.py
index 87eafd1eb1..30d95335e2 100644
--- a/litellm/proxy/auth/model_checks.py
+++ b/litellm_proxy/auth/model_checks.py
@@ -4,7 +4,7 @@ from typing import Dict, List, Optional, Set

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import SpecialModelNames, UserAPIKeyAuth
+from litellm_proxy._types import SpecialModelNames, UserAPIKeyAuth
 from litellm.router import Router
 from litellm.types.router import LiteLLM_Params
 from litellm.utils import get_valid_models
diff --git a/litellm/proxy/auth/oauth2_check.py b/litellm_proxy/auth/oauth2_check.py
similarity index 94%
rename from litellm/proxy/auth/oauth2_check.py
rename to litellm_proxy/auth/oauth2_check.py
index 4851c27012..9dcc60fe5b 100644
--- a/litellm/proxy/auth/oauth2_check.py
+++ b/litellm_proxy/auth/oauth2_check.py
@@ -1,4 +1,4 @@
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth


 async def check_oauth2_token(token: str) -> UserAPIKeyAuth:
@@ -23,8 +23,8 @@ async def check_oauth2_token(token: str) -> UserAPIKeyAuth:
         get_async_httpx_client,
         httpxSpecialProvider,
     )
-    from litellm.proxy._types import CommonProxyErrors
-    from litellm.proxy.proxy_server import premium_user
+    from litellm_proxy._types import CommonProxyErrors
+    from litellm_proxy.proxy_server import premium_user

     if premium_user is not True:
         raise ValueError(
diff --git a/litellm/proxy/auth/oauth2_proxy_hook.py b/litellm_proxy/auth/oauth2_proxy_hook.py
similarity index 93%
rename from litellm/proxy/auth/oauth2_proxy_hook.py
rename to litellm_proxy/auth/oauth2_proxy_hook.py
index a1db5d842c..282cbf8467 100644
--- a/litellm/proxy/auth/oauth2_proxy_hook.py
+++ b/litellm_proxy/auth/oauth2_proxy_hook.py
@@ -3,14 +3,14 @@ from typing import Any, Dict
 from fastapi import Request

 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth


 async def handle_oauth2_proxy_request(request: Request) -> UserAPIKeyAuth:
     """
     Handle request from oauth2 proxy.
""" - from litellm.proxy.proxy_server import general_settings + from litellm_proxy.proxy_server import general_settings verbose_proxy_logger.debug("Handling oauth2 proxy request") # Define the OAuth2 config mappings diff --git a/litellm/proxy/auth/public_key.pem b/litellm_proxy/auth/public_key.pem similarity index 100% rename from litellm/proxy/auth/public_key.pem rename to litellm_proxy/auth/public_key.pem diff --git a/litellm/proxy/auth/rds_iam_token.py b/litellm_proxy/auth/rds_iam_token.py similarity index 100% rename from litellm/proxy/auth/rds_iam_token.py rename to litellm_proxy/auth/rds_iam_token.py diff --git a/litellm/proxy/auth/route_checks.py b/litellm_proxy/auth/route_checks.py similarity index 99% rename from litellm/proxy/auth/route_checks.py rename to litellm_proxy/auth/route_checks.py index 93bcb70a90..2b536dbc7a 100644 --- a/litellm/proxy/auth/route_checks.py +++ b/litellm_proxy/auth/route_checks.py @@ -4,7 +4,7 @@ from typing import List, Optional from fastapi import HTTPException, Request, status from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import ( +from litellm_proxy._types import ( CommonProxyErrors, LiteLLM_UserTable, LiteLLMRoutes, @@ -161,7 +161,7 @@ class RouteChecks: @staticmethod def custom_admin_only_route_check(route: str): - from litellm.proxy.proxy_server import general_settings, premium_user + from litellm_proxy.proxy_server import general_settings, premium_user if "admin_only_routes" in general_settings: if premium_user is not True: diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm_proxy/auth/user_api_key_auth.py similarity index 98% rename from litellm/proxy/auth/user_api_key_auth.py rename to litellm_proxy/auth/user_api_key_auth.py index 97e9fb8c73..6432d3b839 100644 --- a/litellm/proxy/auth/user_api_key_auth.py +++ b/litellm_proxy/auth/user_api_key_auth.py @@ -22,8 +22,8 @@ from litellm._logging import verbose_logger, verbose_proxy_logger from litellm._service_logger import ServiceLogging from litellm.caching import DualCache from litellm.litellm_core_utils.dd_tracing import tracer -from litellm.proxy._types import * -from litellm.proxy.auth.auth_checks import ( +from litellm_proxy._types import * +from litellm_proxy.auth.auth_checks import ( _cache_key_object, _get_user_role, _is_user_proxy_admin, @@ -37,8 +37,8 @@ from litellm.proxy.auth.auth_checks import ( get_user_object, is_valid_fallback_model, ) -from litellm.proxy.auth.auth_exception_handler import UserAPIKeyAuthExceptionHandler -from litellm.proxy.auth.auth_utils import ( +from litellm_proxy.auth.auth_exception_handler import UserAPIKeyAuthExceptionHandler +from litellm_proxy.auth.auth_utils import ( get_end_user_id_from_request_body, get_request_route, is_pass_through_provider_route, @@ -46,11 +46,11 @@ from litellm.proxy.auth.auth_utils import ( route_in_additonal_public_routes, should_run_auth_on_pass_through_provider_route, ) -from litellm.proxy.auth.handle_jwt import JWTAuthManager, JWTHandler -from litellm.proxy.auth.oauth2_check import check_oauth2_token -from litellm.proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request -from litellm.proxy.common_utils.http_parsing_utils import _read_request_body -from litellm.proxy.utils import PrismaClient, ProxyLogging +from litellm_proxy.auth.handle_jwt import JWTAuthManager, JWTHandler +from litellm_proxy.auth.oauth2_check import check_oauth2_token +from litellm_proxy.auth.oauth2_proxy_hook import handle_oauth2_proxy_request +from litellm_proxy.common_utils.http_parsing_utils import 
_read_request_body +from litellm_proxy.utils import PrismaClient, ProxyLogging from litellm.types.services import ServiceTypes user_api_key_service_logger_obj = ServiceLogging() # used for tracking latency on OTEL @@ -227,7 +227,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 azure_apim_header: Optional[str], request_data: dict, ) -> UserAPIKeyAuth: - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( general_settings, jwt_handler, litellm_proxy_admin_name, @@ -330,7 +330,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 if general_settings.get("enable_oauth2_auth", False) is True: # return UserAPIKeyAuth object # helper to check if the api_key is a valid oauth2 token - from litellm.proxy.proxy_server import premium_user + from litellm_proxy.proxy_server import premium_user if premium_user is not True: raise ValueError( @@ -344,7 +344,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 return await handle_oauth2_proxy_request(request=request) if general_settings.get("enable_jwt_auth", False) is True: - from litellm.proxy.proxy_server import premium_user + from litellm_proxy.proxy_server import premium_user if premium_user is not True: raise ValueError( @@ -662,7 +662,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 ) # prevent token hashes from being used else: verbose_logger.warning( - "litellm.proxy.proxy_server.user_api_key_auth(): Warning - Key={} is not a string.".format( + "litellm_proxy.proxy_server.user_api_key_auth(): Warning - Key={} is not a string.".format( api_key ) ) @@ -788,7 +788,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915 ) except Exception as e: verbose_logger.debug( - "litellm.proxy.auth.user_api_key_auth.py::user_api_key_auth() - Unable to get user from db/cache. Setting user_obj to None. Exception received - {}".format( + "litellm_proxy.auth.user_api_key_auth.py::user_api_key_auth() - Unable to get user from db/cache. Setting user_obj to None. 
Exception received - {}".format( str(e) ) ) diff --git a/litellm/proxy/batches_endpoints/endpoints.py b/litellm_proxy/batches_endpoints/endpoints.py similarity index 94% rename from litellm/proxy/batches_endpoints/endpoints.py rename to litellm_proxy/batches_endpoints/endpoints.py index 6b7651d48f..915d1cac89 100644 --- a/litellm/proxy/batches_endpoints/endpoints.py +++ b/litellm_proxy/batches_endpoints/endpoints.py @@ -16,15 +16,15 @@ from litellm.batches.main import ( CreateBatchRequest, RetrieveBatchRequest, ) -from litellm.proxy._types import * -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing -from litellm.proxy.common_utils.http_parsing_utils import _read_request_body -from litellm.proxy.common_utils.openai_endpoint_utils import ( +from litellm_proxy._types import * +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.common_request_processing import ProxyBaseLLMRequestProcessing +from litellm_proxy.common_utils.http_parsing_utils import _read_request_body +from litellm_proxy.common_utils.openai_endpoint_utils import ( get_custom_llm_provider_from_request_body, ) -from litellm.proxy.openai_files_endpoints.files_endpoints import is_known_model -from litellm.proxy.utils import handle_exception_on_proxy +from litellm_proxy.openai_files_endpoints.files_endpoints import is_known_model +from litellm_proxy.utils import handle_exception_on_proxy router = APIRouter() @@ -67,7 +67,7 @@ async def create_batch( }' ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, llm_router, @@ -154,7 +154,7 @@ async def create_batch( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.create_batch(): Exception occured - {}".format( + "litellm_proxy.proxy_server.create_batch(): Exception occured - {}".format( str(e) ) ) @@ -198,7 +198,7 @@ async def retrieve_batch( ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, llm_router, @@ -282,7 +282,7 @@ async def retrieve_batch( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.retrieve_batch(): Exception occured - {}".format( + "litellm_proxy.proxy_server.retrieve_batch(): Exception occured - {}".format( str(e) ) ) @@ -325,7 +325,7 @@ async def list_batches( ``` """ - from litellm.proxy.proxy_server import proxy_logging_obj, version + from litellm_proxy.proxy_server import proxy_logging_obj, version verbose_proxy_logger.debug("GET /v1/batches after={} limit={}".format(after, limit)) try: @@ -365,7 +365,7 @@ async def list_batches( request_data={"after": after, "limit": limit}, ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.retrieve_batch(): Exception occured - {}".format( + "litellm_proxy.proxy_server.retrieve_batch(): Exception occured - {}".format( str(e) ) ) @@ -409,7 +409,7 @@ async def cancel_batch( ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, proxy_config, @@ -474,7 +474,7 @@ async def cancel_batch( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.create_batch(): Exception 
occured - {}".format( + "litellm_proxy.proxy_server.create_batch(): Exception occured - {}".format( str(e) ) ) diff --git a/litellm/proxy/cached_logo.jpg b/litellm_proxy/cached_logo.jpg similarity index 100% rename from litellm/proxy/cached_logo.jpg rename to litellm_proxy/cached_logo.jpg diff --git a/litellm/proxy/caching_routes.py b/litellm_proxy/caching_routes.py similarity index 98% rename from litellm/proxy/caching_routes.py rename to litellm_proxy/caching_routes.py index f25c273ae9..3fb98569dc 100644 --- a/litellm/proxy/caching_routes.py +++ b/litellm_proxy/caching_routes.py @@ -7,8 +7,8 @@ from litellm._logging import verbose_proxy_logger from litellm.caching.caching import RedisCache from litellm.litellm_core_utils.safe_json_dumps import safe_dumps from litellm.litellm_core_utils.sensitive_data_masker import SensitiveDataMasker -from litellm.proxy._types import ProxyErrorTypes, ProxyException -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy._types import ProxyErrorTypes, ProxyException +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth from litellm.types.caching import CachePingResponse, HealthCheckCacheParams masker = SensitiveDataMasker() diff --git a/litellm/proxy/common_request_processing.py b/litellm_proxy/common_request_processing.py similarity index 96% rename from litellm/proxy/common_request_processing.py rename to litellm_proxy/common_request_processing.py index 60050fbeb2..d8cd0a9125 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm_proxy/common_request_processing.py @@ -11,23 +11,23 @@ from fastapi.responses import Response, StreamingResponse import litellm from litellm._logging import verbose_proxy_logger from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj -from litellm.proxy._types import ProxyException, UserAPIKeyAuth -from litellm.proxy.auth.auth_utils import check_response_size_is_safe -from litellm.proxy.common_utils.callback_utils import ( +from litellm_proxy._types import ProxyException, UserAPIKeyAuth +from litellm_proxy.auth.auth_utils import check_response_size_is_safe +from litellm_proxy.common_utils.callback_utils import ( get_logging_caching_headers, get_remaining_tokens_and_requests_from_request_data, ) -from litellm.proxy.route_llm_request import route_request -from litellm.proxy.utils import ProxyLogging +from litellm_proxy.route_llm_request import route_request +from litellm_proxy.utils import ProxyLogging from litellm.router import Router if TYPE_CHECKING: - from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig + from litellm_proxy.proxy_server import ProxyConfig as _ProxyConfig ProxyConfig = _ProxyConfig else: ProxyConfig = Any -from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request +from litellm_proxy.litellm_pre_call_utils import add_litellm_data_to_request class ProxyBaseLLMRequestProcessing: @@ -330,7 +330,7 @@ class ProxyBaseLLMRequestProcessing: ): """Raises ProxyException (OpenAI API compatible) if an exception is raised""" verbose_proxy_logger.exception( - f"litellm.proxy.proxy_server._handle_llm_api_exception(): Exception occured - {str(e)}" + f"litellm_proxy.proxy_server._handle_llm_api_exception(): Exception occured - {str(e)}" ) await proxy_logging_obj.post_call_failure_hook( user_api_key_dict=user_api_key_dict, diff --git a/litellm/proxy/common_utils/admin_ui_utils.py b/litellm_proxy/common_utils/admin_ui_utils.py similarity index 99% rename from litellm/proxy/common_utils/admin_ui_utils.py rename 
to litellm_proxy/common_utils/admin_ui_utils.py index 41fd3a1a76..7478b00648 100644 --- a/litellm/proxy/common_utils/admin_ui_utils.py +++ b/litellm_proxy/common_utils/admin_ui_utils.py @@ -1,7 +1,7 @@ def show_missing_vars_in_env(): from fastapi.responses import HTMLResponse - from litellm.proxy.proxy_server import master_key, prisma_client + from litellm_proxy.proxy_server import master_key, prisma_client if prisma_client is None and master_key is None: return HTMLResponse( diff --git a/litellm/proxy/common_utils/callback_utils.py b/litellm_proxy/common_utils/callback_utils.py similarity index 95% rename from litellm/proxy/common_utils/callback_utils.py rename to litellm_proxy/common_utils/callback_utils.py index 1c1b6f32c1..59d2e8f58e 100644 --- a/litellm/proxy/common_utils/callback_utils.py +++ b/litellm_proxy/common_utils/callback_utils.py @@ -3,8 +3,8 @@ from typing import Any, Dict, List, Optional import litellm from litellm import get_secret from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import CommonProxyErrors, LiteLLMPromptInjectionParams -from litellm.proxy.types_utils.utils import get_instance_fn +from litellm_proxy._types import CommonProxyErrors, LiteLLMPromptInjectionParams +from litellm_proxy.types_utils.utils import get_instance_fn blue_color_code = "\033[94m" reset_color_code = "\033[0m" @@ -17,7 +17,7 @@ def initialize_callbacks_on_proxy( # noqa: PLR0915 litellm_settings: dict, callback_specific_params: dict = {}, ): - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client verbose_proxy_logger.debug( f"{blue_color_code}initializing callbacks={value} on proxy{reset_color_code}" @@ -31,7 +31,7 @@ def initialize_callbacks_on_proxy( # noqa: PLR0915 ): imported_list.append(callback) elif isinstance(callback, str) and callback == "presidio": - from litellm.proxy.guardrails.guardrail_hooks.presidio import ( + from litellm_proxy.guardrails.guardrail_hooks.presidio import ( _OPTIONAL_PresidioPIIMasking, ) @@ -95,7 +95,7 @@ def initialize_callbacks_on_proxy( # noqa: PLR0915 openai_moderations_object = _ENTERPRISE_OpenAI_Moderation() imported_list.append(openai_moderations_object) elif isinstance(callback, str) and callback == "lakera_prompt_injection": - from litellm.proxy.guardrails.guardrail_hooks.lakera_ai import ( + from litellm_proxy.guardrails.guardrail_hooks.lakera_ai import ( lakeraAI_Moderation, ) @@ -105,7 +105,7 @@ def initialize_callbacks_on_proxy( # noqa: PLR0915 lakera_moderations_object = lakeraAI_Moderation(**init_params) imported_list.append(lakera_moderations_object) elif isinstance(callback, str) and callback == "aporia_prompt_injection": - from litellm.proxy.guardrails.guardrail_hooks.aporia_ai import ( + from litellm_proxy.guardrails.guardrail_hooks.aporia_ai import ( AporiaGuardrail, ) @@ -164,7 +164,7 @@ def initialize_callbacks_on_proxy( # noqa: PLR0915 banned_keywords_obj = _ENTERPRISE_BannedKeywords() imported_list.append(banned_keywords_obj) elif isinstance(callback, str) and callback == "detect_prompt_injection": - from litellm.proxy.hooks.prompt_injection_detection import ( + from litellm_proxy.hooks.prompt_injection_detection import ( _OPTIONAL_PromptInjectionDetection, ) @@ -182,14 +182,14 @@ def initialize_callbacks_on_proxy( # noqa: PLR0915 ) imported_list.append(prompt_injection_detection_obj) elif isinstance(callback, str) and callback == "batch_redis_requests": - from litellm.proxy.hooks.batch_redis_get import ( + from litellm_proxy.hooks.batch_redis_get import ( 
_PROXY_BatchRedisRequests, ) batch_redis_obj = _PROXY_BatchRedisRequests() imported_list.append(batch_redis_obj) elif isinstance(callback, str) and callback == "azure_content_safety": - from litellm.proxy.hooks.azure_content_safety import ( + from litellm_proxy.hooks.azure_content_safety import ( _PROXY_AzureContentSafety, ) diff --git a/litellm/proxy/common_utils/debug_utils.py b/litellm_proxy/common_utils/debug_utils.py similarity index 98% rename from litellm/proxy/common_utils/debug_utils.py rename to litellm_proxy/common_utils/debug_utils.py index fdfbe0cb7c..5622cb162e 100644 --- a/litellm/proxy/common_utils/debug_utils.py +++ b/litellm_proxy/common_utils/debug_utils.py @@ -53,7 +53,7 @@ async def memory_usage_in_mem_cache(): 3. proxy_logging_cache 4. internal_usage_cache """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( llm_router, proxy_logging_obj, user_api_key_cache, @@ -90,7 +90,7 @@ async def memory_usage_in_mem_cache_items(): 3. proxy_logging_cache 4. internal_usage_cache """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( llm_router, proxy_logging_obj, user_api_key_cache, @@ -115,7 +115,7 @@ async def memory_usage_in_mem_cache_items(): @router.get("/otel-spans", include_in_schema=False) async def get_otel_spans(): - from litellm.proxy.proxy_server import open_telemetry_logger + from litellm_proxy.proxy_server import open_telemetry_logger if open_telemetry_logger is None: return { diff --git a/litellm/proxy/common_utils/encrypt_decrypt_utils.py b/litellm_proxy/common_utils/encrypt_decrypt_utils.py similarity index 98% rename from litellm/proxy/common_utils/encrypt_decrypt_utils.py rename to litellm_proxy/common_utils/encrypt_decrypt_utils.py index 348c81101f..29ce43d794 100644 --- a/litellm/proxy/common_utils/encrypt_decrypt_utils.py +++ b/litellm_proxy/common_utils/encrypt_decrypt_utils.py @@ -6,7 +6,7 @@ from litellm._logging import verbose_proxy_logger def _get_salt_key(): - from litellm.proxy.proxy_server import master_key + from litellm_proxy.proxy_server import master_key salt_key = os.getenv("LITELLM_SALT_KEY", None) diff --git a/litellm/proxy/common_utils/html_forms/jwt_display_template.py b/litellm_proxy/common_utils/html_forms/jwt_display_template.py similarity index 100% rename from litellm/proxy/common_utils/html_forms/jwt_display_template.py rename to litellm_proxy/common_utils/html_forms/jwt_display_template.py diff --git a/litellm/proxy/common_utils/html_forms/ui_login.py b/litellm_proxy/common_utils/html_forms/ui_login.py similarity index 100% rename from litellm/proxy/common_utils/html_forms/ui_login.py rename to litellm_proxy/common_utils/html_forms/ui_login.py diff --git a/litellm/proxy/common_utils/http_parsing_utils.py b/litellm_proxy/common_utils/http_parsing_utils.py similarity index 99% rename from litellm/proxy/common_utils/http_parsing_utils.py rename to litellm_proxy/common_utils/http_parsing_utils.py index ca4b5a0588..57fe595746 100644 --- a/litellm/proxy/common_utils/http_parsing_utils.py +++ b/litellm_proxy/common_utils/http_parsing_utils.py @@ -131,7 +131,7 @@ def check_file_size_under_limit( Returns True -> when file size is under max_file_size_mb limit Raises ProxyException -> when file size is over max_file_size_mb limit or not a premium_user """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( CommonProxyErrors, ProxyException, llm_router, diff --git a/litellm/proxy/common_utils/load_config_utils.py 
b/litellm_proxy/common_utils/load_config_utils.py similarity index 100% rename from litellm/proxy/common_utils/load_config_utils.py rename to litellm_proxy/common_utils/load_config_utils.py diff --git a/litellm/proxy/common_utils/openai_endpoint_utils.py b/litellm_proxy/common_utils/openai_endpoint_utils.py similarity index 94% rename from litellm/proxy/common_utils/openai_endpoint_utils.py rename to litellm_proxy/common_utils/openai_endpoint_utils.py index 316a842710..3b415271b8 100644 --- a/litellm/proxy/common_utils/openai_endpoint_utils.py +++ b/litellm_proxy/common_utils/openai_endpoint_utils.py @@ -6,7 +6,7 @@ from typing import Optional from fastapi import Request -from litellm.proxy.common_utils.http_parsing_utils import _read_request_body +from litellm_proxy.common_utils.http_parsing_utils import _read_request_body def remove_sensitive_info_from_deployment(deployment_dict: dict) -> dict: diff --git a/litellm/proxy/common_utils/proxy_state.py b/litellm_proxy/common_utils/proxy_state.py similarity index 95% rename from litellm/proxy/common_utils/proxy_state.py rename to litellm_proxy/common_utils/proxy_state.py index edd18c603d..d01956f924 100644 --- a/litellm/proxy/common_utils/proxy_state.py +++ b/litellm_proxy/common_utils/proxy_state.py @@ -6,7 +6,7 @@ Example: `spend_logs_row_count` is used to store the number of rows in the `Lite from typing import Any, Literal -from litellm.proxy._types import ProxyStateVariables +from litellm_proxy._types import ProxyStateVariables class ProxyState: diff --git a/litellm/proxy/common_utils/reset_budget_job.py b/litellm_proxy/common_utils/reset_budget_job.py similarity index 99% rename from litellm/proxy/common_utils/reset_budget_job.py rename to litellm_proxy/common_utils/reset_budget_job.py index 1d50002f5c..4f41d3d93a 100644 --- a/litellm/proxy/common_utils/reset_budget_job.py +++ b/litellm_proxy/common_utils/reset_budget_job.py @@ -6,12 +6,12 @@ from typing import List, Literal, Optional, Union from litellm._logging import verbose_proxy_logger from litellm.litellm_core_utils.duration_parser import duration_in_seconds -from litellm.proxy._types import ( +from litellm_proxy._types import ( LiteLLM_TeamTable, LiteLLM_UserTable, LiteLLM_VerificationToken, ) -from litellm.proxy.utils import PrismaClient, ProxyLogging +from litellm_proxy.utils import PrismaClient, ProxyLogging from litellm.types.services import ServiceTypes diff --git a/litellm/proxy/common_utils/swagger_utils.py b/litellm_proxy/common_utils/swagger_utils.py similarity index 100% rename from litellm/proxy/common_utils/swagger_utils.py rename to litellm_proxy/common_utils/swagger_utils.py diff --git a/litellm/proxy/config_management_endpoints/pass_through_endpoints.py b/litellm_proxy/config_management_endpoints/pass_through_endpoints.py similarity index 85% rename from litellm/proxy/config_management_endpoints/pass_through_endpoints.py rename to litellm_proxy/config_management_endpoints/pass_through_endpoints.py index 5ff02b8bce..a90b484014 100644 --- a/litellm/proxy/config_management_endpoints/pass_through_endpoints.py +++ b/litellm_proxy/config_management_endpoints/pass_through_endpoints.py @@ -6,8 +6,8 @@ CRUD endpoints for managing pass-through endpoints from fastapi import APIRouter, Depends, Request, Response -from litellm.proxy._types import * -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy._types import * +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth router = APIRouter() diff --git 
a/litellm/proxy/credential_endpoints/endpoints.py b/litellm_proxy/credential_endpoints/endpoints.py similarity index 95% rename from litellm/proxy/credential_endpoints/endpoints.py rename to litellm_proxy/credential_endpoints/endpoints.py index bbbbbfd6b7..9ee1a034f3 100644 --- a/litellm/proxy/credential_endpoints/endpoints.py +++ b/litellm_proxy/credential_endpoints/endpoints.py @@ -10,10 +10,10 @@ import litellm from litellm._logging import verbose_proxy_logger from litellm.litellm_core_utils.credential_accessor import CredentialAccessor from litellm.litellm_core_utils.litellm_logging import _get_masked_values -from litellm.proxy._types import CommonProxyErrors, UserAPIKeyAuth -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.common_utils.encrypt_decrypt_utils import encrypt_value_helper -from litellm.proxy.utils import handle_exception_on_proxy, jsonify_object +from litellm_proxy._types import CommonProxyErrors, UserAPIKeyAuth +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.common_utils.encrypt_decrypt_utils import encrypt_value_helper +from litellm_proxy.utils import handle_exception_on_proxy, jsonify_object from litellm.types.utils import CreateCredentialItem, CredentialItem router = APIRouter() @@ -46,7 +46,7 @@ async def create_credential( Stores credential in DB. Reloads credentials in memory. """ - from litellm.proxy.proxy_server import llm_router, prisma_client + from litellm_proxy.proxy_server import llm_router, prisma_client try: if prisma_client is None: @@ -152,7 +152,7 @@ async def get_credential( """ [BETA] endpoint. This might change unexpectedly. """ - from litellm.proxy.proxy_server import llm_router + from litellm_proxy.proxy_server import llm_router try: if model_id: @@ -216,7 +216,7 @@ async def delete_credential( """ [BETA] endpoint. This might change unexpectedly. """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client try: if prisma_client is None: @@ -292,7 +292,7 @@ async def update_credential( """ [BETA] endpoint. This might change unexpectedly. 
""" - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client try: if prisma_client is None: diff --git a/litellm/proxy/custom_prompt_management.py b/litellm_proxy/custom_prompt_management.py similarity index 100% rename from litellm/proxy/custom_prompt_management.py rename to litellm_proxy/custom_prompt_management.py diff --git a/litellm/proxy/custom_sso.py b/litellm_proxy/custom_sso.py similarity index 91% rename from litellm/proxy/custom_sso.py rename to litellm_proxy/custom_sso.py index 210e9eea3d..ed6a5b9452 100644 --- a/litellm/proxy/custom_sso.py +++ b/litellm_proxy/custom_sso.py @@ -14,8 +14,8 @@ Flow: from fastapi_sso.sso.base import OpenID -from litellm.proxy._types import LitellmUserRoles, SSOUserDefinedValues -from litellm.proxy.management_endpoints.internal_user_endpoints import user_info +from litellm_proxy._types import LitellmUserRoles, SSOUserDefinedValues +from litellm_proxy.management_endpoints.internal_user_endpoints import user_info async def custom_sso_handler(userIDPInfo: OpenID) -> SSOUserDefinedValues: diff --git a/litellm/proxy/custom_validate.py b/litellm_proxy/custom_validate.py similarity index 100% rename from litellm/proxy/custom_validate.py rename to litellm_proxy/custom_validate.py diff --git a/litellm/proxy/db/base_client.py b/litellm_proxy/db/base_client.py similarity index 100% rename from litellm/proxy/db/base_client.py rename to litellm_proxy/db/base_client.py diff --git a/litellm/proxy/db/check_migration.py b/litellm_proxy/db/check_migration.py similarity index 100% rename from litellm/proxy/db/check_migration.py rename to litellm_proxy/db/check_migration.py diff --git a/litellm/proxy/db/create_views.py b/litellm_proxy/db/create_views.py similarity index 100% rename from litellm/proxy/db/create_views.py rename to litellm_proxy/db/create_views.py diff --git a/litellm/proxy/db/db_spend_update_writer.py b/litellm_proxy/db/db_spend_update_writer.py similarity index 98% rename from litellm/proxy/db/db_spend_update_writer.py rename to litellm_proxy/db/db_spend_update_writer.py index 61ea930387..00a245a860 100644 --- a/litellm/proxy/db/db_spend_update_writer.py +++ b/litellm_proxy/db/db_spend_update_writer.py @@ -17,7 +17,7 @@ import litellm from litellm._logging import verbose_proxy_logger from litellm.caching import DualCache, RedisCache from litellm.constants import DB_SPEND_UPDATE_JOB_NAME -from litellm.proxy._types import ( +from litellm_proxy._types import ( DB_CONNECTION_ERROR_TYPES, BaseDailySpendTransaction, DailyTagSpendTransaction, @@ -30,15 +30,15 @@ from litellm.proxy._types import ( SpendLogsPayload, SpendUpdateQueueItem, ) -from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import ( +from litellm_proxy.db.db_transaction_queue.daily_spend_update_queue import ( DailySpendUpdateQueue, ) -from litellm.proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager -from litellm.proxy.db.db_transaction_queue.redis_update_buffer import RedisUpdateBuffer -from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue +from litellm_proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager +from litellm_proxy.db.db_transaction_queue.redis_update_buffer import RedisUpdateBuffer +from litellm_proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue if TYPE_CHECKING: - from litellm.proxy.utils import PrismaClient, ProxyLogging + from litellm_proxy.utils import PrismaClient, ProxyLogging else: PrismaClient = Any ProxyLogging = Any @@ 
-79,13 +79,13 @@ class DBSpendUpdateWriter: end_time: Optional[datetime], response_cost: Optional[float], ): - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( disable_spend_logs, litellm_proxy_budget_name, prisma_client, user_api_key_cache, ) - from litellm.proxy.utils import ProxyUpdateSpend, hash_token + from litellm_proxy.utils import ProxyUpdateSpend, hash_token try: verbose_proxy_logger.debug( @@ -99,7 +99,7 @@ class DBSpendUpdateWriter: hashed_token = token ## CREATE SPEND LOG PAYLOAD ## - from litellm.proxy.spend_tracking.spend_tracking_utils import ( + from litellm_proxy.spend_tracking.spend_tracking_utils import ( get_logging_payload, ) @@ -530,7 +530,7 @@ class DBSpendUpdateWriter: Commits all the spend `UPDATE` transactions to the Database """ - from litellm.proxy.utils import ( + from litellm_proxy.utils import ( ProxyUpdateSpend, _raise_failed_update_spend_exception, ) @@ -824,7 +824,7 @@ class DBSpendUpdateWriter: """ Generic function to update daily spend for any entity type (user, team, tag) """ - from litellm.proxy.utils import _raise_failed_update_spend_exception + from litellm_proxy.utils import _raise_failed_update_spend_exception verbose_proxy_logger.debug( f"Daily {entity_type.capitalize()} Spend transactions: {len(daily_spend_transactions)}" diff --git a/litellm/proxy/db/db_transaction_queue/base_update_queue.py b/litellm_proxy/db/db_transaction_queue/base_update_queue.py similarity index 100% rename from litellm/proxy/db/db_transaction_queue/base_update_queue.py rename to litellm_proxy/db/db_transaction_queue/base_update_queue.py diff --git a/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py b/litellm_proxy/db/db_transaction_queue/daily_spend_update_queue.py similarity index 97% rename from litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py rename to litellm_proxy/db/db_transaction_queue/daily_spend_update_queue.py index c3074e641b..57b9fbb8f3 100644 --- a/litellm/proxy/db/db_transaction_queue/daily_spend_update_queue.py +++ b/litellm_proxy/db/db_transaction_queue/daily_spend_update_queue.py @@ -3,8 +3,8 @@ from copy import deepcopy from typing import Dict, List, Optional from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import BaseDailySpendTransaction -from litellm.proxy.db.db_transaction_queue.base_update_queue import ( +from litellm_proxy._types import BaseDailySpendTransaction +from litellm_proxy.db.db_transaction_queue.base_update_queue import ( BaseUpdateQueue, service_logger_obj, ) diff --git a/litellm/proxy/db/db_transaction_queue/pod_lock_manager.py b/litellm_proxy/db/db_transaction_queue/pod_lock_manager.py similarity index 99% rename from litellm/proxy/db/db_transaction_queue/pod_lock_manager.py rename to litellm_proxy/db/db_transaction_queue/pod_lock_manager.py index be3be64546..ecf5afbc7c 100644 --- a/litellm/proxy/db/db_transaction_queue/pod_lock_manager.py +++ b/litellm_proxy/db/db_transaction_queue/pod_lock_manager.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Optional from litellm._logging import verbose_proxy_logger from litellm.caching.redis_cache import RedisCache from litellm.constants import DEFAULT_CRON_JOB_LOCK_TTL_SECONDS -from litellm.proxy.db.db_transaction_queue.base_update_queue import service_logger_obj +from litellm_proxy.db.db_transaction_queue.base_update_queue import service_logger_obj from litellm.types.services import ServiceTypes if TYPE_CHECKING: diff --git a/litellm/proxy/db/db_transaction_queue/redis_update_buffer.py 
b/litellm_proxy/db/db_transaction_queue/redis_update_buffer.py similarity index 97% rename from litellm/proxy/db/db_transaction_queue/redis_update_buffer.py rename to litellm_proxy/db/db_transaction_queue/redis_update_buffer.py index 03bd9dca9e..9bef7be478 100644 --- a/litellm/proxy/db/db_transaction_queue/redis_update_buffer.py +++ b/litellm_proxy/db/db_transaction_queue/redis_update_buffer.py @@ -18,21 +18,21 @@ from litellm.constants import ( REDIS_UPDATE_BUFFER_KEY, ) from litellm.litellm_core_utils.safe_json_dumps import safe_dumps -from litellm.proxy._types import ( +from litellm_proxy._types import ( DailyTeamSpendTransaction, DailyUserSpendTransaction, DBSpendUpdateTransactions, ) -from litellm.proxy.db.db_transaction_queue.base_update_queue import service_logger_obj -from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import ( +from litellm_proxy.db.db_transaction_queue.base_update_queue import service_logger_obj +from litellm_proxy.db.db_transaction_queue.daily_spend_update_queue import ( DailySpendUpdateQueue, ) -from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue +from litellm_proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue from litellm.secret_managers.main import str_to_bool from litellm.types.services import ServiceTypes if TYPE_CHECKING: - from litellm.proxy.utils import PrismaClient + from litellm_proxy.utils import PrismaClient else: PrismaClient = Any @@ -58,7 +58,7 @@ class RedisUpdateBuffer: This setting enables buffering database transactions in Redis to improve reliability and reduce database contention """ - from litellm.proxy.proxy_server import general_settings + from litellm_proxy.proxy_server import general_settings _use_redis_transaction_buffer: Optional[ Union[bool, str] diff --git a/litellm/proxy/db/db_transaction_queue/spend_update_queue.py b/litellm_proxy/db/db_transaction_queue/spend_update_queue.py similarity index 98% rename from litellm/proxy/db/db_transaction_queue/spend_update_queue.py rename to litellm_proxy/db/db_transaction_queue/spend_update_queue.py index 98a9e5088a..da3ce2871a 100644 --- a/litellm/proxy/db/db_transaction_queue/spend_update_queue.py +++ b/litellm_proxy/db/db_transaction_queue/spend_update_queue.py @@ -2,12 +2,12 @@ import asyncio from typing import Dict, List, Optional from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import ( +from litellm_proxy._types import ( DBSpendUpdateTransactions, Litellm_EntityType, SpendUpdateQueueItem, ) -from litellm.proxy.db.db_transaction_queue.base_update_queue import ( +from litellm_proxy.db.db_transaction_queue.base_update_queue import ( BaseUpdateQueue, service_logger_obj, ) diff --git a/litellm/proxy/db/dynamo_db.py b/litellm_proxy/db/dynamo_db.py similarity index 96% rename from litellm/proxy/db/dynamo_db.py rename to litellm_proxy/db/dynamo_db.py index 628509d9c3..30a7aef918 100644 --- a/litellm/proxy/db/dynamo_db.py +++ b/litellm_proxy/db/dynamo_db.py @@ -3,8 +3,8 @@ Deprecated. Only PostgresSQL is supported. 
""" from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import DynamoDBArgs -from litellm.proxy.db.base_client import CustomDB +from litellm_proxy._types import DynamoDBArgs +from litellm_proxy.db.base_client import CustomDB class DynamoDBWrapper(CustomDB): diff --git a/litellm/proxy/db/exception_handler.py b/litellm_proxy/db/exception_handler.py similarity index 95% rename from litellm/proxy/db/exception_handler.py rename to litellm_proxy/db/exception_handler.py index db73f9e9c9..01cafa04b2 100644 --- a/litellm/proxy/db/exception_handler.py +++ b/litellm_proxy/db/exception_handler.py @@ -1,6 +1,6 @@ from typing import Union -from litellm.proxy._types import ( +from litellm_proxy._types import ( DB_CONNECTION_ERROR_TYPES, ProxyErrorTypes, ProxyException, @@ -18,7 +18,7 @@ class PrismaDBExceptionHandler: """ Returns True if the request should be allowed to proceed despite the DB connection error """ - from litellm.proxy.proxy_server import general_settings + from litellm_proxy.proxy_server import general_settings _allow_requests_on_db_unavailable: Union[bool, str] = general_settings.get( "allow_requests_on_db_unavailable", False diff --git a/litellm/proxy/db/log_db_metrics.py b/litellm_proxy/db/log_db_metrics.py similarity index 97% rename from litellm/proxy/db/log_db_metrics.py rename to litellm_proxy/db/log_db_metrics.py index 5c79515532..352ede70b5 100644 --- a/litellm/proxy/db/log_db_metrics.py +++ b/litellm_proxy/db/log_db_metrics.py @@ -41,7 +41,7 @@ def log_db_metrics(func): try: result = await func(*args, **kwargs) end_time: datetime = datetime.now() - from litellm.proxy.proxy_server import proxy_logging_obj + from litellm_proxy.proxy_server import proxy_logging_obj if "PROXY" not in func.__name__: asyncio.create_task( @@ -120,7 +120,7 @@ async def _handle_logging_db_exception( start_time: datetime, end_time: datetime, ) -> None: - from litellm.proxy.proxy_server import proxy_logging_obj + from litellm_proxy.proxy_server import proxy_logging_obj # don't log this as a DB Service Failure, if the DB did not raise an exception if _is_exception_related_to_db(e) is not True: diff --git a/litellm/proxy/db/prisma_client.py b/litellm_proxy/db/prisma_client.py similarity index 99% rename from litellm/proxy/db/prisma_client.py rename to litellm_proxy/db/prisma_client.py index 339117cb27..c2e895b57e 100644 --- a/litellm/proxy/db/prisma_client.py +++ b/litellm_proxy/db/prisma_client.py @@ -62,7 +62,7 @@ class PrismaWrapper: def get_rds_iam_token(self) -> Optional[str]: if self.iam_token_db_auth: - from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token + from litellm_proxy.auth.rds_iam_token import generate_iam_auth_token db_host = os.getenv("DATABASE_HOST") db_port = os.getenv("DATABASE_PORT") diff --git a/litellm/proxy/enterprise b/litellm_proxy/enterprise similarity index 100% rename from litellm/proxy/enterprise rename to litellm_proxy/enterprise diff --git a/litellm/proxy/example_config_yaml/_health_check_test_config.yaml b/litellm_proxy/example_config_yaml/_health_check_test_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/_health_check_test_config.yaml rename to litellm_proxy/example_config_yaml/_health_check_test_config.yaml diff --git a/litellm/proxy/example_config_yaml/aliases_config.yaml b/litellm_proxy/example_config_yaml/aliases_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/aliases_config.yaml rename to litellm_proxy/example_config_yaml/aliases_config.yaml diff --git 
a/litellm/proxy/example_config_yaml/azure_config.yaml b/litellm_proxy/example_config_yaml/azure_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/azure_config.yaml rename to litellm_proxy/example_config_yaml/azure_config.yaml diff --git a/litellm/proxy/example_config_yaml/bad_schema.prisma b/litellm_proxy/example_config_yaml/bad_schema.prisma similarity index 100% rename from litellm/proxy/example_config_yaml/bad_schema.prisma rename to litellm_proxy/example_config_yaml/bad_schema.prisma diff --git a/litellm/proxy/example_config_yaml/custom_auth.py b/litellm_proxy/example_config_yaml/custom_auth.py similarity index 95% rename from litellm/proxy/example_config_yaml/custom_auth.py rename to litellm_proxy/example_config_yaml/custom_auth.py index e59db22719..faa3e89e8d 100644 --- a/litellm/proxy/example_config_yaml/custom_auth.py +++ b/litellm_proxy/example_config_yaml/custom_auth.py @@ -2,7 +2,7 @@ import os from fastapi import Request -from litellm.proxy._types import GenerateKeyRequest, UserAPIKeyAuth +from litellm_proxy._types import GenerateKeyRequest, UserAPIKeyAuth async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: diff --git a/litellm/proxy/example_config_yaml/custom_auth_basic.py b/litellm_proxy/example_config_yaml/custom_auth_basic.py similarity index 87% rename from litellm/proxy/example_config_yaml/custom_auth_basic.py rename to litellm_proxy/example_config_yaml/custom_auth_basic.py index 4d633a54fe..4651618223 100644 --- a/litellm/proxy/example_config_yaml/custom_auth_basic.py +++ b/litellm_proxy/example_config_yaml/custom_auth_basic.py @@ -1,6 +1,6 @@ from fastapi import Request -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth: diff --git a/litellm/proxy/example_config_yaml/custom_callbacks.py b/litellm_proxy/example_config_yaml/custom_callbacks.py similarity index 100% rename from litellm/proxy/example_config_yaml/custom_callbacks.py rename to litellm_proxy/example_config_yaml/custom_callbacks.py diff --git a/litellm/proxy/example_config_yaml/custom_callbacks1.py b/litellm_proxy/example_config_yaml/custom_callbacks1.py similarity index 96% rename from litellm/proxy/example_config_yaml/custom_callbacks1.py rename to litellm_proxy/example_config_yaml/custom_callbacks1.py index 2cc644a184..6e9618e273 100644 --- a/litellm/proxy/example_config_yaml/custom_callbacks1.py +++ b/litellm_proxy/example_config_yaml/custom_callbacks1.py @@ -2,7 +2,7 @@ from typing import Literal, Optional import litellm from litellm.integrations.custom_logger import CustomLogger -from litellm.proxy.proxy_server import DualCache, UserAPIKeyAuth +from litellm_proxy.proxy_server import DualCache, UserAPIKeyAuth # This file includes the custom callbacks for LiteLLM Proxy diff --git a/litellm/proxy/example_config_yaml/custom_guardrail.py b/litellm_proxy/example_config_yaml/custom_guardrail.py similarity index 97% rename from litellm/proxy/example_config_yaml/custom_guardrail.py rename to litellm_proxy/example_config_yaml/custom_guardrail.py index 5a5c784410..a71be58aef 100644 --- a/litellm/proxy/example_config_yaml/custom_guardrail.py +++ b/litellm_proxy/example_config_yaml/custom_guardrail.py @@ -4,8 +4,8 @@ import litellm from litellm._logging import verbose_proxy_logger from litellm.caching.caching import DualCache from litellm.integrations.custom_guardrail import CustomGuardrail -from litellm.proxy._types import UserAPIKeyAuth 
-from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata +from litellm_proxy._types import UserAPIKeyAuth +from litellm_proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata class myCustomGuardrail(CustomGuardrail): diff --git a/litellm/proxy/example_config_yaml/custom_handler.py b/litellm_proxy/example_config_yaml/custom_handler.py similarity index 100% rename from litellm/proxy/example_config_yaml/custom_handler.py rename to litellm_proxy/example_config_yaml/custom_handler.py diff --git a/litellm/proxy/example_config_yaml/disable_schema_update.yaml b/litellm_proxy/example_config_yaml/disable_schema_update.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/disable_schema_update.yaml rename to litellm_proxy/example_config_yaml/disable_schema_update.yaml diff --git a/litellm/proxy/example_config_yaml/enterprise_config.yaml b/litellm_proxy/example_config_yaml/enterprise_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/enterprise_config.yaml rename to litellm_proxy/example_config_yaml/enterprise_config.yaml diff --git a/litellm/proxy/example_config_yaml/langfuse_config.yaml b/litellm_proxy/example_config_yaml/langfuse_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/langfuse_config.yaml rename to litellm_proxy/example_config_yaml/langfuse_config.yaml diff --git a/litellm/proxy/example_config_yaml/load_balancer.yaml b/litellm_proxy/example_config_yaml/load_balancer.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/load_balancer.yaml rename to litellm_proxy/example_config_yaml/load_balancer.yaml diff --git a/litellm/proxy/example_config_yaml/multi_instance_simple_config.yaml b/litellm_proxy/example_config_yaml/multi_instance_simple_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/multi_instance_simple_config.yaml rename to litellm_proxy/example_config_yaml/multi_instance_simple_config.yaml diff --git a/litellm/proxy/example_config_yaml/oai_misc_config.yaml b/litellm_proxy/example_config_yaml/oai_misc_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/oai_misc_config.yaml rename to litellm_proxy/example_config_yaml/oai_misc_config.yaml diff --git a/litellm/proxy/example_config_yaml/opentelemetry_config.yaml b/litellm_proxy/example_config_yaml/opentelemetry_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/opentelemetry_config.yaml rename to litellm_proxy/example_config_yaml/opentelemetry_config.yaml diff --git a/litellm/proxy/example_config_yaml/otel_test_config.yaml b/litellm_proxy/example_config_yaml/otel_test_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/otel_test_config.yaml rename to litellm_proxy/example_config_yaml/otel_test_config.yaml diff --git a/litellm/proxy/example_config_yaml/pass_through_config.yaml b/litellm_proxy/example_config_yaml/pass_through_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/pass_through_config.yaml rename to litellm_proxy/example_config_yaml/pass_through_config.yaml diff --git a/litellm/proxy/example_config_yaml/simple_config.yaml b/litellm_proxy/example_config_yaml/simple_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/simple_config.yaml rename to litellm_proxy/example_config_yaml/simple_config.yaml diff --git a/litellm/proxy/example_config_yaml/spend_tracking_config.yaml 
b/litellm_proxy/example_config_yaml/spend_tracking_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/spend_tracking_config.yaml rename to litellm_proxy/example_config_yaml/spend_tracking_config.yaml diff --git a/litellm/proxy/example_config_yaml/store_model_db_config.yaml b/litellm_proxy/example_config_yaml/store_model_db_config.yaml similarity index 100% rename from litellm/proxy/example_config_yaml/store_model_db_config.yaml rename to litellm_proxy/example_config_yaml/store_model_db_config.yaml diff --git a/litellm/proxy/fine_tuning_endpoints/endpoints.py b/litellm_proxy/fine_tuning_endpoints/endpoints.py similarity index 95% rename from litellm/proxy/fine_tuning_endpoints/endpoints.py rename to litellm_proxy/fine_tuning_endpoints/endpoints.py index d4c4250b37..ee94a4dfbb 100644 --- a/litellm/proxy/fine_tuning_endpoints/endpoints.py +++ b/litellm_proxy/fine_tuning_endpoints/endpoints.py @@ -13,10 +13,10 @@ from fastapi import APIRouter, Depends, Request, Response import litellm from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import * -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing -from litellm.proxy.utils import handle_exception_on_proxy +from litellm_proxy._types import * +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.common_request_processing import ProxyBaseLLMRequestProcessing +from litellm_proxy.utils import handle_exception_on_proxy router = APIRouter() @@ -95,7 +95,7 @@ async def create_fine_tuning_job( }' ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, premium_user, @@ -167,7 +167,7 @@ async def create_fine_tuning_job( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.create_fine_tuning_job(): Exception occurred - {}".format( + "litellm_proxy.proxy_server.create_fine_tuning_job(): Exception occurred - {}".format( str(e) ) ) @@ -202,7 +202,7 @@ async def retrieve_fine_tuning_job( - `custom_llm_provider`: Name of the LiteLLM provider - `fine_tuning_job_id`: The ID of the fine-tuning job to retrieve. """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, premium_user, @@ -264,7 +264,7 @@ async def retrieve_fine_tuning_job( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.list_fine_tuning_jobs(): Exception occurred - {}".format( + "litellm_proxy.proxy_server.list_fine_tuning_jobs(): Exception occurred - {}".format( str(e) ) ) @@ -301,7 +301,7 @@ async def list_fine_tuning_jobs( - `after`: Identifier for the last job from the previous pagination request. - `limit`: Number of fine-tuning jobs to retrieve (default is 20). 
""" - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, premium_user, @@ -364,7 +364,7 @@ async def list_fine_tuning_jobs( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.list_fine_tuning_jobs(): Exception occurred - {}".format( + "litellm_proxy.proxy_server.list_fine_tuning_jobs(): Exception occurred - {}".format( str(e) ) ) @@ -399,7 +399,7 @@ async def cancel_fine_tuning_job( - `custom_llm_provider`: Name of the LiteLLM provider - `fine_tuning_job_id`: The ID of the fine-tuning job to cancel. """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, premium_user, @@ -465,7 +465,7 @@ async def cancel_fine_tuning_job( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.list_fine_tuning_jobs(): Exception occurred - {}".format( + "litellm_proxy.proxy_server.list_fine_tuning_jobs(): Exception occurred - {}".format( str(e) ) ) diff --git a/litellm/proxy/guardrails/guardrail_endpoints.py b/litellm_proxy/guardrails/guardrail_endpoints.py similarity index 94% rename from litellm/proxy/guardrails/guardrail_endpoints.py rename to litellm_proxy/guardrails/guardrail_endpoints.py index 7407d6fb12..6599471185 100644 --- a/litellm/proxy/guardrails/guardrail_endpoints.py +++ b/litellm_proxy/guardrails/guardrail_endpoints.py @@ -6,7 +6,7 @@ from typing import Dict, List, Optional, cast from fastapi import APIRouter, Depends -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth from litellm.types.guardrails import GuardrailInfoResponse, ListGuardrailsResponse #### GUARDRAILS ENDPOINTS #### @@ -73,7 +73,7 @@ async def list_guardrails(): } ``` """ - from litellm.proxy.proxy_server import proxy_config + from litellm_proxy.proxy_server import proxy_config config = proxy_config.config diff --git a/litellm/proxy/guardrails/guardrail_helpers.py b/litellm_proxy/guardrails/guardrail_helpers.py similarity index 98% rename from litellm/proxy/guardrails/guardrail_helpers.py rename to litellm_proxy/guardrails/guardrail_helpers.py index e970311460..58bd95bd1a 100644 --- a/litellm/proxy/guardrails/guardrail_helpers.py +++ b/litellm_proxy/guardrails/guardrail_helpers.py @@ -4,7 +4,7 @@ from typing import Dict import litellm from litellm._logging import verbose_proxy_logger -from litellm.proxy.proxy_server import LiteLLM_TeamTable, UserAPIKeyAuth +from litellm_proxy.proxy_server import LiteLLM_TeamTable, UserAPIKeyAuth from litellm.types.guardrails import * sys.path.insert( diff --git a/litellm/proxy/guardrails/guardrail_hooks/aim.py b/litellm_proxy/guardrails/guardrail_hooks/aim.py similarity index 98% rename from litellm/proxy/guardrails/guardrail_hooks/aim.py rename to litellm_proxy/guardrails/guardrail_hooks/aim.py index 86a9cf778a..d30508dbb4 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/aim.py +++ b/litellm_proxy/guardrails/guardrail_hooks/aim.py @@ -21,8 +21,8 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, httpxSpecialProvider, ) -from litellm.proxy._types import UserAPIKeyAuth -from litellm.proxy.proxy_server import StreamingCallbackError +from litellm_proxy._types import UserAPIKeyAuth +from litellm_proxy.proxy_server import StreamingCallbackError from 
litellm.types.utils import ( Choices, EmbeddingResponse, diff --git a/litellm/proxy/guardrails/guardrail_hooks/aporia_ai.py b/litellm_proxy/guardrails/guardrail_hooks/aporia_ai.py similarity index 96% rename from litellm/proxy/guardrails/guardrail_hooks/aporia_ai.py rename to litellm_proxy/guardrails/guardrail_hooks/aporia_ai.py index 3c39b90b0a..75aa706c19 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/aporia_ai.py +++ b/litellm_proxy/guardrails/guardrail_hooks/aporia_ai.py @@ -30,8 +30,8 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, httpxSpecialProvider, ) -from litellm.proxy._types import UserAPIKeyAuth -from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata +from litellm_proxy._types import UserAPIKeyAuth +from litellm_proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata from litellm.types.guardrails import GuardrailEventHooks litellm.set_verbose = True @@ -152,7 +152,7 @@ class AporiaGuardrail(CustomGuardrail): user_api_key_dict: UserAPIKeyAuth, response, ): - from litellm.proxy.common_utils.callback_utils import ( + from litellm_proxy.common_utils.callback_utils import ( add_guardrail_to_applied_guardrails_header, ) @@ -191,7 +191,7 @@ class AporiaGuardrail(CustomGuardrail): "responses", ], ): - from litellm.proxy.common_utils.callback_utils import ( + from litellm_proxy.common_utils.callback_utils import ( add_guardrail_to_applied_guardrails_header, ) diff --git a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py b/litellm_proxy/guardrails/guardrail_hooks/bedrock_guardrails.py similarity index 98% rename from litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py rename to litellm_proxy/guardrails/guardrail_hooks/bedrock_guardrails.py index 5c6b53be25..3487c15f5d 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py +++ b/litellm_proxy/guardrails/guardrail_hooks/bedrock_guardrails.py @@ -31,7 +31,7 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, httpxSpecialProvider, ) -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth from litellm.secret_managers.main import get_secret from litellm.types.guardrails import ( BedrockContentItem, @@ -252,7 +252,7 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM): "responses", ], ): - from litellm.proxy.common_utils.callback_utils import ( + from litellm_proxy.common_utils.callback_utils import ( add_guardrail_to_applied_guardrails_header, ) @@ -279,7 +279,7 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM): user_api_key_dict: UserAPIKeyAuth, response, ): - from litellm.proxy.common_utils.callback_utils import ( + from litellm_proxy.common_utils.callback_utils import ( add_guardrail_to_applied_guardrails_header, ) from litellm.types.guardrails import GuardrailEventHooks diff --git a/litellm/proxy/guardrails/guardrail_hooks/custom_guardrail.py b/litellm_proxy/guardrails/guardrail_hooks/custom_guardrail.py similarity index 98% rename from litellm/proxy/guardrails/guardrail_hooks/custom_guardrail.py rename to litellm_proxy/guardrails/guardrail_hooks/custom_guardrail.py index 87860477f0..563b6c6981 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/custom_guardrail.py +++ b/litellm_proxy/guardrails/guardrail_hooks/custom_guardrail.py @@ -7,7 +7,7 @@ from litellm.integrations.custom_guardrail import ( CustomGuardrail, log_guardrail_information, ) -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import 
UserAPIKeyAuth class myCustomGuardrail(CustomGuardrail): diff --git a/litellm/proxy/guardrails/guardrail_hooks/guardrails_ai.py b/litellm_proxy/guardrails/guardrail_hooks/guardrails_ai.py similarity index 97% rename from litellm/proxy/guardrails/guardrail_hooks/guardrails_ai.py rename to litellm_proxy/guardrails/guardrail_hooks/guardrails_ai.py index 1a2c5a217b..7a50bf58aa 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/guardrails_ai.py +++ b/litellm_proxy/guardrails/guardrail_hooks/guardrails_ai.py @@ -19,8 +19,8 @@ from litellm.integrations.custom_guardrail import ( from litellm.litellm_core_utils.prompt_templates.common_utils import ( get_content_from_model_response, ) -from litellm.proxy._types import UserAPIKeyAuth -from litellm.proxy.common_utils.callback_utils import ( +from litellm_proxy._types import UserAPIKeyAuth +from litellm_proxy.common_utils.callback_utils import ( add_guardrail_to_applied_guardrails_header, ) from litellm.types.guardrails import GuardrailEventHooks diff --git a/litellm/proxy/guardrails/guardrail_hooks/lakera_ai.py b/litellm_proxy/guardrails/guardrail_hooks/lakera_ai.py similarity index 99% rename from litellm/proxy/guardrails/guardrail_hooks/lakera_ai.py rename to litellm_proxy/guardrails/guardrail_hooks/lakera_ai.py index 2dd8a3154a..186061058d 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/lakera_ai.py +++ b/litellm_proxy/guardrails/guardrail_hooks/lakera_ai.py @@ -28,8 +28,8 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, httpxSpecialProvider, ) -from litellm.proxy._types import UserAPIKeyAuth -from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata +from litellm_proxy._types import UserAPIKeyAuth +from litellm_proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata from litellm.secret_managers.main import get_secret from litellm.types.guardrails import ( GuardrailItem, diff --git a/litellm/proxy/guardrails/guardrail_hooks/presidio.py b/litellm_proxy/guardrails/guardrail_hooks/presidio.py similarity index 99% rename from litellm/proxy/guardrails/guardrail_hooks/presidio.py rename to litellm_proxy/guardrails/guardrail_hooks/presidio.py index 0c7d2a1fe6..49b98e7897 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/presidio.py +++ b/litellm_proxy/guardrails/guardrail_hooks/presidio.py @@ -24,7 +24,7 @@ from litellm.integrations.custom_guardrail import ( CustomGuardrail, log_guardrail_information, ) -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth from litellm.types.guardrails import GuardrailEventHooks from litellm.utils import ( EmbeddingResponse, diff --git a/litellm/proxy/guardrails/guardrail_initializers.py b/litellm_proxy/guardrails/guardrail_initializers.py similarity index 91% rename from litellm/proxy/guardrails/guardrail_initializers.py rename to litellm_proxy/guardrails/guardrail_initializers.py index c32d75f986..880906b901 100644 --- a/litellm/proxy/guardrails/guardrail_initializers.py +++ b/litellm_proxy/guardrails/guardrail_initializers.py @@ -4,7 +4,7 @@ from litellm.types.guardrails import * def initialize_aporia(litellm_params, guardrail): - from litellm.proxy.guardrails.guardrail_hooks.aporia_ai import AporiaGuardrail + from litellm_proxy.guardrails.guardrail_hooks.aporia_ai import AporiaGuardrail _aporia_callback = AporiaGuardrail( api_base=litellm_params["api_base"], @@ -17,7 +17,7 @@ def initialize_aporia(litellm_params, guardrail): def initialize_bedrock(litellm_params, guardrail): - from 
litellm.proxy.guardrails.guardrail_hooks.bedrock_guardrails import ( + from litellm_proxy.guardrails.guardrail_hooks.bedrock_guardrails import ( BedrockGuardrail, ) @@ -32,7 +32,7 @@ def initialize_bedrock(litellm_params, guardrail): def initialize_lakera(litellm_params, guardrail): - from litellm.proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation + from litellm_proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation _lakera_callback = lakeraAI_Moderation( api_base=litellm_params["api_base"], @@ -46,7 +46,7 @@ def initialize_lakera(litellm_params, guardrail): def initialize_aim(litellm_params, guardrail): - from litellm.proxy.guardrails.guardrail_hooks.aim import AimGuardrail + from litellm_proxy.guardrails.guardrail_hooks.aim import AimGuardrail _aim_callback = AimGuardrail( api_base=litellm_params["api_base"], @@ -59,7 +59,7 @@ def initialize_aim(litellm_params, guardrail): def initialize_presidio(litellm_params, guardrail): - from litellm.proxy.guardrails.guardrail_hooks.presidio import ( + from litellm_proxy.guardrails.guardrail_hooks.presidio import ( _OPTIONAL_PresidioPIIMasking, ) @@ -97,7 +97,7 @@ def initialize_hide_secrets(litellm_params, guardrail): def initialize_guardrails_ai(litellm_params, guardrail): - from litellm.proxy.guardrails.guardrail_hooks.guardrails_ai import GuardrailsAI + from litellm_proxy.guardrails.guardrail_hooks.guardrails_ai import GuardrailsAI _guard_name = litellm_params.get("guard_name") if not _guard_name: diff --git a/litellm/proxy/guardrails/guardrail_registry.py b/litellm_proxy/guardrails/guardrail_registry.py similarity index 100% rename from litellm/proxy/guardrails/guardrail_registry.py rename to litellm_proxy/guardrails/guardrail_registry.py diff --git a/litellm/proxy/guardrails/init_guardrails.py b/litellm_proxy/guardrails/init_guardrails.py similarity index 98% rename from litellm/proxy/guardrails/init_guardrails.py rename to litellm_proxy/guardrails/init_guardrails.py index d08dcaba25..63bed8d305 100644 --- a/litellm/proxy/guardrails/init_guardrails.py +++ b/litellm_proxy/guardrails/init_guardrails.py @@ -5,7 +5,7 @@ from typing import Dict, List, Optional import litellm from litellm import get_secret from litellm._logging import verbose_proxy_logger -from litellm.proxy.common_utils.callback_utils import initialize_callbacks_on_proxy +from litellm_proxy.common_utils.callback_utils import initialize_callbacks_on_proxy # v2 implementation from litellm.types.guardrails import ( diff --git a/litellm/proxy/health_check.py b/litellm_proxy/health_check.py similarity index 100% rename from litellm/proxy/health_check.py rename to litellm_proxy/health_check.py diff --git a/litellm/proxy/health_endpoints/_health_endpoints.py b/litellm_proxy/health_endpoints/_health_endpoints.py similarity index 97% rename from litellm/proxy/health_endpoints/_health_endpoints.py rename to litellm_proxy/health_endpoints/_health_endpoints.py index 9de845397a..e45ab0c575 100644 --- a/litellm/proxy/health_endpoints/_health_endpoints.py +++ b/litellm_proxy/health_endpoints/_health_endpoints.py @@ -11,7 +11,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request, Response, status import litellm from litellm._logging import verbose_proxy_logger from litellm.constants import HEALTH_CHECK_TIMEOUT_SECONDS -from litellm.proxy._types import ( +from litellm_proxy._types import ( AlertType, CallInfo, ProxyErrorTypes, @@ -19,9 +19,9 @@ from litellm.proxy._types import ( UserAPIKeyAuth, WebhookEvent, ) -from 
litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.db.exception_handler import PrismaDBExceptionHandler -from litellm.proxy.health_check import ( +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.db.exception_handler import PrismaDBExceptionHandler +from litellm_proxy.health_check import ( _clean_endpoint_data, _update_litellm_params_for_health_check, perform_health_check, @@ -85,7 +85,7 @@ async def health_services_endpoint( # noqa: PLR0915 ``` """ try: - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( general_settings, prisma_client, proxy_logging_obj, @@ -274,7 +274,7 @@ async def health_services_endpoint( # noqa: PLR0915 except Exception as e: verbose_proxy_logger.error( - "litellm.proxy.proxy_server.health_services_endpoint(): Exception occured - {}".format( + "litellm_proxy.proxy_server.health_services_endpoint(): Exception occurred - {}".format( str(e) ) ) @@ -319,7 +319,7 @@ async def health_endpoint( ``` else, the health checks will be run on models when /health is called. """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( health_check_details, health_check_results, llm_model_list, @@ -365,7 +365,7 @@ async def health_endpoint( } except Exception as e: verbose_proxy_logger.error( - "litellm.proxy.proxy_server.py::health_endpoint(): Exception occured - {}".format( + "litellm_proxy.proxy_server.py::health_endpoint(): Exception occurred - {}".format( str(e) ) ) @@ -377,7 +377,7 @@ db_health_cache = {"status": "unknown", "last_updated": datetime.now()} async def _db_health_readiness_check(): - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client global db_health_cache @@ -435,7 +435,7 @@ async def active_callbacks(): ``` """ - from litellm.proxy.proxy_server import general_settings, proxy_logging_obj + from litellm_proxy.proxy_server import general_settings, proxy_logging_obj _alerting = str(general_settings.get("alerting")) # get success callbacks @@ -501,7 +501,7 @@ async def health_readiness(): """ Unprotected endpoint for checking if worker can receive requests """ - from litellm.proxy.proxy_server import prisma_client, version + from litellm_proxy.proxy_server import prisma_client, version try: # get success callback @@ -693,7 +693,7 @@ async def test_model_connection( except Exception as e: verbose_proxy_logger.error( - f"litellm.proxy.health_endpoints.test_model_connection(): Exception occurred - {str(e)}" + f"litellm_proxy.health_endpoints.test_model_connection(): Exception occurred - {str(e)}" ) verbose_proxy_logger.debug(traceback.format_exc()) raise HTTPException( diff --git a/litellm/proxy/hooks/__init__.py b/litellm_proxy/hooks/__init__.py similarity index 100% rename from litellm/proxy/hooks/__init__.py rename to litellm_proxy/hooks/__init__.py diff --git a/litellm/proxy/hooks/azure_content_safety.py b/litellm_proxy/hooks/azure_content_safety.py similarity index 98% rename from litellm/proxy/hooks/azure_content_safety.py rename to litellm_proxy/hooks/azure_content_safety.py index b35d671117..4e88c1cb3d 100644 --- a/litellm/proxy/hooks/azure_content_safety.py +++ b/litellm_proxy/hooks/azure_content_safety.py @@ -7,7 +7,7 @@ import litellm from litellm._logging import verbose_proxy_logger from litellm.caching.caching import DualCache from litellm.integrations.custom_logger import CustomLogger -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import
UserAPIKeyAuth class _PROXY_AzureContentSafety( @@ -127,7 +127,7 @@ class _PROXY_AzureContentSafety( raise e except Exception as e: verbose_proxy_logger.error( - "litellm.proxy.hooks.azure_content_safety.py::async_pre_call_hook(): Exception occured - {}".format( + "litellm_proxy.hooks.azure_content_safety.py::async_pre_call_hook(): Exception occurred - {}".format( str(e) ) ) diff --git a/litellm/proxy/hooks/batch_redis_get.py b/litellm_proxy/hooks/batch_redis_get.py similarity index 98% rename from litellm/proxy/hooks/batch_redis_get.py rename to litellm_proxy/hooks/batch_redis_get.py index c608317f4e..d0fae88220 100644 --- a/litellm/proxy/hooks/batch_redis_get.py +++ b/litellm_proxy/hooks/batch_redis_get.py @@ -12,7 +12,7 @@ import litellm from litellm._logging import verbose_proxy_logger from litellm.caching.caching import DualCache, InMemoryCache, RedisCache from litellm.integrations.custom_logger import CustomLogger -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth class _PROXY_BatchRedisRequests(CustomLogger): @@ -98,7 +98,7 @@ class _PROXY_BatchRedisRequests(CustomLogger): raise e except Exception as e: verbose_proxy_logger.error( - "litellm.proxy.hooks.batch_redis_get.py::async_pre_call_hook(): Exception occured - {}".format( + "litellm_proxy.hooks.batch_redis_get.py::async_pre_call_hook(): Exception occurred - {}".format( str(e) ) ) diff --git a/litellm/proxy/hooks/cache_control_check.py b/litellm_proxy/hooks/cache_control_check.py similarity index 94% rename from litellm/proxy/hooks/cache_control_check.py rename to litellm_proxy/hooks/cache_control_check.py index 6e3fbf84fa..23d01a4bed 100644 --- a/litellm/proxy/hooks/cache_control_check.py +++ b/litellm_proxy/hooks/cache_control_check.py @@ -8,7 +8,7 @@ from litellm import verbose_logger from litellm._logging import verbose_proxy_logger from litellm.caching.caching import DualCache from litellm.integrations.custom_logger import CustomLogger -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth class _PROXY_CacheControlCheck(CustomLogger): @@ -52,7 +52,7 @@ class _PROXY_CacheControlCheck(CustomLogger): raise e except Exception as e: verbose_logger.exception( - "litellm.proxy.hooks.cache_control_check.py::async_pre_call_hook(): Exception occured - {}".format( + "litellm_proxy.hooks.cache_control_check.py::async_pre_call_hook(): Exception occurred - {}".format( str(e) ) ) diff --git a/litellm/proxy/hooks/dynamic_rate_limiter.py b/litellm_proxy/hooks/dynamic_rate_limiter.py similarity index 98% rename from litellm/proxy/hooks/dynamic_rate_limiter.py rename to litellm_proxy/hooks/dynamic_rate_limiter.py index e06366d02b..e5027a7728 100644 --- a/litellm/proxy/hooks/dynamic_rate_limiter.py +++ b/litellm_proxy/hooks/dynamic_rate_limiter.py @@ -13,7 +13,7 @@ from litellm import ModelResponse, Router from litellm._logging import verbose_proxy_logger from litellm.caching.caching import DualCache from litellm.integrations.custom_logger import CustomLogger -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth from litellm.types.router import ModelGroupInfo from litellm.utils import get_utc_datetime @@ -63,7 +63,7 @@ class DynamicRateLimiterCache: ) except Exception as e: verbose_proxy_logger.exception( - "litellm.proxy.hooks.dynamic_rate_limiter.py::async_set_cache_sadd(): Exception occured - {}".format( + "litellm_proxy.hooks.dynamic_rate_limiter.py::async_set_cache_sadd(): Exception occurred - {}".format(
str(e) ) ) @@ -177,7 +177,7 @@ class _PROXY_DynamicRateLimitHandler(CustomLogger): ) except Exception as e: verbose_proxy_logger.exception( - "litellm.proxy.hooks.dynamic_rate_limiter.py::check_available_usage: Exception occurred - {}".format( + "litellm_proxy.hooks.dynamic_rate_limiter.py::check_available_usage: Exception occurred - {}".format( str(e) ) ) @@ -299,7 +299,7 @@ class _PROXY_DynamicRateLimitHandler(CustomLogger): ) except Exception as e: verbose_proxy_logger.exception( - "litellm.proxy.hooks.dynamic_rate_limiter.py::async_post_call_success_hook(): Exception occured - {}".format( + "litellm_proxy.hooks.dynamic_rate_limiter.py::async_post_call_success_hook(): Exception occurred - {}".format( str(e) ) ) diff --git a/litellm/proxy/hooks/example_presidio_ad_hoc_recognizer.json b/litellm_proxy/hooks/example_presidio_ad_hoc_recognizer.json similarity index 100% rename from litellm/proxy/hooks/example_presidio_ad_hoc_recognizer.json rename to litellm_proxy/hooks/example_presidio_ad_hoc_recognizer.json diff --git a/litellm/proxy/hooks/key_management_event_hooks.py b/litellm_proxy/hooks/key_management_event_hooks.py similarity index 96% rename from litellm/proxy/hooks/key_management_event_hooks.py rename to litellm_proxy/hooks/key_management_event_hooks.py index c2c4f0669f..9024fd6a7d 100644 --- a/litellm/proxy/hooks/key_management_event_hooks.py +++ b/litellm_proxy/hooks/key_management_event_hooks.py @@ -8,7 +8,7 @@ from fastapi import status import litellm from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import ( +from litellm_proxy._types import ( GenerateKeyRequest, GenerateKeyResponse, KeyRequest, @@ -43,10 +43,10 @@ class KeyManagementEventHooks: - Storing Audit Logs for key generation - Storing Generated Key in DB """ - from litellm.proxy.management_helpers.audit_logs import ( + from litellm_proxy.management_helpers.audit_logs import ( create_audit_log_for_update, ) - from litellm.proxy.proxy_server import litellm_proxy_admin_name + from litellm_proxy.proxy_server import litellm_proxy_admin_name if data.send_invite_email is True: await KeyManagementEventHooks._send_key_created_email( @@ -93,10 +93,10 @@ class KeyManagementEventHooks: Handles the following: - Storing Audit Logs for key update """ - from litellm.proxy.management_helpers.audit_logs import ( + from litellm_proxy.management_helpers.audit_logs import ( create_audit_log_for_update, ) - from litellm.proxy.proxy_server import litellm_proxy_admin_name + from litellm_proxy.proxy_server import litellm_proxy_admin_name # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True if litellm.store_audit_logs is True: @@ -156,10 +156,10 @@ class KeyManagementEventHooks: Handles the following: - Storing Audit Logs for key deletion """ - from litellm.proxy.management_helpers.audit_logs import ( + from litellm_proxy.management_helpers.audit_logs import ( create_audit_log_for_update, ) - from litellm.proxy.proxy_server import litellm_proxy_admin_name, prisma_client + from litellm_proxy.proxy_server import litellm_proxy_admin_name, prisma_client # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True # we do this after the first for loop, since first for loop is for validation.
@@ -297,7 +297,7 @@ class KeyManagementEventHooks:

     @staticmethod
     async def _send_key_created_email(response: dict):
-        from litellm.proxy.proxy_server import general_settings, proxy_logging_obj
+        from litellm_proxy.proxy_server import general_settings, proxy_logging_obj

         if "email" not in general_settings.get("alerting", []):
             raise ValueError(
diff --git a/litellm/proxy/hooks/managed_files.py b/litellm_proxy/hooks/managed_files.py
similarity index 98%
rename from litellm/proxy/hooks/managed_files.py
rename to litellm_proxy/hooks/managed_files.py
index 9ac6cc580b..c599f3933d 100644
--- a/litellm/proxy/hooks/managed_files.py
+++ b/litellm_proxy/hooks/managed_files.py
@@ -11,7 +11,7 @@ from litellm import Router, verbose_logger
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.prompt_templates.common_utils import extract_file_data
-from litellm.proxy._types import CallTypes, LiteLLM_ManagedFileTable, UserAPIKeyAuth
+from litellm_proxy._types import CallTypes, LiteLLM_ManagedFileTable, UserAPIKeyAuth
 from litellm.types.llms.openai import (
     AllMessageValues,
     ChatCompletionFileObject,
@@ -24,8 +24,8 @@ from litellm.types.utils import SpecialEnums
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span

-    from litellm.proxy.utils import InternalUsageCache as _InternalUsageCache
-    from litellm.proxy.utils import PrismaClient as _PrismaClient
+    from litellm_proxy.utils import InternalUsageCache as _InternalUsageCache
+    from litellm_proxy.utils import PrismaClient as _PrismaClient

     Span = Union[_Span, Any]
     InternalUsageCache = _InternalUsageCache
diff --git a/litellm/proxy/hooks/max_budget_limiter.py b/litellm_proxy/hooks/max_budget_limiter.py
similarity index 93%
rename from litellm/proxy/hooks/max_budget_limiter.py
rename to litellm_proxy/hooks/max_budget_limiter.py
index 4b59f603d3..6017222dc8 100644
--- a/litellm/proxy/hooks/max_budget_limiter.py
+++ b/litellm_proxy/hooks/max_budget_limiter.py
@@ -4,7 +4,7 @@ from litellm import verbose_logger
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth


 class _PROXY_MaxBudgetLimiter(CustomLogger):
@@ -43,7 +43,7 @@ class _PROXY_MaxBudgetLimiter(CustomLogger):
             raise e
         except Exception as e:
             verbose_logger.exception(
-                "litellm.proxy.hooks.max_budget_limiter.py::async_pre_call_hook(): Exception occured - {}".format(
+                "litellm_proxy.hooks.max_budget_limiter.py::async_pre_call_hook(): Exception occurred - {}".format(
                    str(e)
                )
            )
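The hooks above all share one error-handling convention: HTTP errors are re-raised, everything else is logged with a module-qualified prefix before falling through. A condensed sketch of that pattern, assuming a hypothetical `my_custom_hook` module (not a file in this diff):

```python
from fastapi import HTTPException

from litellm._logging import verbose_proxy_logger


async def async_pre_call_hook_body() -> None:
    try:
        ...  # hook-specific work goes here
    except HTTPException as e:
        # surface HTTP errors (e.g. budget exceeded) to the caller unchanged
        raise e
    except Exception as e:
        # log everything else with the module-qualified prefix used across litellm_proxy.hooks
        verbose_proxy_logger.exception(
            "litellm_proxy.hooks.my_custom_hook.py::async_pre_call_hook(): Exception occurred - {}".format(
                str(e)
            )
        )
```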
diff --git a/litellm/proxy/hooks/model_max_budget_limiter.py b/litellm_proxy/hooks/model_max_budget_limiter.py
similarity index 99%
rename from litellm/proxy/hooks/model_max_budget_limiter.py
rename to litellm_proxy/hooks/model_max_budget_limiter.py
index ac02c91536..0b74acffcf 100644
--- a/litellm/proxy/hooks/model_max_budget_limiter.py
+++ b/litellm_proxy/hooks/model_max_budget_limiter.py
@@ -5,7 +5,7 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import Span
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.router_strategy.budget_limiter import RouterBudgetLimiting
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import (
diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm_proxy/hooks/parallel_request_limiter.py
similarity index 98%
rename from litellm/proxy/hooks/parallel_request_limiter.py
rename to litellm_proxy/hooks/parallel_request_limiter.py
index 242c013d67..6bee52b2d5 100644
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm_proxy/hooks/parallel_request_limiter.py
@@ -11,8 +11,8 @@ from litellm import DualCache, ModelResponse
 from litellm._logging import verbose_proxy_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
-from litellm.proxy._types import CommonProxyErrors, CurrentItemRateLimit, UserAPIKeyAuth
-from litellm.proxy.auth.auth_utils import (
+from litellm_proxy._types import CommonProxyErrors, CurrentItemRateLimit, UserAPIKeyAuth
+from litellm_proxy.auth.auth_utils import (
     get_key_model_rpm_limit,
     get_key_model_tpm_limit,
 )
@@ -20,7 +20,7 @@ from litellm.proxy.auth.auth_utils import (
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span

-    from litellm.proxy.utils import InternalUsageCache as _InternalUsageCache
+    from litellm_proxy.utils import InternalUsageCache as _InternalUsageCache

     Span = Union[_Span, Any]
     InternalUsageCache = _InternalUsageCache
@@ -447,7 +447,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
     async def async_log_success_event(  # noqa: PLR0915
         self, kwargs, response_obj, start_time, end_time
     ):
-        from litellm.proxy.common_utils.callback_utils import (
+        from litellm_proxy.common_utils.callback_utils import (
             get_model_group_from_litellm_kwargs,
         )

@@ -769,13 +769,13 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
         """
         Helper to get the 'Internal User Object'

-        It uses the `get_user_object` function from `litellm.proxy.auth.auth_checks`
+        It uses the `get_user_object` function from `litellm_proxy.auth.auth_checks`

         We need this because the UserApiKeyAuth object does not contain the rpm/tpm limits for a User AND there could be a perf impact by additionally reading the UserTable.
""" from litellm._logging import verbose_proxy_logger - from litellm.proxy.auth.auth_checks import get_user_object - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.auth.auth_checks import get_user_object + from litellm_proxy.proxy_server import prisma_client try: _user_id_rate_limits = await get_user_object( diff --git a/litellm/proxy/hooks/prompt_injection_detection.py b/litellm_proxy/hooks/prompt_injection_detection.py similarity index 98% rename from litellm/proxy/hooks/prompt_injection_detection.py rename to litellm_proxy/hooks/prompt_injection_detection.py index ee5d192555..d35de79141 100644 --- a/litellm/proxy/hooks/prompt_injection_detection.py +++ b/litellm_proxy/hooks/prompt_injection_detection.py @@ -20,7 +20,7 @@ from litellm.integrations.custom_logger import CustomLogger from litellm.litellm_core_utils.prompt_templates.factory import ( prompt_injection_detection_default_pt, ) -from litellm.proxy._types import LiteLLMPromptInjectionParams, UserAPIKeyAuth +from litellm_proxy._types import LiteLLMPromptInjectionParams, UserAPIKeyAuth from litellm.router import Router from litellm.utils import get_formatted_prompt @@ -210,7 +210,7 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger): raise e except Exception as e: verbose_proxy_logger.exception( - "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format( + "litellm_proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format( str(e) ) ) diff --git a/litellm/proxy/hooks/proxy_track_cost_callback.py b/litellm_proxy/hooks/proxy_track_cost_callback.py similarity index 96% rename from litellm/proxy/hooks/proxy_track_cost_callback.py rename to litellm_proxy/hooks/proxy_track_cost_callback.py index 4b8447fb03..f441191515 100644 --- a/litellm/proxy/hooks/proxy_track_cost_callback.py +++ b/litellm_proxy/hooks/proxy_track_cost_callback.py @@ -11,10 +11,10 @@ from litellm.litellm_core_utils.core_helpers import ( get_litellm_metadata_from_kwargs, ) from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup -from litellm.proxy._types import UserAPIKeyAuth -from litellm.proxy.auth.auth_checks import log_db_metrics -from litellm.proxy.auth.route_checks import RouteChecks -from litellm.proxy.utils import ProxyUpdateSpend +from litellm_proxy._types import UserAPIKeyAuth +from litellm_proxy.auth.auth_checks import log_db_metrics +from litellm_proxy.auth.route_checks import RouteChecks +from litellm_proxy.utils import ProxyUpdateSpend from litellm.types.utils import ( StandardLoggingPayload, StandardLoggingUserAPIKeyMetadata, @@ -42,7 +42,7 @@ class _ProxyDBLogger(CustomLogger): ): return - from litellm.proxy.proxy_server import proxy_logging_obj + from litellm_proxy.proxy_server import proxy_logging_obj _metadata = dict( StandardLoggingUserAPIKeyMetadata( @@ -96,7 +96,7 @@ class _ProxyDBLogger(CustomLogger): start_time=None, end_time=None, # start/end time for completion ): - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( prisma_client, proxy_logging_obj, update_cache, @@ -226,7 +226,7 @@ class _ProxyDBLogger(CustomLogger): If users want to disable error tracking, they can set the disable_error_logs flag in the general_settings """ - from litellm.proxy.proxy_server import general_settings + from litellm_proxy.proxy_server import general_settings if general_settings.get("disable_error_logs") is True: return False diff --git a/litellm/proxy/lambda.py b/litellm_proxy/lambda.py 
diff --git a/litellm/proxy/lambda.py b/litellm_proxy/lambda.py
similarity index 59%
rename from litellm/proxy/lambda.py
rename to litellm_proxy/lambda.py
index 6b278c4118..68e5d08ada 100644
--- a/litellm/proxy/lambda.py
+++ b/litellm_proxy/lambda.py
@@ -1,4 +1,4 @@
 from mangum import Mangum
-from litellm.proxy.proxy_server import app
+from litellm_proxy.proxy_server import app

 handler = Mangum(app, lifespan="on")
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm_proxy/litellm_pre_call_utils.py
similarity index 98%
rename from litellm/proxy/litellm_pre_call_utils.py
rename to litellm_proxy/litellm_pre_call_utils.py
index 097f798de2..a98bceda33 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm_proxy/litellm_pre_call_utils.py
@@ -9,7 +9,7 @@ from starlette.datastructures import Headers
 import litellm
 from litellm._logging import verbose_logger, verbose_proxy_logger
 from litellm._service_logger import ServiceLogging
-from litellm.proxy._types import (
+from litellm_proxy._types import (
     AddTeamCallback,
     CommonProxyErrors,
     LitellmDataForBackendLLMCall,
@@ -17,7 +17,7 @@ from litellm.proxy._types import (
     TeamCallbackMetadata,
     UserAPIKeyAuth,
 )
-from litellm.proxy.auth.route_checks import RouteChecks
+from litellm_proxy.auth.route_checks import RouteChecks
 from litellm.router import Router
 from litellm.types.llms.anthropic import ANTHROPIC_API_HEADERS
 from litellm.types.services import ServiceTypes
@@ -31,7 +31,7 @@ service_logger_obj = ServiceLogging()  # used for tracking latency on OTEL

 if TYPE_CHECKING:
-    from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig
+    from litellm_proxy.proxy_server import ProxyConfig as _ProxyConfig

     ProxyConfig = _ProxyConfig
 else:
@@ -470,7 +470,7 @@ async def add_litellm_data_to_request(  # noqa: PLR0915
     """

-    from litellm.proxy.proxy_server import llm_router, premium_user
+    from litellm_proxy.proxy_server import llm_router, premium_user

     safe_add_api_version_from_query_params(data, request)

@@ -832,7 +832,7 @@ def _add_guardrails_from_key_or_team_metadata(
         metadata_variable_name: The name of the metadata field in data
     """
-    from litellm.proxy.utils import _premium_user_check
+    from litellm_proxy.utils import _premium_user_check

     for _management_object_metadata in [key_metadata, team_metadata]:
         if _management_object_metadata and "guardrails" in _management_object_metadata:
@@ -896,7 +896,7 @@ def add_provider_specific_headers_to_request(

 def _add_otel_traceparent_to_data(data: dict, request: Request):
-    from litellm.proxy.proxy_server import open_telemetry_logger
+    from litellm_proxy.proxy_server import open_telemetry_logger

     if data is None:
         return
diff --git a/litellm/proxy/llamaguard_prompt.txt b/litellm_proxy/llamaguard_prompt.txt
similarity index 100%
rename from litellm/proxy/llamaguard_prompt.txt
rename to litellm_proxy/llamaguard_prompt.txt
diff --git a/litellm/proxy/logo.jpg b/litellm_proxy/logo.jpg
similarity index 100%
rename from litellm/proxy/logo.jpg
rename to litellm_proxy/logo.jpg
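With `lambda.py` and the static assets moved, the ASGI entrypoint now lives at `litellm_proxy.proxy_server:app`; `lambda.py` wraps that same app object with Mangum for AWS Lambda. A quick local smoke test of the renamed module path (host, port, and having uvicorn installed are assumptions here):

```python
# Serve the renamed ASGI app locally to confirm the new import path resolves.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("litellm_proxy.proxy_server:app", host="localhost", port=4000)
```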
diff --git a/litellm/proxy/management_endpoints/budget_management_endpoints.py b/litellm_proxy/management_endpoints/budget_management_endpoints.py
similarity index 94%
rename from litellm/proxy/management_endpoints/budget_management_endpoints.py
rename to litellm_proxy/management_endpoints/budget_management_endpoints.py
index 65b0156afe..6746f8f7e5 100644
--- a/litellm/proxy/management_endpoints/budget_management_endpoints.py
+++ b/litellm_proxy/management_endpoints/budget_management_endpoints.py
@@ -14,9 +14,9 @@ All /budget management endpoints

 #### BUDGET TABLE MANAGEMENT ####
 from fastapi import APIRouter, Depends, HTTPException

-from litellm.proxy._types import *
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-from litellm.proxy.utils import jsonify_object
+from litellm_proxy._types import *
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy.utils import jsonify_object

 router = APIRouter()
@@ -43,7 +43,7 @@ async def new_budget(
     - rpm_limit: Optional[int] - The requests per minute limit for the budget.
     - model_max_budget: Optional[dict] - Specify max budget for a given model. Example: {"openai/gpt-4o-mini": {"max_budget": 100.0, "budget_duration": "1d", "tpm_limit": 100000, "rpm_limit": 100000}}
     """
-    from litellm.proxy.proxy_server import litellm_proxy_admin_name, prisma_client
+    from litellm_proxy.proxy_server import litellm_proxy_admin_name, prisma_client

     if prisma_client is None:
         raise HTTPException(
@@ -86,7 +86,7 @@ async def update_budget(
     - rpm_limit: Optional[int] - The requests per minute limit for the budget.
     - model_max_budget: Optional[dict] - Specify max budget for a given model. Example: {"openai/gpt-4o-mini": {"max_budget": 100.0, "budget_duration": "1d", "tpm_limit": 100000, "rpm_limit": 100000}}
     """
-    from litellm.proxy.proxy_server import litellm_proxy_admin_name, prisma_client
+    from litellm_proxy.proxy_server import litellm_proxy_admin_name, prisma_client

     if prisma_client is None:
         raise HTTPException(
@@ -119,7 +119,7 @@ async def info_budget(data: BudgetRequest):
     Parameters:
     - budgets: List[str] - The list of budget ids to get information for
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})
@@ -155,7 +155,7 @@ async def budget_settings(
     Query Parameters:
     - budget_id: str - The budget id to get information for
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(
@@ -221,7 +221,7 @@ async def list_budget(
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
     """List all the created budgets in proxy db. Used on Admin UI."""
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(
@@ -260,7 +260,7 @@ async def delete_budget(
     Parameters:
     - id: str - The budget id to delete
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(
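For orientation, a hypothetical client call against the budget endpoints above; the `/budget/new` route, the admin bearer token, and the exact payload keys are inferred from the docstrings rather than confirmed by this diff:

```python
import requests

# Sketch: create a budget with a per-model cap, mirroring the docstring's example.
resp = requests.post(
    "http://localhost:4000/budget/new",
    headers={"Authorization": "Bearer sk-1234"},  # admin key (assumed)
    json={
        "budget_id": "engineering-budget",  # illustrative id
        "rpm_limit": 100,
        "model_max_budget": {
            "openai/gpt-4o-mini": {"max_budget": 100.0, "budget_duration": "1d"}
        },
    },
    timeout=10,
)
print(resp.json())
```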
diff --git a/litellm/proxy/management_endpoints/common_daily_activity.py b/litellm_proxy/management_endpoints/common_daily_activity.py
similarity index 99%
rename from litellm/proxy/management_endpoints/common_daily_activity.py
rename to litellm_proxy/management_endpoints/common_daily_activity.py
index d782adf85c..bb2ca28777 100644
--- a/litellm/proxy/management_endpoints/common_daily_activity.py
+++ b/litellm_proxy/management_endpoints/common_daily_activity.py
@@ -4,8 +4,8 @@ from typing import Any, Dict, List, Optional, Set, Union
 from fastapi import HTTPException, status

 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import CommonProxyErrors
-from litellm.proxy.utils import PrismaClient
+from litellm_proxy._types import CommonProxyErrors
+from litellm_proxy.utils import PrismaClient
 from litellm.types.proxy.management_endpoints.common_daily_activity import (
     BreakdownMetrics,
     DailySpendData,
diff --git a/litellm/proxy/management_endpoints/common_utils.py b/litellm_proxy/management_endpoints/common_utils.py
similarity index 93%
rename from litellm/proxy/management_endpoints/common_utils.py
rename to litellm_proxy/management_endpoints/common_utils.py
index 87bf7f5799..9e07c20b47 100644
--- a/litellm/proxy/management_endpoints/common_utils.py
+++ b/litellm_proxy/management_endpoints/common_utils.py
@@ -1,13 +1,13 @@
 from typing import Any, Union

-from litellm.proxy._types import (
+from litellm_proxy._types import (
     GenerateKeyRequest,
     LiteLLM_ManagementEndpoint_MetadataFields_Premium,
     LiteLLM_TeamTable,
     LitellmUserRoles,
     UserAPIKeyAuth,
 )
-from litellm.proxy.utils import _premium_user_check
+from litellm_proxy.utils import _premium_user_check


 def _user_has_admin_view(user_api_key_dict: UserAPIKeyAuth) -> bool:
diff --git a/litellm/proxy/management_endpoints/customer_endpoints.py b/litellm_proxy/management_endpoints/customer_endpoints.py
similarity index 97%
rename from litellm/proxy/management_endpoints/customer_endpoints.py
rename to litellm_proxy/management_endpoints/customer_endpoints.py
index 1f6f846bc7..af05e99ea8 100644
--- a/litellm/proxy/management_endpoints/customer_endpoints.py
+++ b/litellm_proxy/management_endpoints/customer_endpoints.py
@@ -18,8 +18,8 @@ from fastapi import APIRouter, Depends, HTTPException, Request, status

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import *
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy._types import *
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth

 router = APIRouter()

@@ -52,7 +52,7 @@ async def block_user(data: BlockUsers):
     }'
     ```
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     try:
         records = []
@@ -218,7 +218,7 @@ async def new_end_user(
     - end-user object
     - currently allowed models
     """
-    from litellm.proxy.proxy_server import (
+    from litellm_proxy.proxy_server import (
         litellm_proxy_admin_name,
         llm_router,
         prisma_client,
@@ -283,7 +283,7 @@ async def new_end_user(
         return end_user_record
     except Exception as e:
         verbose_proxy_logger.exception(
-            "litellm.proxy.management_endpoints.customer_endpoints.new_end_user(): Exception occured - {}".format(
+            "litellm_proxy.management_endpoints.customer_endpoints.new_end_user(): Exception occurred - {}".format(
                str(e)
            )
        )
@@ -341,7 +341,7 @@ async def end_user_info(
     -H 'Authorization: Bearer sk-1234'
     ```
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(
@@ -406,7 +406,7 @@ async def update_end_user(
     ```
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     try:
         data_json: dict = data.json()
@@ -446,7 +446,7 @@ async def update_end_user(
         # update based on remaining passed in values
     except Exception as e:
         verbose_proxy_logger.error(
-            "litellm.proxy.proxy_server.update_end_user(): Exception occured - {}".format(
+            "litellm_proxy.proxy_server.update_end_user(): Exception occurred - {}".format(
                str(e)
            )
        )
@@ -502,7 +502,7 @@ async def delete_end_user(
     See below for all params
     ```
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     try:
         if prisma_client is None:
@@ -539,7 +539,7 @@ async def delete_end_user(
         # update based on remaining passed in values
     except Exception as e:
         verbose_proxy_logger.error(
-            "litellm.proxy.proxy_server.delete_end_user(): Exception occured - {}".format(
+            "litellm_proxy.proxy_server.delete_end_user(): Exception occurred - {}".format(
                str(e)
            )
        )
@@ -588,7 +588,7 @@ async def list_end_user(
     ```
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if (
         user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN
diff --git a/litellm/proxy/management_endpoints/internal_user_endpoints.py b/litellm_proxy/management_endpoints/internal_user_endpoints.py
similarity index 97%
rename from litellm/proxy/management_endpoints/internal_user_endpoints.py
rename to litellm_proxy/management_endpoints/internal_user_endpoints.py
index 589b30d524..7e9dc232e4 100644
--- a/litellm/proxy/management_endpoints/internal_user_endpoints.py
+++ b/litellm_proxy/management_endpoints/internal_user_endpoints.py
@@ -23,17 +23,17 @@ from fastapi import APIRouter, Depends, Header, HTTPException, Request, status
 import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.litellm_core_utils.duration_parser import duration_in_seconds
-from litellm.proxy._types import *
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-from litellm.proxy.management_endpoints.common_daily_activity import get_daily_activity
-from litellm.proxy.management_endpoints.common_utils import _user_has_admin_view
-from litellm.proxy.management_endpoints.key_management_endpoints import (
+from litellm_proxy._types import *
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy.management_endpoints.common_daily_activity import get_daily_activity
+from litellm_proxy.management_endpoints.common_utils import _user_has_admin_view
+from litellm_proxy.management_endpoints.key_management_endpoints import (
     generate_key_helper_fn,
     prepare_metadata_fields,
 )
-from litellm.proxy.management_helpers.audit_logs import create_audit_log_for_update
-from litellm.proxy.management_helpers.utils import management_endpoint_wrapper
-from litellm.proxy.utils import handle_exception_on_proxy
+from litellm_proxy.management_helpers.audit_logs import create_audit_log_for_update
+from litellm_proxy.management_helpers.utils import management_endpoint_wrapper
+from litellm_proxy.utils import handle_exception_on_proxy
 from litellm.types.proxy.management_endpoints.common_daily_activity import (
     BreakdownMetrics,
     KeyMetadata,
@@ -222,7 +222,7 @@ async def new_user(
     ```
     """
     try:
-        from litellm.proxy.proxy_server import (
+        from litellm_proxy.proxy_server import (
             general_settings,
             litellm_proxy_admin_name,
             prisma_client,
@@ -239,7 +239,7 @@ async def new_user(
         # Add User to Team and Organization
         # if team_id passed add this user to the team
         if data_json.get("team_id", None) is not None:
-            from litellm.proxy.management_endpoints.team_endpoints import (
+            from litellm_proxy.management_endpoints.team_endpoints import (
                 team_member_add,
             )

@@ -263,20 +263,20 @@ async def new_user(
                     "already exists" in str(e) or "doesn't exist" in str(e)
                 ):
                     verbose_proxy_logger.debug(
-                        "litellm.proxy.management_endpoints.internal_user_endpoints.new_user(): User already exists in team - {}".format(
+                        "litellm_proxy.management_endpoints.internal_user_endpoints.new_user(): User already exists in team - {}".format(
                            str(e)
                        )
                    )
                 else:
                     verbose_proxy_logger.debug(
-                        "litellm.proxy.management_endpoints.internal_user_endpoints.new_user(): Exception occured - {}".format(
+                        "litellm_proxy.management_endpoints.internal_user_endpoints.new_user(): Exception occurred - {}".format(
                            str(e)
                        )
                    )
             except Exception as e:
                 if "already exists" in str(e) or "doesn't exist" in str(e):
                     verbose_proxy_logger.debug(
-                        "litellm.proxy.management_endpoints.internal_user_endpoints.new_user(): User already exists in team - {}".format(
+                        "litellm_proxy.management_endpoints.internal_user_endpoints.new_user(): User already exists in team - {}".format(
                            str(e)
                        )
                    )
@@ -436,7 +436,7 @@ async def user_info(
     --header 'Authorization: Bearer sk-1234'
     ```
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     try:
         if prisma_client is None:
@@ -458,7 +458,7 @@ async def user_info(
         ## GET ALL TEAMS ##
         team_list = []
         team_id_list = []
-        from litellm.proxy.management_endpoints.team_endpoints import list_team
+        from litellm_proxy.management_endpoints.team_endpoints import list_team

         teams_1 = await list_team(
             http_request=Request(
@@ -533,7 +533,7 @@ async def user_info(
         return response_data
     except Exception as e:
         verbose_proxy_logger.exception(
-            "litellm.proxy.proxy_server.user_info(): Exception occured - {}".format(
+            "litellm_proxy.proxy_server.user_info(): Exception occurred - {}".format(
                str(e)
            )
        )
@@ -551,7 +551,7 @@ async def _get_user_info_for_proxy_admin():
     - To get Faster UI load times, get all teams and virtual keys in 1 query
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     sql_query = """
        SELECT
@@ -592,7 +592,7 @@ def _process_keys_for_user_info(
     keys: Optional[List[LiteLLM_VerificationToken]],
     all_teams: Optional[Union[List[LiteLLM_TeamTable], List[TeamListResponseObject]]],
 ):
-    from litellm.proxy.proxy_server import general_settings, litellm_master_key_hash
+    from litellm_proxy.proxy_server import general_settings, litellm_master_key_hash

     returned_keys = []
     if keys is None:
@@ -732,7 +732,7 @@ async def user_update(
     """
-    from litellm.proxy.proxy_server import litellm_proxy_admin_name, prisma_client
+    from litellm_proxy.proxy_server import litellm_proxy_admin_name, prisma_client

     try:
         data_json: dict = data.json()
@@ -843,7 +843,7 @@ async def user_update(
         # update based on remaining passed in values
     except Exception as e:
         verbose_proxy_logger.exception(
-            "litellm.proxy.proxy_server.user_update(): Exception occured - {}".format(
+            "litellm_proxy.proxy_server.user_update(): Exception occurred - {}".format(
                str(e)
            )
        )
@@ -999,7 +999,7 @@ async def get_users(
         sort_order: Optional[str]
             Sort order ('asc' or 'desc')
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(
@@ -1128,7 +1128,7 @@ async def delete_user(
     Parameters:
     - user_ids: List[str] - The list of user id's to be deleted.
     """
-    from litellm.proxy.proxy_server import (
+    from litellm_proxy.proxy_server import (
         create_audit_log_for_update,
         litellm_proxy_admin_name,
         prisma_client,
@@ -1217,7 +1217,7 @@ async def add_internal_user_to_organization(
     - Exception if database not connected
     - Exception if user_id or organization_id not found
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise Exception("Database not connected")
@@ -1284,7 +1284,7 @@ async def ui_view_users(
     Returns:
         List[LiteLLM_SpendLogs]: Paginated list of matching user records
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})
@@ -1441,7 +1441,7 @@ async def get_user_daily_activity(
     - api_requests
     - breakdown by model, api_key, provider
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(
diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm_proxy/management_endpoints/key_management_endpoints.py
similarity index 98%
rename from litellm/proxy/management_endpoints/key_management_endpoints.py
rename to litellm_proxy/management_endpoints/key_management_endpoints.py
index 15edab8909..d1d31234ae 100644
--- a/litellm/proxy/management_endpoints/key_management_endpoints.py
+++ b/litellm_proxy/management_endpoints/key_management_endpoints.py
@@ -26,28 +26,28 @@ from litellm._logging import verbose_proxy_logger
 from litellm.caching import DualCache
 from litellm.constants import LENGTH_OF_LITELLM_GENERATED_KEY, UI_SESSION_TOKEN_TEAM_ID
 from litellm.litellm_core_utils.duration_parser import duration_in_seconds
-from litellm.proxy._types import *
-from litellm.proxy.auth.auth_checks import (
+from litellm_proxy._types import *
+from litellm_proxy.auth.auth_checks import (
     _cache_key_object,
     _delete_cache_key_object,
     get_key_object,
     get_team_object,
 )
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-from litellm.proxy.hooks.key_management_event_hooks import KeyManagementEventHooks
-from litellm.proxy.management_endpoints.common_utils import (
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy.hooks.key_management_event_hooks import KeyManagementEventHooks
+from litellm_proxy.management_endpoints.common_utils import (
     _is_user_team_admin,
     _set_object_metadata_field,
 )
-from litellm.proxy.management_endpoints.model_management_endpoints import (
+from litellm_proxy.management_endpoints.model_management_endpoints import (
     _add_model_to_db,
 )
-from litellm.proxy.management_helpers.team_member_permission_checks import (
     TeamMemberPermissionChecks,
 )
-from litellm.proxy.management_helpers.utils import management_endpoint_wrapper
-from litellm.proxy.spend_tracking.spend_tracking_utils import _is_master_key
-from litellm.proxy.utils import (
+from litellm_proxy.management_helpers.utils import management_endpoint_wrapper
+from litellm_proxy.spend_tracking.spend_tracking_utils import _is_master_key
+from litellm_proxy.utils import (
     PrismaClient,
     _hash_token_if_needed,
     handle_exception_on_proxy,
@@ -392,7 +392,7 @@ async def generate_key_fn(  # noqa: PLR0915
     - user_id: (str) Unique user id - used for tracking spend across multiple keys for same user id.
     """
     try:
-        from litellm.proxy.proxy_server import (
+        from litellm_proxy.proxy_server import (
             litellm_proxy_admin_name,
             llm_router,
             premium_user,
@@ -553,7 +553,7 @@ async def generate_key_fn(  # noqa: PLR0915

         # Set tags on the new key
         if "tags" in data_json:
-            from litellm.proxy.proxy_server import premium_user
+            from litellm_proxy.proxy_server import premium_user

             if premium_user is not True and data_json["tags"] is not None:
                 raise ValueError(
@@ -599,7 +599,7 @@ async def generate_key_fn(  # noqa: PLR0915
         return response
     except Exception as e:
         verbose_proxy_logger.exception(
-            "litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
+            "litellm_proxy.proxy_server.generate_key_fn(): Exception occurred - {}".format(
                str(e)
            )
        )
@@ -629,7 +629,7 @@ def prepare_metadata_fields(

     except Exception as e:
         verbose_proxy_logger.exception(
-            "litellm.proxy.proxy_server.prepare_metadata_fields(): Exception occured - {}".format(
+            "litellm_proxy.proxy_server.prepare_metadata_fields(): Exception occurred - {}".format(
                str(e)
            )
        )
@@ -744,7 +744,7 @@ async def update_key_fn(
     }'
     ```
     """
-    from litellm.proxy.proxy_server import (
+    from litellm_proxy.proxy_server import (
         llm_router,
         premium_user,
         prisma_client,
@@ -824,7 +824,7 @@ async def update_key_fn(
         # update based on remaining passed in values
     except Exception as e:
         verbose_proxy_logger.exception(
-            "litellm.proxy.proxy_server.update_key_fn(): Exception occured - {}".format(
+            "litellm_proxy.proxy_server.update_key_fn(): Exception occurred - {}".format(
                str(e)
            )
        )
@@ -881,7 +881,7 @@ async def delete_key_fn(
         HTTPException: If an error occurs during key deletion.
""" try: - from litellm.proxy.proxy_server import prisma_client, user_api_key_cache + from litellm_proxy.proxy_server import prisma_client, user_api_key_cache if prisma_client is None: raise Exception("Not connected to DB!") @@ -949,7 +949,7 @@ async def delete_key_fn( return {"deleted_keys": deleted_keys} except Exception as e: verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.delete_key_fn(): Exception occured - {}".format( + "litellm_proxy.proxy_server.delete_key_fn(): Exception occured - {}".format( str(e) ) ) @@ -983,7 +983,7 @@ async def info_key_fn_v2( -d {"keys": ["sk-1", "sk-2", "sk-3"]} ``` """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client try: if prisma_client is None: @@ -1047,7 +1047,7 @@ async def info_key_fn( -H "Authorization: Bearer sk-02Wr4IAlN3NvPXvL5JVvDA" ``` """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client try: if prisma_client is None: @@ -1174,7 +1174,7 @@ async def generate_key_helper_fn( # noqa: PLR0915 updated_by: Optional[str] = None, allowed_routes: Optional[list] = None, ): - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( litellm_proxy_budget_name, premium_user, prisma_client, @@ -1356,7 +1356,7 @@ async def generate_key_helper_fn( # noqa: PLR0915 key_data["updated_at"] = getattr(create_key_response, "updated_at", None) except Exception as e: verbose_proxy_logger.error( - "litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format( + "litellm_proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format( str(e) ) ) @@ -1472,7 +1472,7 @@ async def delete_verification_tokens( - List of keys being deleted, this contains information about the key_alias, token, and user_id being deleted, this is passed down to the KeyManagementEventHooks to delete the keys from the secret manager and handle audit logs """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client try: if prisma_client: @@ -1526,7 +1526,7 @@ async def delete_verification_tokens( raise Exception("DB not connected. prisma_client is None") except Exception as e: verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.delete_verification_tokens(): Exception occured - {}".format( + "litellm_proxy.proxy_server.delete_verification_tokens(): Exception occured - {}".format( str(e) ) ) @@ -1579,7 +1579,7 @@ async def _rotate_master_key( 3. Encrypt the values with the new master key 4. Update the values in the DB """ - from litellm.proxy.proxy_server import proxy_config + from litellm_proxy.proxy_server import proxy_config try: models: Optional[ @@ -1706,7 +1706,7 @@ async def regenerate_key_fn( Note: This is an Enterprise feature. It requires a premium license to use. """ try: - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( hash_token, master_key, premium_user, @@ -1988,7 +1988,7 @@ async def list_keys( } """ try: - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client verbose_proxy_logger.debug("Entering list_keys function") @@ -2215,7 +2215,7 @@ async def block_key( Note: This is an admin-only endpoint. Only proxy admins can block keys. 
""" - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( create_audit_log_for_update, hash_token, litellm_proxy_admin_name, @@ -2322,7 +2322,7 @@ async def unblock_key( Note: This is an admin-only endpoint. Only proxy admins can unblock keys. """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( create_audit_log_for_update, hash_token, litellm_proxy_admin_name, @@ -2529,8 +2529,8 @@ async def test_key_logging( import logging from io import StringIO - from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request - from litellm.proxy.proxy_server import general_settings, proxy_config + from litellm_proxy.litellm_pre_call_utils import add_litellm_data_to_request + from litellm_proxy.proxy_server import general_settings, proxy_config logging_callbacks: List[str] = [] for callback in key_logging: @@ -2642,7 +2642,7 @@ def validate_model_max_budget(model_max_budget: Optional[Dict]) -> None: if len(model_max_budget) == 0: return if model_max_budget is not None: - from litellm.proxy.proxy_server import CommonProxyErrors, premium_user + from litellm_proxy.proxy_server import CommonProxyErrors, premium_user if premium_user is not True: raise ValueError( diff --git a/litellm/proxy/management_endpoints/model_management_endpoints.py b/litellm_proxy/management_endpoints/model_management_endpoints.py similarity index 97% rename from litellm/proxy/management_endpoints/model_management_endpoints.py rename to litellm_proxy/management_endpoints/model_management_endpoints.py index 42dd903e79..606d210a78 100644 --- a/litellm/proxy/management_endpoints/model_management_endpoints.py +++ b/litellm_proxy/management_endpoints/model_management_endpoints.py @@ -20,7 +20,7 @@ from pydantic import BaseModel from litellm._logging import verbose_proxy_logger from litellm.constants import LITELLM_PROXY_ADMIN_NAME -from litellm.proxy._types import ( +from litellm_proxy._types import ( CommonProxyErrors, LiteLLM_ProxyModelTable, LiteLLM_TeamTable, @@ -34,15 +34,15 @@ from litellm.proxy._types import ( UpdateTeamRequest, UserAPIKeyAuth, ) -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.common_utils.encrypt_decrypt_utils import encrypt_value_helper -from litellm.proxy.management_endpoints.common_utils import _is_user_team_admin -from litellm.proxy.management_endpoints.team_endpoints import ( +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.common_utils.encrypt_decrypt_utils import encrypt_value_helper +from litellm_proxy.management_endpoints.common_utils import _is_user_team_admin +from litellm_proxy.management_endpoints.team_endpoints import ( team_model_add, update_team, ) -from litellm.proxy.management_helpers.audit_logs import create_object_audit_log -from litellm.proxy.utils import PrismaClient +from litellm_proxy.management_helpers.audit_logs import create_object_audit_log +from litellm_proxy.utils import PrismaClient from litellm.types.router import ( Deployment, DeploymentTypedDict, @@ -151,7 +151,7 @@ async def patch_model( Raises: ProxyException: For various error conditions including authentication and database errors """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( litellm_proxy_admin_name, llm_router, prisma_client, @@ -451,7 +451,7 @@ async def delete_model( model_info: ModelInfoDelete, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): - from litellm.proxy.proxy_server import llm_router + from 
diff --git a/litellm/proxy/management_endpoints/model_management_endpoints.py b/litellm_proxy/management_endpoints/model_management_endpoints.py
similarity index 97%
rename from litellm/proxy/management_endpoints/model_management_endpoints.py
rename to litellm_proxy/management_endpoints/model_management_endpoints.py
index 42dd903e79..606d210a78 100644
--- a/litellm/proxy/management_endpoints/model_management_endpoints.py
+++ b/litellm_proxy/management_endpoints/model_management_endpoints.py
@@ -20,7 +20,7 @@ from pydantic import BaseModel

 from litellm._logging import verbose_proxy_logger
 from litellm.constants import LITELLM_PROXY_ADMIN_NAME
-from litellm.proxy._types import (
+from litellm_proxy._types import (
     CommonProxyErrors,
     LiteLLM_ProxyModelTable,
     LiteLLM_TeamTable,
@@ -34,15 +34,15 @@ from litellm.proxy._types import (
     UpdateTeamRequest,
     UserAPIKeyAuth,
 )
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-from litellm.proxy.common_utils.encrypt_decrypt_utils import encrypt_value_helper
-from litellm.proxy.management_endpoints.common_utils import _is_user_team_admin
-from litellm.proxy.management_endpoints.team_endpoints import (
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy.common_utils.encrypt_decrypt_utils import encrypt_value_helper
+from litellm_proxy.management_endpoints.common_utils import _is_user_team_admin
+from litellm_proxy.management_endpoints.team_endpoints import (
     team_model_add,
     update_team,
 )
-from litellm.proxy.management_helpers.audit_logs import create_object_audit_log
-from litellm.proxy.utils import PrismaClient
+from litellm_proxy.management_helpers.audit_logs import create_object_audit_log
+from litellm_proxy.utils import PrismaClient
 from litellm.types.router import (
     Deployment,
     DeploymentTypedDict,
@@ -151,7 +151,7 @@ async def patch_model(
     Raises:
         ProxyException: For various error conditions including authentication and database errors
     """
-    from litellm.proxy.proxy_server import (
+    from litellm_proxy.proxy_server import (
         litellm_proxy_admin_name,
         llm_router,
         prisma_client,
@@ -451,7 +451,7 @@ async def delete_model(
     model_info: ModelInfoDelete,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
-    from litellm.proxy.proxy_server import llm_router
+    from litellm_proxy.proxy_server import llm_router

     try:
         """
@@ -461,7 +461,7 @@ async def delete_model(
         - Delete
         """
-        from litellm.proxy.proxy_server import (
+        from litellm_proxy.proxy_server import (
             llm_router,
             premium_user,
             prisma_client,
@@ -568,7 +568,7 @@ async def add_new_model(
     model_params: Deployment,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
-    from litellm.proxy.proxy_server import (
+    from litellm_proxy.proxy_server import (
         general_settings,
         premium_user,
         prisma_client,
@@ -669,7 +669,7 @@ async def add_new_model(

     except Exception as e:
         verbose_proxy_logger.exception(
-            "litellm.proxy.proxy_server.add_new_model(): Exception occured - {}".format(
+            "litellm_proxy.proxy_server.add_new_model(): Exception occurred - {}".format(
                str(e)
            )
        )
@@ -706,7 +706,7 @@ async def update_model(

     Use `/model/{model_id}/update` to PATCH the stored model in db.
     """
-    from litellm.proxy.proxy_server import (
+    from litellm_proxy.proxy_server import (
         LITELLM_PROXY_ADMIN_NAME,
         llm_router,
         premium_user,
@@ -827,7 +827,7 @@ async def update_model(
         return model_response
     except Exception as e:
         verbose_proxy_logger.exception(
-            "litellm.proxy.proxy_server.update_model(): Exception occured - {}".format(
+            "litellm_proxy.proxy_server.update_model(): Exception occurred - {}".format(
                str(e)
            )
        )
diff --git a/litellm/proxy/management_endpoints/organization_endpoints.py b/litellm_proxy/management_endpoints/organization_endpoints.py
similarity index 97%
rename from litellm/proxy/management_endpoints/organization_endpoints.py
rename to litellm_proxy/management_endpoints/organization_endpoints.py
index f0b0b645d2..42561b5d27 100644
--- a/litellm/proxy/management_endpoints/organization_endpoints.py
+++ b/litellm_proxy/management_endpoints/organization_endpoints.py
@@ -17,17 +17,17 @@ from typing import List, Optional, Tuple
 from fastapi import APIRouter, Depends, HTTPException, Request, status

 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import *
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-from litellm.proxy.management_endpoints.budget_management_endpoints import (
+from litellm_proxy._types import *
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy.management_endpoints.budget_management_endpoints import (
     new_budget,
     update_budget,
 )
-from litellm.proxy.management_helpers.utils import (
+from litellm_proxy.management_helpers.utils import (
     get_new_internal_user_defaults,
     management_endpoint_wrapper,
 )
-from litellm.proxy.utils import PrismaClient
+from litellm_proxy.utils import PrismaClient

 router = APIRouter()

@@ -103,7 +103,7 @@ async def new_organization(
     ```
     """
-    from litellm.proxy.proxy_server import litellm_proxy_admin_name, prisma_client
+    from litellm_proxy.proxy_server import litellm_proxy_admin_name, prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})
@@ -199,7 +199,7 @@ async def update_organization(
     """
     Update an organization
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(
@@ -248,7 +248,7 @@ async def delete_organization(

     - organization_ids: List[str] - The organization ids to delete.
""" - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException( @@ -306,7 +306,7 @@ async def list_organization( --header 'Authorization: Bearer sk-1234' ``` """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -353,7 +353,7 @@ async def info_organization(organization_id: str): """ Get the org specific information """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -384,7 +384,7 @@ async def deprecated_info_organization(data: OrganizationRequest): """ DEPRECATED: Use GET /organization/info instead """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -457,7 +457,7 @@ async def organization_member_add( 3. Add Internal User to the `LiteLLM_OrganizationMembership` table """ try: - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -563,7 +563,7 @@ async def organization_member_update( Update a member's role in an organization """ try: - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException( @@ -700,7 +700,7 @@ async def organization_member_delete( Delete a member from an organization """ try: - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException( diff --git a/litellm/proxy/management_endpoints/scim/README_SCIM.md b/litellm_proxy/management_endpoints/scim/README_SCIM.md similarity index 100% rename from litellm/proxy/management_endpoints/scim/README_SCIM.md rename to litellm_proxy/management_endpoints/scim/README_SCIM.md diff --git a/litellm/proxy/management_endpoints/scim/scim_transformations.py b/litellm_proxy/management_endpoints/scim/scim_transformations.py similarity index 97% rename from litellm/proxy/management_endpoints/scim/scim_transformations.py rename to litellm_proxy/management_endpoints/scim/scim_transformations.py index f68f728e2a..0f1d7b46c3 100644 --- a/litellm/proxy/management_endpoints/scim/scim_transformations.py +++ b/litellm_proxy/management_endpoints/scim/scim_transformations.py @@ -1,6 +1,6 @@ from typing import List, Union -from litellm.proxy._types import ( +from litellm_proxy._types import ( LiteLLM_TeamTable, LiteLLM_UserTable, Member, @@ -19,7 +19,7 @@ class ScimTransformations: async def transform_litellm_user_to_scim_user( user: Union[LiteLLM_UserTable, NewUserResponse], ) -> SCIMUser: - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException( @@ -111,7 +111,7 @@ class ScimTransformations: async def transform_litellm_team_to_scim_group( team: Union[LiteLLM_TeamTable, dict], ) -> SCIMGroup: - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise 
             raise HTTPException(
diff --git a/litellm/proxy/management_endpoints/scim/scim_v2.py b/litellm_proxy/management_endpoints/scim/scim_v2.py
similarity index 95%
rename from litellm/proxy/management_endpoints/scim/scim_v2.py
rename to litellm_proxy/management_endpoints/scim/scim_v2.py
index 62f4ab89c0..103924356a 100644
--- a/litellm/proxy/management_endpoints/scim/scim_v2.py
+++ b/litellm_proxy/management_endpoints/scim/scim_v2.py
@@ -18,7 +18,7 @@ from fastapi import (
 )

 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import (
+from litellm_proxy._types import (
     LiteLLM_UserTable,
     LitellmUserRoles,
     Member,
@@ -26,12 +26,12 @@ from litellm.proxy._types import (
     NewUserRequest,
     UserAPIKeyAuth,
 )
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-from litellm.proxy.management_endpoints.internal_user_endpoints import new_user
-from litellm.proxy.management_endpoints.scim.scim_transformations import (
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy.management_endpoints.internal_user_endpoints import new_user
+from litellm_proxy.management_endpoints.scim.scim_transformations import (
     ScimTransformations,
 )
-from litellm.proxy.management_endpoints.team_endpoints import new_team
+from litellm_proxy.management_endpoints.team_endpoints import new_team
 from litellm.types.proxy.management_endpoints.scim_v2 import *

 scim_router = APIRouter(
@@ -63,7 +63,7 @@ async def get_users(
     """
     Get a list of users according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
@@ -128,7 +128,7 @@ async def get_user(
     """
     Get a single user by ID according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
@@ -167,7 +167,7 @@ async def create_user(
     """
     Create a user according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
@@ -235,7 +235,7 @@ async def update_user(
     """
     Update a user according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
@@ -260,7 +260,7 @@ async def delete_user(
     """
     Delete a user according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
@@ -321,7 +321,7 @@ async def patch_user(
     """
     Patch a user according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
@@ -364,7 +364,7 @@ async def get_groups(
     """
     Get a list of groups according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
@@ -448,7 +448,7 @@ async def get_group(
     """
     Get a single group by ID according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
@@ -489,7 +489,7 @@ async def create_group(
     """
     Create a group according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
@@ -556,7 +556,7 @@ async def update_group(
     """
     Update a group according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
@@ -674,7 +674,7 @@ async def delete_group(
     """
     Delete a group according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
@@ -730,7 +730,7 @@ async def patch_group(
     """
     Patch a group according to SCIM v2 protocol
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No database connected"})
diff --git a/litellm/proxy/management_endpoints/sso_helper_utils.py b/litellm_proxy/management_endpoints/sso_helper_utils.py
similarity index 91%
rename from litellm/proxy/management_endpoints/sso_helper_utils.py
rename to litellm_proxy/management_endpoints/sso_helper_utils.py
index 45906b2fce..bf683a39e8 100644
--- a/litellm/proxy/management_endpoints/sso_helper_utils.py
+++ b/litellm_proxy/management_endpoints/sso_helper_utils.py
@@ -1,4 +1,4 @@
-from litellm.proxy._types import LitellmUserRoles
+from litellm_proxy._types import LitellmUserRoles


 def check_is_admin_only_access(ui_access_mode: str) -> bool:
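`sso_helper_utils` is small enough to read in full above; a usage sketch under the new import path, where the `"admin_only"` mode string is an assumed value for `ui_access_mode`:

```python
from litellm_proxy.management_endpoints.sso_helper_utils import (
    check_is_admin_only_access,
)

# True when the UI access mode restricts login to proxy admins (assumed semantics).
if check_is_admin_only_access("admin_only"):
    print("Admin-only UI access is enabled")
```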
diff --git a/litellm/proxy/management_endpoints/tag_management_endpoints.py b/litellm_proxy/management_endpoints/tag_management_endpoints.py
similarity index 95%
rename from litellm/proxy/management_endpoints/tag_management_endpoints.py
rename to litellm_proxy/management_endpoints/tag_management_endpoints.py
index 7c731400fb..87ada7a673 100644
--- a/litellm/proxy/management_endpoints/tag_management_endpoints.py
+++ b/litellm_proxy/management_endpoints/tag_management_endpoints.py
@@ -18,9 +18,9 @@ from fastapi import APIRouter, Depends, HTTPException

 from litellm._logging import verbose_proxy_logger
 from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-from litellm.proxy.management_endpoints.common_daily_activity import (
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy.management_endpoints.common_daily_activity import (
     SpendAnalyticsPaginatedResponse,
     get_daily_activity,
 )
@@ -128,7 +128,7 @@ async def new_tag(
     - description: Optional[str] - Description of what this tag represents
     - models: List[str] - List of LLM models allowed for this tag
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail="Database not connected")
@@ -181,7 +181,7 @@ async def new_tag(

 async def _add_tag_to_deployment(model_id: str, tag: str):
     """Helper function to add tag to deployment"""
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail="Database not connected")
@@ -219,7 +219,7 @@ async def update_tag(
     - description: Optional[str] - Updated description
     - models: List[str] - Updated list of allowed LLM models
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail="Database not connected")
@@ -275,7 +275,7 @@ async def info_tag(
     Parameters:
     - names: List[str] - List of tag names to get information for
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail="Database not connected")
@@ -310,7 +310,7 @@ async def list_tags(
     """
     List all available tags.
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail="Database not connected")
@@ -362,7 +362,7 @@ async def delete_tag(
     Parameters:
     - name: str - The name of the tag to delete
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client

     if prisma_client is None:
         raise HTTPException(status_code=500, detail="Database not connected")
@@ -416,7 +416,7 @@ async def get_tag_daily_activity(
     Returns:
         SpendAnalyticsPaginatedResponse: Paginated response containing daily activity data.
""" - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client # Convert comma-separated tags string to list if provided tag_list = tags.split(",") if tags else None diff --git a/litellm/proxy/management_endpoints/team_callback_endpoints.py b/litellm_proxy/management_endpoints/team_callback_endpoints.py similarity index 96% rename from litellm/proxy/management_endpoints/team_callback_endpoints.py rename to litellm_proxy/management_endpoints/team_callback_endpoints.py index 93d338a40d..dd46f49b3b 100644 --- a/litellm/proxy/management_endpoints/team_callback_endpoints.py +++ b/litellm_proxy/management_endpoints/team_callback_endpoints.py @@ -11,15 +11,15 @@ from typing import Optional from fastapi import APIRouter, Depends, Header, HTTPException, Request, status from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import ( +from litellm_proxy._types import ( AddTeamCallback, ProxyErrorTypes, ProxyException, TeamCallbackMetadata, UserAPIKeyAuth, ) -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.management_helpers.utils import management_endpoint_wrapper +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.management_helpers.utils import management_endpoint_wrapper router = APIRouter() @@ -79,7 +79,7 @@ async def add_team_callbacks( """ try: - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -170,7 +170,7 @@ async def add_team_callbacks( except Exception as e: verbose_proxy_logger.error( - "litellm.proxy.proxy_server.add_team_callbacks(): Exception occured - {}".format( + "litellm_proxy.proxy_server.add_team_callbacks(): Exception occured - {}".format( str(e) ) ) @@ -218,7 +218,7 @@ async def disable_team_logging( """ try: - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -271,7 +271,7 @@ async def disable_team_logging( except Exception as e: verbose_proxy_logger.error( - f"litellm.proxy.proxy_server.disable_team_logging(): Exception occurred - {str(e)}" + f"litellm_proxy.proxy_server.disable_team_logging(): Exception occurred - {str(e)}" ) verbose_proxy_logger.debug(traceback.format_exc()) if isinstance(e, HTTPException): @@ -327,7 +327,7 @@ async def get_team_callbacks( } """ try: - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -361,7 +361,7 @@ async def get_team_callbacks( except Exception as e: verbose_proxy_logger.error( - "litellm.proxy.proxy_server.get_team_callbacks(): Exception occurred - {}".format( + "litellm_proxy.proxy_server.get_team_callbacks(): Exception occurred - {}".format( str(e) ) ) diff --git a/litellm/proxy/management_endpoints/team_endpoints.py b/litellm_proxy/management_endpoints/team_endpoints.py similarity index 98% rename from litellm/proxy/management_endpoints/team_endpoints.py rename to litellm_proxy/management_endpoints/team_endpoints.py index cd4ae97ac7..bcaf49d2b7 100644 --- a/litellm/proxy/management_endpoints/team_endpoints.py +++ b/litellm_proxy/management_endpoints/team_endpoints.py @@ -22,7 +22,7 @@ from pydantic import 
BaseModel import litellm from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import ( +from litellm_proxy._types import ( BlockTeamRequest, CommonProxyErrors, DeleteTeamRequest, @@ -53,28 +53,28 @@ from litellm.proxy._types import ( UpdateTeamRequest, UserAPIKeyAuth, ) -from litellm.proxy.auth.auth_checks import ( +from litellm_proxy.auth.auth_checks import ( allowed_route_check_inside_route, get_team_object, get_user_object, ) -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.management_endpoints.common_utils import ( +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.management_endpoints.common_utils import ( _is_user_team_admin, _set_object_metadata_field, _user_has_admin_view, ) -from litellm.proxy.management_endpoints.tag_management_endpoints import ( +from litellm_proxy.management_endpoints.tag_management_endpoints import ( get_daily_activity, ) -from litellm.proxy.management_helpers.team_member_permission_checks import ( +from litellm_proxy.management_helpers.team_member_permission_checks import ( TeamMemberPermissionChecks, ) -from litellm.proxy.management_helpers.utils import ( +from litellm_proxy.management_helpers.utils import ( add_new_member, management_endpoint_wrapper, ) -from litellm.proxy.utils import ( +from litellm_proxy.utils import ( PrismaClient, _premium_user_check, handle_exception_on_proxy, @@ -195,7 +195,7 @@ async def new_team( # noqa: PLR0915 ``` """ try: - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( create_audit_log_for_update, duration_in_seconds, litellm_proxy_admin_name, @@ -464,8 +464,8 @@ async def update_team( }' ``` """ - from litellm.proxy.auth.auth_checks import _cache_team_object - from litellm.proxy.proxy_server import ( + from litellm_proxy.auth.auth_checks import _cache_team_object + from litellm_proxy.proxy_server import ( create_audit_log_for_update, duration_in_seconds, litellm_proxy_admin_name, @@ -663,7 +663,7 @@ async def team_member_add( ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( litellm_proxy_admin_name, premium_user, prisma_client, @@ -859,7 +859,7 @@ async def team_member_delete( }' ``` """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -981,7 +981,7 @@ async def team_member_update( Update team member budgets and team member role """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -1138,7 +1138,7 @@ async def delete_team( }' ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( create_audit_log_for_update, litellm_proxy_admin_name, prisma_client, @@ -1298,7 +1298,7 @@ async def team_info( --header 'Authorization: Bearer your_api_key_here' ``` """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client try: if prisma_client is None: @@ -1384,7 +1384,7 @@ async def team_info( except Exception as e: verbose_proxy_logger.error( - "litellm.proxy.management_endpoints.team_endpoints.py::team_info - Exception occurred - {}\n{}".format( + "litellm_proxy.management_endpoints.team_endpoints.py::team_info - Exception occurred - {}\n{}".format( e, 
traceback.format_exc() ) ) @@ -1436,7 +1436,7 @@ async def block_team( """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise Exception("No DB Connected.") @@ -1479,7 +1479,7 @@ async def unblock_team( }' ``` """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise Exception("No DB Connected.") @@ -1503,7 +1503,7 @@ async def list_available_teams( user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), response_model=List[LiteLLM_TeamTable], ): - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException( @@ -1575,7 +1575,7 @@ async def list_team( - user_id: str - Optional. If passed will only return teams that the user_id is a member of. - organization_id: str - Optional. If passed will only return teams that belong to the organization_id. Pass 'default_organization' to get all teams without organization_id. """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if not allowed_route_check_inside_route( user_api_key_dict=user_api_key_dict, requested_user_id=user_id @@ -1758,7 +1758,7 @@ async def ui_view_teams( Returns: List[LiteLLM_SpendLogs]: Paginated list of matching user records """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -1828,7 +1828,7 @@ async def team_model_add( }' ``` """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -1901,7 +1901,7 @@ async def team_model_delete( }' ``` """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise HTTPException(status_code=500, detail={"error": "No db connected"}) @@ -1960,7 +1960,7 @@ async def team_member_permissions( """ Get the team member permissions for a team """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( prisma_client, proxy_logging_obj, user_api_key_cache, @@ -2033,7 +2033,7 @@ async def update_team_member_permissions( """ Update the team member permissions for a team """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( prisma_client, proxy_logging_obj, user_api_key_cache, @@ -2120,7 +2120,7 @@ async def get_team_daily_activity( Returns: SpendAnalyticsPaginatedResponse: Paginated response containing daily activity data. 
""" - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( prisma_client, proxy_logging_obj, user_api_key_cache, diff --git a/litellm/proxy/management_endpoints/types.py b/litellm_proxy/management_endpoints/types.py similarity index 100% rename from litellm/proxy/management_endpoints/types.py rename to litellm_proxy/management_endpoints/types.py diff --git a/litellm/proxy/management_endpoints/ui_sso.py b/litellm_proxy/management_endpoints/ui_sso.py similarity index 97% rename from litellm/proxy/management_endpoints/ui_sso.py rename to litellm_proxy/management_endpoints/ui_sso.py index 23054fc45b..c340131ada 100644 --- a/litellm/proxy/management_endpoints/ui_sso.py +++ b/litellm_proxy/management_endpoints/ui_sso.py @@ -25,7 +25,7 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, httpxSpecialProvider, ) -from litellm.proxy._types import ( +from litellm_proxy._types import ( LiteLLM_UserTable, LitellmUserRoles, Member, @@ -38,26 +38,26 @@ from litellm.proxy._types import ( TeamMemberAddRequest, UserAPIKeyAuth, ) -from litellm.proxy.auth.auth_checks import get_user_object -from litellm.proxy.auth.auth_utils import _has_user_setup_sso -from litellm.proxy.auth.handle_jwt import JWTHandler -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.common_utils.admin_ui_utils import ( +from litellm_proxy.auth.auth_checks import get_user_object +from litellm_proxy.auth.auth_utils import _has_user_setup_sso +from litellm_proxy.auth.handle_jwt import JWTHandler +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.common_utils.admin_ui_utils import ( admin_ui_disabled, show_missing_vars_in_env, ) -from litellm.proxy.common_utils.html_forms.jwt_display_template import ( +from litellm_proxy.common_utils.html_forms.jwt_display_template import ( jwt_display_template, ) -from litellm.proxy.common_utils.html_forms.ui_login import html_form -from litellm.proxy.management_endpoints.internal_user_endpoints import new_user -from litellm.proxy.management_endpoints.sso_helper_utils import ( +from litellm_proxy.common_utils.html_forms.ui_login import html_form +from litellm_proxy.management_endpoints.internal_user_endpoints import new_user +from litellm_proxy.management_endpoints.sso_helper_utils import ( check_is_admin_only_access, has_admin_ui_access, ) -from litellm.proxy.management_endpoints.team_endpoints import new_team, team_member_add -from litellm.proxy.management_endpoints.types import CustomOpenID -from litellm.proxy.utils import PrismaClient +from litellm_proxy.management_endpoints.team_endpoints import new_team, team_member_add +from litellm_proxy.management_endpoints.types import CustomOpenID +from litellm_proxy.utils import PrismaClient from litellm.secret_managers.main import str_to_bool from litellm.types.proxy.management_endpoints.ui_sso import * @@ -76,7 +76,7 @@ async def google_login(request: Request): # noqa: PLR0915 PROXY_BASE_URL should be the your deployed proxy endpoint, e.g. 
PROXY_BASE_URL="https://litellm-production-7002.up.railway.app/" Example: """ - from litellm.proxy.proxy_server import premium_user + from litellm_proxy.proxy_server import premium_user microsoft_client_id = os.getenv("MICROSOFT_CLIENT_ID", None) google_client_id = os.getenv("GOOGLE_CLIENT_ID", None) @@ -322,10 +322,10 @@ def get_disabled_non_admin_personal_key_creation(): @router.get("/sso/callback", tags=["experimental"], include_in_schema=False) async def auth_callback(request: Request): # noqa: PLR0915 """Verify login""" - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( generate_key_helper_fn, ) - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( general_settings, jwt_handler, master_key, @@ -622,7 +622,7 @@ async def insert_sso_user( dependencies=[Depends(user_api_key_auth)], ) async def get_ui_settings(request: Request): - from litellm.proxy.proxy_server import general_settings, proxy_state + from litellm_proxy.proxy_server import general_settings, proxy_state _proxy_base_url = os.getenv("PROXY_BASE_URL", None) _logout_url = os.getenv("PROXY_LOGOUT_URL", None) @@ -893,7 +893,7 @@ class SSOAuthenticationHandler: litellm_team_id (str): The ID of the Litellm Team litellm_team_name (Optional[str]): The name of the Litellm Team """ - from litellm.proxy.proxy_server import prisma_client + from litellm_proxy.proxy_server import prisma_client if prisma_client is None: raise ProxyException( @@ -1321,7 +1321,7 @@ async def debug_sso_login(request: Request): PROXY_BASE_URL should be the your deployed proxy endpoint, e.g. PROXY_BASE_URL="https://litellm-production-7002.up.railway.app/" Example: """ - from litellm.proxy.proxy_server import premium_user + from litellm_proxy.proxy_server import premium_user microsoft_client_id = os.getenv("MICROSOFT_CLIENT_ID", None) google_client_id = os.getenv("GOOGLE_CLIENT_ID", None) @@ -1373,7 +1373,7 @@ async def debug_sso_callback(request: Request): from fastapi.responses import HTMLResponse - from litellm.proxy.proxy_server import jwt_handler + from litellm_proxy.proxy_server import jwt_handler microsoft_client_id = os.getenv("MICROSOFT_CLIENT_ID", None) google_client_id = os.getenv("GOOGLE_CLIENT_ID", None) diff --git a/litellm/proxy/management_helpers/audit_logs.py b/litellm_proxy/management_helpers/audit_logs.py similarity index 96% rename from litellm/proxy/management_helpers/audit_logs.py rename to litellm_proxy/management_helpers/audit_logs.py index d6c83c3856..d52fab1d1d 100644 --- a/litellm/proxy/management_helpers/audit_logs.py +++ b/litellm_proxy/management_helpers/audit_logs.py @@ -8,7 +8,7 @@ from datetime import datetime, timezone import litellm from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import ( +from litellm_proxy._types import ( AUDIT_ACTIONS, LiteLLM_AuditLogs, LitellmTableNames, @@ -65,7 +65,7 @@ async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs): if not litellm.store_audit_logs: return - from litellm.proxy.proxy_server import premium_user, prisma_client + from litellm_proxy.proxy_server import premium_user, prisma_client if premium_user is not True: return diff --git a/litellm/proxy/management_helpers/team_member_permission_checks.py b/litellm_proxy/management_helpers/team_member_permission_checks.py similarity index 93% rename from litellm/proxy/management_helpers/team_member_permission_checks.py rename to 
litellm_proxy/management_helpers/team_member_permission_checks.py index fc4622e878..297c1a7e8e 100644 --- a/litellm/proxy/management_helpers/team_member_permission_checks.py +++ b/litellm_proxy/management_helpers/team_member_permission_checks.py @@ -1,7 +1,7 @@ from typing import List, Optional from litellm.caching import DualCache -from litellm.proxy._types import ( +from litellm_proxy._types import ( KeyManagementRoutes, LiteLLM_TeamTableCachedObj, LiteLLM_VerificationToken, @@ -12,9 +12,9 @@ from litellm.proxy._types import ( ProxyException, UserAPIKeyAuth, ) -from litellm.proxy.auth.auth_checks import get_team_object -from litellm.proxy.auth.route_checks import RouteChecks -from litellm.proxy.utils import PrismaClient +from litellm_proxy.auth.auth_checks import get_team_object +from litellm_proxy.auth.route_checks import RouteChecks +from litellm_proxy.utils import PrismaClient DEFAULT_TEAM_MEMBER_PERMISSIONS = [ KeyManagementRoutes.KEY_INFO, @@ -61,7 +61,7 @@ class TeamMemberPermissionChecks: """ Main handler for checking if a team member can update a key """ - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( _get_user_in_team, ) @@ -143,10 +143,10 @@ class TeamMemberPermissionChecks: """ Returns True if the user belongs to the team that the key is assigned to """ - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( _get_user_in_team, ) - from litellm.proxy.proxy_server import prisma_client, user_api_key_cache + from litellm_proxy.proxy_server import prisma_client, user_api_key_cache if existing_key_row.team_id is None: return False diff --git a/litellm/proxy/management_helpers/utils.py b/litellm_proxy/management_helpers/utils.py similarity index 95% rename from litellm/proxy/management_helpers/utils.py rename to litellm_proxy/management_helpers/utils.py index cb8e079b76..aeeb1919ed 100644 --- a/litellm/proxy/management_helpers/utils.py +++ b/litellm_proxy/management_helpers/utils.py @@ -9,7 +9,7 @@ from fastapi import HTTPException, Request import litellm from litellm._logging import verbose_logger -from litellm.proxy._types import ( # key request types; user request types; team request types; customer request types +from litellm_proxy._types import ( # key request types; user request types; team request types; customer request types DeleteCustomerRequest, DeleteTeamRequest, DeleteUserRequest, @@ -26,8 +26,8 @@ from litellm.proxy._types import ( # key request types; user request types; tea UserAPIKeyAuth, VirtualKeyEvent, ) -from litellm.proxy.common_utils.http_parsing_utils import _read_request_body -from litellm.proxy.utils import PrismaClient +from litellm_proxy.common_utils.http_parsing_utils import _read_request_body +from litellm_proxy.utils import PrismaClient def get_new_internal_user_defaults( @@ -156,7 +156,7 @@ async def add_new_member( def _delete_user_id_from_cache(kwargs): - from litellm.proxy.proxy_server import user_api_key_cache + from litellm_proxy.proxy_server import user_api_key_cache if kwargs.get("data") is not None: update_user_request = kwargs.get("data") @@ -171,7 +171,7 @@ def _delete_user_id_from_cache(kwargs): def _delete_api_key_from_cache(kwargs): - from litellm.proxy.proxy_server import user_api_key_cache + from litellm_proxy.proxy_server import user_api_key_cache if kwargs.get("data") is not None: update_request = kwargs.get("data") @@ -186,7 +186,7 @@ def 
_delete_api_key_from_cache(kwargs): def _delete_team_id_from_cache(kwargs): - from litellm.proxy.proxy_server import user_api_key_cache + from litellm_proxy.proxy_server import user_api_key_cache if kwargs.get("data") is not None: update_request = kwargs.get("data") @@ -201,7 +201,7 @@ def _delete_team_id_from_cache(kwargs): def _delete_customer_id_from_cache(kwargs): - from litellm.proxy.proxy_server import user_api_key_cache + from litellm_proxy.proxy_server import user_api_key_cache if kwargs.get("data") is not None: update_request = kwargs.get("data") @@ -226,7 +226,7 @@ async def send_management_endpoint_alert( - An internal user is created, updated, or deleted - A team is created, updated, or deleted """ - from litellm.proxy.proxy_server import premium_user, proxy_logging_obj + from litellm_proxy.proxy_server import premium_user, proxy_logging_obj from litellm.types.integrations.slack_alerting import AlertType if premium_user is not True: @@ -301,7 +301,7 @@ def management_endpoint_wrapper(func): _http_request = kwargs.get("http_request", None) parent_otel_span = getattr(user_api_key_dict, "parent_otel_span", None) if parent_otel_span is not None: - from litellm.proxy.proxy_server import open_telemetry_logger + from litellm_proxy.proxy_server import open_telemetry_logger if open_telemetry_logger is not None: if _http_request: @@ -345,7 +345,7 @@ def management_endpoint_wrapper(func): ) parent_otel_span = getattr(user_api_key_dict, "parent_otel_span", None) if parent_otel_span is not None: - from litellm.proxy.proxy_server import open_telemetry_logger + from litellm_proxy.proxy_server import open_telemetry_logger if open_telemetry_logger is not None: _http_request = kwargs.get("http_request") diff --git a/litellm/proxy/mcp_tools.py b/litellm_proxy/mcp_tools.py similarity index 100% rename from litellm/proxy/mcp_tools.py rename to litellm_proxy/mcp_tools.py diff --git a/litellm/proxy/middleware/prometheus_auth_middleware.py b/litellm_proxy/middleware/prometheus_auth_middleware.py similarity index 94% rename from litellm/proxy/middleware/prometheus_auth_middleware.py rename to litellm_proxy/middleware/prometheus_auth_middleware.py index 5d7913e5bf..5dd6749931 100644 --- a/litellm/proxy/middleware/prometheus_auth_middleware.py +++ b/litellm_proxy/middleware/prometheus_auth_middleware.py @@ -6,8 +6,8 @@ from fastapi.responses import JSONResponse from starlette.middleware.base import BaseHTTPMiddleware import litellm -from litellm.proxy._types import SpecialHeaders -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy._types import SpecialHeaders +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth class PrometheusAuthMiddleware(BaseHTTPMiddleware): diff --git a/litellm/proxy/model_config.yaml b/litellm_proxy/model_config.yaml similarity index 100% rename from litellm/proxy/model_config.yaml rename to litellm_proxy/model_config.yaml diff --git a/litellm/proxy/openai_files_endpoints/files_endpoints.py b/litellm_proxy/openai_files_endpoints/files_endpoints.py similarity index 97% rename from litellm/proxy/openai_files_endpoints/files_endpoints.py rename to litellm_proxy/openai_files_endpoints/files_endpoints.py index bf29bdf6bd..3e79bc72ac 100644 --- a/litellm/proxy/openai_files_endpoints/files_endpoints.py +++ b/litellm_proxy/openai_files_endpoints/files_endpoints.py @@ -25,14 +25,14 @@ from fastapi import ( import litellm from litellm import CreateFileRequest, get_secret_str from litellm._logging import verbose_proxy_logger -from 
litellm.proxy._types import * -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing -from litellm.proxy.common_utils.openai_endpoint_utils import ( +from litellm_proxy._types import * +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.common_request_processing import ProxyBaseLLMRequestProcessing +from litellm_proxy.common_utils.openai_endpoint_utils import ( get_custom_llm_provider_from_request_body, ) -from litellm.proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles -from litellm.proxy.utils import ProxyLogging +from litellm_proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles +from litellm_proxy.utils import ProxyLogging from litellm.router import Router from litellm.types.llms.openai import ( CREATE_FILE_REQUESTS_PURPOSE, @@ -229,7 +229,7 @@ async def create_file( ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, llm_router, @@ -350,7 +350,7 @@ async def create_file( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.create_file(): Exception occured - {}".format( + "litellm_proxy.proxy_server.create_file(): Exception occurred - {}".format( str(e) ) ) @@ -406,7 +406,7 @@ async def get_file_content( ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, llm_router, @@ -507,7 +507,7 @@ async def get_file_content( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.retrieve_file_content(): Exception occured - {}".format( + "litellm_proxy.proxy_server.retrieve_file_content(): Exception occurred - {}".format( str(e) ) ) @@ -564,7 +564,7 @@ async def get_file( ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, proxy_config, @@ -645,7 +645,7 @@ async def get_file( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.retrieve_file(): Exception occured - {}".format( + "litellm_proxy.proxy_server.retrieve_file(): Exception occurred - {}".format( str(e) ) ) @@ -703,7 +703,7 @@ async def delete_file( ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, llm_router, @@ -794,7 +794,7 @@ async def delete_file( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.retrieve_file(): Exception occured - {}".format( + "litellm_proxy.proxy_server.delete_file(): Exception occurred - {}".format( str(e) ) ) @@ -851,7 +851,7 @@ async def list_files( ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, proxy_config, @@ -910,7 +910,7 @@ async def list_files( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.list_files(): Exception occured - {}".format( + "litellm_proxy.proxy_server.list_files(): Exception occurred - {}".format( str(e) ) ) diff --git a/litellm/proxy/openapi.json b/litellm_proxy/openapi.json
similarity index 100% rename from litellm/proxy/openapi.json rename to litellm_proxy/openapi.json diff --git a/litellm/proxy/pass_through_endpoints/common_utils.py b/litellm_proxy/pass_through_endpoints/common_utils.py similarity index 100% rename from litellm/proxy/pass_through_endpoints/common_utils.py rename to litellm_proxy/pass_through_endpoints/common_utils.py diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm_proxy/pass_through_endpoints/llm_passthrough_endpoints.py similarity index 98% rename from litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py rename to litellm_proxy/pass_through_endpoints/llm_passthrough_endpoints.py index 21df55c084..8e3c1adf2b 100644 --- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py +++ b/litellm_proxy/pass_through_endpoints/llm_passthrough_endpoints.py @@ -16,11 +16,11 @@ import litellm from litellm._logging import verbose_proxy_logger from litellm.constants import BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES from litellm.llms.vertex_ai.vertex_llm_base import VertexBase -from litellm.proxy._types import * -from litellm.proxy.auth.route_checks import RouteChecks -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.pass_through_endpoints.common_utils import get_litellm_virtual_key -from litellm.proxy.pass_through_endpoints.pass_through_endpoints import ( +from litellm_proxy._types import * +from litellm_proxy.auth.route_checks import RouteChecks +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.pass_through_endpoints.common_utils import get_litellm_virtual_key +from litellm_proxy.pass_through_endpoints.pass_through_endpoints import ( create_pass_through_route, ) from litellm.secret_managers.main import get_secret_str @@ -481,7 +481,7 @@ async def assemblyai_proxy_route( fastapi_response: Response, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): - from litellm.proxy.pass_through_endpoints.llm_provider_handlers.assembly_passthrough_logging_handler import ( + from litellm_proxy.pass_through_endpoints.llm_provider_handlers.assembly_passthrough_logging_handler import ( AssemblyAIPassthroughLoggingHandler, ) diff --git a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py b/litellm_proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py similarity index 98% rename from litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py rename to litellm_proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py index 0edccdf89b..ec8b4bd22f 100644 --- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py +++ b/litellm_proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py @@ -11,8 +11,8 @@ from litellm.llms.anthropic.chat.handler import ( ModelResponseIterator as AnthropicModelResponseIterator, ) from litellm.llms.anthropic.chat.transformation import AnthropicConfig -from litellm.proxy._types import PassThroughEndpointLoggingTypedDict -from litellm.proxy.auth.auth_utils import get_end_user_id_from_request_body +from litellm_proxy._types import PassThroughEndpointLoggingTypedDict +from litellm_proxy.auth.auth_utils import get_end_user_id_from_request_body from litellm.types.passthrough_endpoints.pass_through_endpoints import ( PassthroughStandardLoggingPayload, ) diff --git 
a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/assembly_passthrough_logging_handler.py b/litellm_proxy/pass_through_endpoints/llm_provider_handlers/assembly_passthrough_logging_handler.py similarity index 99% rename from litellm/proxy/pass_through_endpoints/llm_provider_handlers/assembly_passthrough_logging_handler.py rename to litellm_proxy/pass_through_endpoints/llm_provider_handlers/assembly_passthrough_logging_handler.py index 1acaf5f361..609763b195 100644 --- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/assembly_passthrough_logging_handler.py +++ b/litellm_proxy/pass_through_endpoints/llm_provider_handlers/assembly_passthrough_logging_handler.py @@ -187,7 +187,7 @@ class AssemblyAIPassthroughLoggingHandler: Returns: Optional[dict]: Transcript details if successful, None otherwise """ - from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import ( passthrough_endpoint_router, ) diff --git a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/base_passthrough_logging_handler.py b/litellm_proxy/pass_through_endpoints/llm_provider_handlers/base_passthrough_logging_handler.py similarity index 98% rename from litellm/proxy/pass_through_endpoints/llm_provider_handlers/base_passthrough_logging_handler.py rename to litellm_proxy/pass_through_endpoints/llm_provider_handlers/base_passthrough_logging_handler.py index b9df8ecede..c19b53864b 100644 --- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/base_passthrough_logging_handler.py +++ b/litellm_proxy/pass_through_endpoints/llm_provider_handlers/base_passthrough_logging_handler.py @@ -11,8 +11,8 @@ from litellm.litellm_core_utils.litellm_logging import ( get_standard_logging_object_payload, ) from litellm.llms.base_llm.chat.transformation import BaseConfig -from litellm.proxy._types import PassThroughEndpointLoggingTypedDict -from litellm.proxy.auth.auth_utils import get_end_user_id_from_request_body +from litellm_proxy._types import PassThroughEndpointLoggingTypedDict +from litellm_proxy.auth.auth_utils import get_end_user_id_from_request_body from litellm.types.passthrough_endpoints.pass_through_endpoints import ( PassthroughStandardLoggingPayload, ) diff --git a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/cohere_passthrough_logging_handler.py b/litellm_proxy/pass_through_endpoints/llm_provider_handlers/cohere_passthrough_logging_handler.py similarity index 100% rename from litellm/proxy/pass_through_endpoints/llm_provider_handlers/cohere_passthrough_logging_handler.py rename to litellm_proxy/pass_through_endpoints/llm_provider_handlers/cohere_passthrough_logging_handler.py diff --git a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py b/litellm_proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py similarity index 99% rename from litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py rename to litellm_proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py index a20f39e65c..e7ca367707 100644 --- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py +++ b/litellm_proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py @@ -12,7 +12,7 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging from 
litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( ModelResponseIterator as VertexModelResponseIterator, ) -from litellm.proxy._types import PassThroughEndpointLoggingTypedDict +from litellm_proxy._types import PassThroughEndpointLoggingTypedDict from litellm.types.utils import ( EmbeddingResponse, ImageResponse, diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm_proxy/pass_through_endpoints/pass_through_endpoints.py similarity index 97% rename from litellm/proxy/pass_through_endpoints/pass_through_endpoints.py rename to litellm_proxy/pass_through_endpoints/pass_through_endpoints.py index 2fbedaeb22..75daba75aa 100644 --- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +++ b/litellm_proxy/pass_through_endpoints/pass_through_endpoints.py @@ -26,7 +26,7 @@ from litellm.integrations.custom_logger import CustomLogger from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.litellm_core_utils.safe_json_dumps import safe_dumps from litellm.llms.custom_httpx.http_handler import get_async_httpx_client -from litellm.proxy._types import ( +from litellm_proxy._types import ( ConfigFieldInfo, ConfigFieldUpdate, PassThroughEndpointResponse, @@ -34,9 +34,9 @@ from litellm.proxy._types import ( ProxyException, UserAPIKeyAuth, ) -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing -from litellm.proxy.common_utils.http_parsing_utils import _read_request_body +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.common_request_processing import ProxyBaseLLMRequestProcessing +from litellm_proxy.common_utils.http_parsing_utils import _read_request_body from litellm.secret_managers.main import get_secret_str from litellm.types.llms.custom_http import httpxSpecialProvider from litellm.types.passthrough_endpoints.pass_through_endpoints import ( @@ -119,7 +119,7 @@ async def chat_completion_pass_through_endpoint( # noqa: PLR0915 adapter_id: str, user_api_key_dict: UserAPIKeyAuth, ): - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, llm_router, @@ -263,7 +263,7 @@ async def chat_completion_pass_through_endpoint( # noqa: PLR0915 user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.completion(): Exception occured - {}".format( + "litellm_proxy.proxy_server.completion(): Exception occurred - {}".format( str(e) ) ) @@ -474,7 +474,7 @@ async def pass_through_request( # noqa: PLR0915 url: Optional[httpx.URL] = None try: from litellm.litellm_core_utils.litellm_logging import Logging - from litellm.proxy.proxy_server import proxy_logging_obj + from litellm_proxy.proxy_server import proxy_logging_obj url = httpx.URL(target) headers = custom_headers @@ -720,7 +720,7 @@ async def pass_through_request( # noqa: PLR0915 api_base=str(url._uri_reference) if url else None, ) verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.pass_through_endpoint(): Exception occured - {}".format( + "litellm_proxy.proxy_server.pass_through_endpoint(): Exception occurred - {}".format( str(e) ) ) @@ -813,7 +813,7 @@ def create_pass_through_route( # check if target is an adapter.py or a url import uuid - from litellm.proxy.types_utils.utils import get_instance_fn + from litellm_proxy.types_utils.utils import get_instance_fn try: if
isinstance(target, CustomLogger): @@ -872,8 +872,8 @@ def _is_streaming_response(response: httpx.Response) -> bool: async def initialize_pass_through_endpoints(pass_through_endpoints: list): verbose_proxy_logger.debug("initializing pass through endpoints") - from litellm.proxy._types import CommonProxyErrors, LiteLLMRoutes - from litellm.proxy.proxy_server import app, premium_user + from litellm_proxy._types import CommonProxyErrors, LiteLLMRoutes + from litellm_proxy.proxy_server import app, premium_user for endpoint in pass_through_endpoints: _target = endpoint.get("target", None) @@ -933,7 +933,7 @@ async def get_pass_through_endpoints( If no endpoint_id given, return all configured endpoints. """ - from litellm.proxy.proxy_server import get_config_general_settings + from litellm_proxy.proxy_server import get_config_general_settings ## Get existing pass-through endpoint field value try: @@ -990,7 +990,7 @@ async def create_pass_through_endpoints( """ Create new pass-through endpoint """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( get_config_general_settings, update_config_general_settings, ) @@ -1038,7 +1038,7 @@ async def delete_pass_through_endpoints( Returns - the deleted endpoint """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( get_config_general_settings, update_config_general_settings, ) diff --git a/litellm/proxy/pass_through_endpoints/passthrough_endpoint_router.py b/litellm_proxy/pass_through_endpoints/passthrough_endpoint_router.py similarity index 100% rename from litellm/proxy/pass_through_endpoints/passthrough_endpoint_router.py rename to litellm_proxy/pass_through_endpoints/passthrough_endpoint_router.py diff --git a/litellm/proxy/pass_through_endpoints/streaming_handler.py b/litellm_proxy/pass_through_endpoints/streaming_handler.py similarity index 98% rename from litellm/proxy/pass_through_endpoints/streaming_handler.py rename to litellm_proxy/pass_through_endpoints/streaming_handler.py index d4260a0300..dd73bd7bf4 100644 --- a/litellm/proxy/pass_through_endpoints/streaming_handler.py +++ b/litellm_proxy/pass_through_endpoints/streaming_handler.py @@ -7,7 +7,7 @@ import httpx from litellm._logging import verbose_proxy_logger from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj -from litellm.proxy._types import PassThroughEndpointLoggingResultValues +from litellm_proxy._types import PassThroughEndpointLoggingResultValues from litellm.types.passthrough_endpoints.pass_through_endpoints import EndpointType from litellm.types.utils import StandardPassThroughResponseObject diff --git a/litellm/proxy/pass_through_endpoints/success_handler.py b/litellm_proxy/pass_through_endpoints/success_handler.py similarity index 99% rename from litellm/proxy/pass_through_endpoints/success_handler.py rename to litellm_proxy/pass_through_endpoints/success_handler.py index 09f0e3847a..841726cc99 100644 --- a/litellm/proxy/pass_through_endpoints/success_handler.py +++ b/litellm_proxy/pass_through_endpoints/success_handler.py @@ -6,7 +6,7 @@ from urllib.parse import urlparse import httpx from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj -from litellm.proxy._types import PassThroughEndpointLoggingResultValues +from litellm_proxy._types import PassThroughEndpointLoggingResultValues from litellm.types.passthrough_endpoints.pass_through_endpoints import ( PassthroughStandardLoggingPayload, ) diff --git a/litellm/proxy/post_call_rules.py 
b/litellm_proxy/post_call_rules.py similarity index 100% rename from litellm/proxy/post_call_rules.py rename to litellm_proxy/post_call_rules.py diff --git a/litellm/proxy/prisma_migration.py b/litellm_proxy/prisma_migration.py similarity index 100% rename from litellm/proxy/prisma_migration.py rename to litellm_proxy/prisma_migration.py diff --git a/litellm/proxy/proxy_cli.py b/litellm_proxy/proxy_cli.py similarity index 98% rename from litellm/proxy/proxy_cli.py rename to litellm_proxy/proxy_cli.py index de78247daf..fbfc3d5150 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm_proxy/proxy_cli.py @@ -124,7 +124,7 @@ class ProxyInitializationHelpers: import litellm uvicorn_args = { - "app": "litellm.proxy.proxy_server:app", + "app": "litellm_proxy.proxy_server:app", "host": host, "port": port, } @@ -568,7 +568,7 @@ def run_server( # noqa: PLR0915 ### GET DB TOKEN FOR IAM AUTH ### if iam_token_db_auth: - from litellm.proxy.auth.rds_iam_token import generate_iam_auth_token + from litellm_proxy.auth.rds_iam_token import generate_iam_auth_token db_host = os.getenv("DATABASE_HOST") db_port = os.getenv("DATABASE_PORT") @@ -721,8 +721,8 @@ def run_server( # noqa: PLR0915 is_prisma_runnable = False if is_prisma_runnable: - from litellm.proxy.db.check_migration import check_prisma_schema_diff - from litellm.proxy.db.prisma_client import ( + from litellm_proxy.db.check_migration import check_prisma_schema_diff + from litellm_proxy.db.prisma_client import ( PrismaManager, should_update_prisma_schema, ) @@ -749,7 +749,7 @@ def run_server( # noqa: PLR0915 litellm._turn_on_debug() # DO NOT DELETE - enables global variables to work across files - from litellm.proxy.proxy_server import app # noqa + from litellm_proxy.proxy_server import app # noqa uvicorn_args = ProxyInitializationHelpers._get_default_unvicorn_init_args( host=host, diff --git a/litellm/proxy/proxy_config.yaml b/litellm_proxy/proxy_config.yaml similarity index 100% rename from litellm/proxy/proxy_config.yaml rename to litellm_proxy/proxy_config.yaml diff --git a/litellm/proxy/proxy_server.py b/litellm_proxy/proxy_server.py similarity index 98% rename from litellm/proxy/proxy_server.py rename to litellm_proxy/proxy_server.py index fd32a62ee4..63bc0ad653 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm_proxy/proxy_server.py @@ -33,6 +33,7 @@ from litellm.types.utils import ( ModelResponse, ModelResponseStream, TextCompletionResponse, + KeyManagementSettings, ) if TYPE_CHECKING: @@ -141,149 +142,149 @@ from litellm.litellm_core_utils.credential_accessor import CredentialAccessor from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.litellm_core_utils.sensitive_data_masker import SensitiveDataMasker from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler -from litellm.proxy._experimental.mcp_server.server import router as mcp_router -from litellm.proxy._experimental.mcp_server.tool_registry import ( +from litellm_proxy._experimental.mcp_server.server import router as mcp_router +from litellm_proxy._experimental.mcp_server.tool_registry import ( global_mcp_tool_registry, ) -from litellm.proxy._types import * -from litellm.proxy.analytics_endpoints.analytics_endpoints import ( +from litellm_proxy._types import * +from litellm_proxy.analytics_endpoints.analytics_endpoints import ( router as analytics_router, ) -from litellm.proxy.anthropic_endpoints.endpoints import router as anthropic_router -from litellm.proxy.auth.auth_checks import get_team_object, log_db_metrics 
-from litellm.proxy.auth.auth_utils import check_response_size_is_safe -from litellm.proxy.auth.handle_jwt import JWTHandler -from litellm.proxy.auth.litellm_license import LicenseCheck -from litellm.proxy.auth.model_checks import ( +from litellm_proxy.anthropic_endpoints.endpoints import router as anthropic_router +from litellm_proxy.auth.auth_checks import get_team_object, log_db_metrics +from litellm_proxy.auth.auth_utils import check_response_size_is_safe +from litellm_proxy.auth.handle_jwt import JWTHandler +from litellm_proxy.auth.litellm_license import LicenseCheck +from litellm_proxy.auth.model_checks import ( get_complete_model_list, get_key_models, get_team_models, ) -from litellm.proxy.auth.user_api_key_auth import ( +from litellm_proxy.auth.user_api_key_auth import ( user_api_key_auth, user_api_key_auth_websocket, ) -from litellm.proxy.batches_endpoints.endpoints import router as batches_router +from litellm_proxy.batches_endpoints.endpoints import router as batches_router ## Import All Misc routes here ## -from litellm.proxy.caching_routes import router as caching_router -from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing -from litellm.proxy.common_utils.callback_utils import initialize_callbacks_on_proxy -from litellm.proxy.common_utils.debug_utils import init_verbose_loggers -from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router -from litellm.proxy.common_utils.encrypt_decrypt_utils import ( +from litellm_proxy.caching_routes import router as caching_router +from litellm_proxy.common_request_processing import ProxyBaseLLMRequestProcessing +from litellm_proxy.common_utils.callback_utils import initialize_callbacks_on_proxy +from litellm_proxy.common_utils.debug_utils import init_verbose_loggers +from litellm_proxy.common_utils.debug_utils import router as debugging_endpoints_router +from litellm_proxy.common_utils.encrypt_decrypt_utils import ( decrypt_value_helper, encrypt_value_helper, ) -from litellm.proxy.common_utils.html_forms.ui_login import html_form -from litellm.proxy.common_utils.http_parsing_utils import ( +from litellm_proxy.common_utils.html_forms.ui_login import html_form +from litellm_proxy.common_utils.http_parsing_utils import ( _read_request_body, check_file_size_under_limit, ) -from litellm.proxy.common_utils.load_config_utils import ( +from litellm_proxy.common_utils.load_config_utils import ( get_config_file_contents_from_gcs, get_file_contents_from_s3, ) -from litellm.proxy.common_utils.openai_endpoint_utils import ( +from litellm_proxy.common_utils.openai_endpoint_utils import ( remove_sensitive_info_from_deployment, ) -from litellm.proxy.common_utils.proxy_state import ProxyState -from litellm.proxy.common_utils.reset_budget_job import ResetBudgetJob -from litellm.proxy.common_utils.swagger_utils import ERROR_RESPONSES -from litellm.proxy.credential_endpoints.endpoints import router as credential_router -from litellm.proxy.db.exception_handler import PrismaDBExceptionHandler -from litellm.proxy.fine_tuning_endpoints.endpoints import router as fine_tuning_router -from litellm.proxy.fine_tuning_endpoints.endpoints import set_fine_tuning_config -from litellm.proxy.guardrails.guardrail_endpoints import router as guardrails_router -from litellm.proxy.guardrails.init_guardrails import ( +from litellm_proxy.common_utils.proxy_state import ProxyState +from litellm_proxy.common_utils.reset_budget_job import ResetBudgetJob +from litellm_proxy.common_utils.swagger_utils import ERROR_RESPONSES +from 
litellm_proxy.credential_endpoints.endpoints import router as credential_router +from litellm_proxy.db.exception_handler import PrismaDBExceptionHandler +from litellm_proxy.fine_tuning_endpoints.endpoints import router as fine_tuning_router +from litellm_proxy.fine_tuning_endpoints.endpoints import set_fine_tuning_config +from litellm_proxy.guardrails.guardrail_endpoints import router as guardrails_router +from litellm_proxy.guardrails.init_guardrails import ( init_guardrails_v2, initialize_guardrails, ) -from litellm.proxy.health_check import perform_health_check -from litellm.proxy.health_endpoints._health_endpoints import router as health_router -from litellm.proxy.hooks.model_max_budget_limiter import ( +from litellm_proxy.health_check import perform_health_check +from litellm_proxy.health_endpoints._health_endpoints import router as health_router +from litellm_proxy.hooks.model_max_budget_limiter import ( _PROXY_VirtualKeyModelMaxBudgetLimiter, ) -from litellm.proxy.hooks.prompt_injection_detection import ( +from litellm_proxy.hooks.prompt_injection_detection import ( _OPTIONAL_PromptInjectionDetection, ) -from litellm.proxy.hooks.proxy_track_cost_callback import _ProxyDBLogger -from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request -from litellm.proxy.management_endpoints.budget_management_endpoints import ( +from litellm_proxy.hooks.proxy_track_cost_callback import _ProxyDBLogger +from litellm_proxy.litellm_pre_call_utils import add_litellm_data_to_request +from litellm_proxy.management_endpoints.budget_management_endpoints import ( router as budget_management_router, ) -from litellm.proxy.management_endpoints.customer_endpoints import ( +from litellm_proxy.management_endpoints.customer_endpoints import ( router as customer_router, ) -from litellm.proxy.management_endpoints.internal_user_endpoints import ( +from litellm_proxy.management_endpoints.internal_user_endpoints import ( router as internal_user_router, ) -from litellm.proxy.management_endpoints.internal_user_endpoints import user_update -from litellm.proxy.management_endpoints.key_management_endpoints import ( +from litellm_proxy.management_endpoints.internal_user_endpoints import user_update +from litellm_proxy.management_endpoints.key_management_endpoints import ( delete_verification_tokens, duration_in_seconds, generate_key_helper_fn, ) -from litellm.proxy.management_endpoints.key_management_endpoints import ( +from litellm_proxy.management_endpoints.key_management_endpoints import ( router as key_management_router, ) -from litellm.proxy.management_endpoints.model_management_endpoints import ( +from litellm_proxy.management_endpoints.model_management_endpoints import ( _add_model_to_db, _add_team_model_to_db, _deduplicate_litellm_router_models, ) -from litellm.proxy.management_endpoints.model_management_endpoints import ( +from litellm_proxy.management_endpoints.model_management_endpoints import ( router as model_management_router, ) -from litellm.proxy.management_endpoints.organization_endpoints import ( +from litellm_proxy.management_endpoints.organization_endpoints import ( router as organization_router, ) -from litellm.proxy.management_endpoints.scim.scim_v2 import scim_router -from litellm.proxy.management_endpoints.tag_management_endpoints import ( +from litellm_proxy.management_endpoints.scim.scim_v2 import scim_router +from litellm_proxy.management_endpoints.tag_management_endpoints import ( router as tag_management_router, ) -from litellm.proxy.management_endpoints.team_callback_endpoints 
import ( +from litellm_proxy.management_endpoints.team_callback_endpoints import ( router as team_callback_router, ) -from litellm.proxy.management_endpoints.team_endpoints import router as team_router -from litellm.proxy.management_endpoints.team_endpoints import ( +from litellm_proxy.management_endpoints.team_endpoints import router as team_router +from litellm_proxy.management_endpoints.team_endpoints import ( update_team, validate_membership, ) -from litellm.proxy.management_endpoints.ui_sso import ( +from litellm_proxy.management_endpoints.ui_sso import ( get_disabled_non_admin_personal_key_creation, ) -from litellm.proxy.management_endpoints.ui_sso import router as ui_sso_router -from litellm.proxy.management_helpers.audit_logs import create_audit_log_for_update -from litellm.proxy.middleware.prometheus_auth_middleware import PrometheusAuthMiddleware -from litellm.proxy.openai_files_endpoints.files_endpoints import ( +from litellm_proxy.management_endpoints.ui_sso import router as ui_sso_router +from litellm_proxy.management_helpers.audit_logs import create_audit_log_for_update +from litellm_proxy.middleware.prometheus_auth_middleware import PrometheusAuthMiddleware +from litellm_proxy.openai_files_endpoints.files_endpoints import ( router as openai_files_router, ) -from litellm.proxy.openai_files_endpoints.files_endpoints import set_files_config -from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( +from litellm_proxy.openai_files_endpoints.files_endpoints import set_files_config +from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import ( passthrough_endpoint_router, ) -from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( +from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import ( router as llm_passthrough_router, ) -from litellm.proxy.pass_through_endpoints.pass_through_endpoints import ( +from litellm_proxy.pass_through_endpoints.pass_through_endpoints import ( initialize_pass_through_endpoints, ) -from litellm.proxy.pass_through_endpoints.pass_through_endpoints import ( +from litellm_proxy.pass_through_endpoints.pass_through_endpoints import ( router as pass_through_router, ) -from litellm.proxy.rerank_endpoints.endpoints import router as rerank_router -from litellm.proxy.response_api_endpoints.endpoints import router as response_router -from litellm.proxy.route_llm_request import route_request -from litellm.proxy.spend_tracking.spend_management_endpoints import ( +from litellm_proxy.rerank_endpoints.endpoints import router as rerank_router +from litellm_proxy.response_api_endpoints.endpoints import router as response_router +from litellm_proxy.route_llm_request import route_request +from litellm_proxy.spend_tracking.spend_management_endpoints import ( router as spend_management_router, ) -from litellm.proxy.spend_tracking.spend_tracking_utils import get_logging_payload -from litellm.proxy.types_utils.utils import get_instance_fn -from litellm.proxy.ui_crud_endpoints.proxy_setting_endpoints import ( +from litellm_proxy.spend_tracking.spend_tracking_utils import get_logging_payload +from litellm_proxy.types_utils.utils import get_instance_fn +from litellm_proxy.ui_crud_endpoints.proxy_setting_endpoints import ( router as ui_crud_endpoints_router, ) -from litellm.proxy.utils import ( +from litellm_proxy.utils import ( PrismaClient, ProxyLogging, ProxyUpdateSpend, @@ -297,7 +298,7 @@ from litellm.proxy.utils import ( hash_token, update_spend, ) -from 
litellm.proxy.vertex_ai_endpoints.langfuse_endpoints import ( +from litellm_proxy.vertex_ai_endpoints.langfuse_endpoints import ( router as langfuse_router, ) from litellm.router import ( @@ -372,15 +373,7 @@ from fastapi.security.api_key import APIKeyHeader from fastapi.staticfiles import StaticFiles # import enterprise folder -try: - # when using litellm cli - import litellm.proxy.enterprise as enterprise -except Exception: - # when using litellm docker image - try: - import enterprise # type: ignore - except Exception: - pass +import enterprise server_root_path = os.getenv("SERVER_ROOT_PATH", "") _license_check = LicenseCheck() @@ -480,7 +473,7 @@ async def proxy_startup_event(app: FastAPI): init_verbose_loggers() ## CHECK PREMIUM USER verbose_proxy_logger.debug( - "litellm.proxy.proxy_server.py::startup() - CHECKING PREMIUM USER - {}".format( + "litellm_proxy.proxy_server.py::startup() - CHECKING PREMIUM USER - {}".format( premium_user ) ) @@ -1996,7 +1989,7 @@ class ProxyConfig: mcp_servers_config = config.get("mcp_servers", None) if mcp_servers_config: - from litellm.proxy._experimental.mcp_server.mcp_server_manager import ( + from litellm_proxy._experimental.mcp_server.mcp_server_manager import ( global_mcp_server_manager, ) @@ -2646,7 +2639,7 @@ class ProxyConfig: except Exception as e: verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.py::ProxyConfig:add_deployment - {}".format( + "litellm_proxy.proxy_server.py::ProxyConfig:add_deployment - {}".format( str(e) ) ) @@ -2869,7 +2862,7 @@ async def async_assistants_data_generator( yield f"data: {done_message}\n\n" except Exception as e: verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.async_assistants_data_generator(): Exception occured - {}".format( + "litellm_proxy.proxy_server.async_assistants_data_generator(): Exception occurred - {}".format( str(e) ) ) @@ -2928,7 +2921,7 @@ async def async_data_generator( yield f"data: {done_message}\n\n" except Exception as e: verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format( + "litellm_proxy.proxy_server.async_data_generator(): Exception occurred - {}".format( str(e) ) ) @@ -3640,7 +3633,7 @@ async def completion( # noqa: PLR0915 user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.completion(): Exception occured - {}".format( + "litellm_proxy.proxy_server.completion(): Exception occurred - {}".format( str(e) ) ) @@ -3841,7 +3834,7 @@ async def embeddings( # noqa: PLR0915 litellm_debug_info, ) verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.embeddings(): Exception occured - {}".format( + "litellm_proxy.proxy_server.embeddings(): Exception occurred - {}".format( str(e) ) ) @@ -3961,7 +3954,7 @@ async def image_generation( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.image_generation(): Exception occured - {}".format( + "litellm_proxy.proxy_server.image_generation(): Exception occurred - {}".format( str(e) ) ) @@ -4087,7 +4080,7 @@ async def audio_speech( except Exception as e: verbose_proxy_logger.error( - "litellm.proxy.proxy_server.audio_speech(): Exception occured - {}".format( + "litellm_proxy.proxy_server.audio_speech(): Exception occurred - {}".format( str(e) ) ) @@ -4224,7 +4217,7 @@ async def audio_transcriptions( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data )
verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.audio_transcription(): Exception occured - {}".format( + "litellm_proxy.proxy_server.audio_transcription(): Exception occurred - {}".format( str(e) ) ) @@ -4412,7 +4405,7 @@ async def get_assistants( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.get_assistants(): Exception occured - {}".format( + "litellm_proxy.proxy_server.get_assistants(): Exception occurred - {}".format( str(e) ) ) @@ -4511,7 +4504,7 @@ async def create_assistant( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.create_assistant(): Exception occured - {}".format( + "litellm_proxy.proxy_server.create_assistant(): Exception occurred - {}".format( str(e) ) ) @@ -4608,7 +4601,7 @@ async def delete_assistant( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.delete_assistant(): Exception occured - {}".format( + "litellm_proxy.proxy_server.delete_assistant(): Exception occurred - {}".format( str(e) ) ) @@ -4705,7 +4698,7 @@ async def create_threads( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.create_threads(): Exception occured - {}".format( + "litellm_proxy.proxy_server.create_threads(): Exception occurred - {}".format( str(e) ) ) @@ -4800,7 +4793,7 @@ async def get_thread( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.get_thread(): Exception occured - {}".format( + "litellm_proxy.proxy_server.get_thread(): Exception occurred - {}".format( str(e) ) ) @@ -4899,7 +4892,7 @@ async def add_messages( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.add_messages(): Exception occured - {}".format( + "litellm_proxy.proxy_server.add_messages(): Exception occurred - {}".format( str(e) ) ) @@ -4994,7 +4987,7 @@ async def get_messages( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.get_messages(): Exception occured - {}".format( + "litellm_proxy.proxy_server.get_messages(): Exception occurred - {}".format( str(e) ) ) @@ -5103,7 +5096,7 @@ async def run_thread( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.run_thread(): Exception occured - {}".format( + "litellm_proxy.proxy_server.run_thread(): Exception occurred - {}".format( str(e) ) ) @@ -5226,7 +5219,7 @@ async def moderations( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.moderations(): Exception occured - {}".format( + "litellm_proxy.proxy_server.moderations(): Exception occurred - {}".format( str(e) ) ) @@ -7495,7 +7488,7 @@ async def update_config(config_info: ConfigYAML): # noqa: PLR0915 return {"message": "Config updated successfully"} except Exception as e: verbose_proxy_logger.error( - "litellm.proxy.proxy_server.update_config(): Exception occured - {}".format( + "litellm_proxy.proxy_server.update_config(): Exception occurred - {}".format( str(e) ) ) @@ -8045,7 +8038,7 @@ async def get_config(): # noqa: PLR0915 } except
Exception as e: verbose_proxy_logger.exception( - "litellm.proxy.proxy_server.get_config(): Exception occured - {}".format( + "litellm_proxy.proxy_server.get_config(): Exception occurred - {}".format( str(e) ) ) diff --git a/litellm/proxy/rerank_endpoints/endpoints.py b/litellm_proxy/rerank_endpoints/endpoints.py similarity index 93% rename from litellm/proxy/rerank_endpoints/endpoints.py rename to litellm_proxy/rerank_endpoints/endpoints.py index ba9046b3c2..953ff69781 100644 --- a/litellm/proxy/rerank_endpoints/endpoints.py +++ b/litellm_proxy/rerank_endpoints/endpoints.py @@ -5,9 +5,9 @@ from fastapi import APIRouter, Depends, HTTPException, Request, Response, status from fastapi.responses import ORJSONResponse from litellm._logging import verbose_proxy_logger -from litellm.proxy._types import * -from litellm.proxy.auth.user_api_key_auth import user_api_key_auth -from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing +from litellm_proxy._types import * +from litellm_proxy.auth.user_api_key_auth import user_api_key_auth +from litellm_proxy.common_request_processing import ProxyBaseLLMRequestProcessing router = APIRouter() import asyncio @@ -36,7 +36,7 @@ async def rerank( fastapi_response: Response, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( add_litellm_data_to_request, general_settings, llm_router, @@ -108,7 +108,7 @@ async def rerank( user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data ) verbose_proxy_logger.error( - "litellm.proxy.proxy_server.rerank(): Exception occured - {}".format(str(e)) + "litellm_proxy.proxy_server.rerank(): Exception occurred - {}".format(str(e)) ) if isinstance(e, HTTPException): raise ProxyException( diff --git a/litellm/proxy/response_api_endpoints/endpoints.py b/litellm_proxy/response_api_endpoints/endpoints.py similarity index 95% rename from litellm/proxy/response_api_endpoints/endpoints.py rename to litellm_proxy/response_api_endpoints/endpoints.py index f9ddf306a7..080a75057a 100644 --- a/litellm/proxy/response_api_endpoints/endpoints.py +++ b/litellm_proxy/response_api_endpoints/endpoints.py @@ -1,8 +1,8 @@ from fastapi import APIRouter, Depends, Request, Response -from litellm.proxy._types import * -from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth, user_api_key_auth -from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing +from litellm_proxy._types import * +from litellm_proxy.auth.user_api_key_auth import UserAPIKeyAuth, user_api_key_auth +from litellm_proxy.common_request_processing import ProxyBaseLLMRequestProcessing router = APIRouter() @@ -35,7 +35,7 @@ async def responses_api( }' ``` """ - from litellm.proxy.proxy_server import ( + from litellm_proxy.proxy_server import ( _read_request_body, general_settings, llm_router, diff --git a/litellm/proxy/route_llm_request.py b/litellm_proxy/route_llm_request.py similarity index 100% rename from litellm/proxy/route_llm_request.py rename to litellm_proxy/route_llm_request.py diff --git a/litellm/proxy/schema.prisma b/litellm_proxy/schema.prisma similarity index 100% rename from litellm/proxy/schema.prisma rename to litellm_proxy/schema.prisma diff --git a/litellm/proxy/spend_tracking/spend_management_endpoints.py b/litellm_proxy/spend_tracking/spend_management_endpoints.py similarity index 97% rename from litellm/proxy/spend_tracking/spend_management_endpoints.py rename to
index 70370f3b53..b860e29eba 100644
--- a/litellm/proxy/spend_tracking/spend_management_endpoints.py
+++ b/litellm_proxy/spend_tracking/spend_management_endpoints.py
@@ -10,16 +10,16 @@ from fastapi import APIRouter, Depends, HTTPException, status
 
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import *
-from litellm.proxy._types import ProviderBudgetResponse, ProviderBudgetResponseObject
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-from litellm.proxy.spend_tracking.spend_tracking_utils import (
+from litellm_proxy._types import *
+from litellm_proxy._types import ProviderBudgetResponse, ProviderBudgetResponseObject
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy.spend_tracking.spend_tracking_utils import (
     get_spend_by_team_and_customer,
 )
-from litellm.proxy.utils import handle_exception_on_proxy
+from litellm_proxy.utils import handle_exception_on_proxy
 
 if TYPE_CHECKING:
-    from litellm.proxy.proxy_server import PrismaClient
+    from litellm_proxy.proxy_server import PrismaClient
 else:
     PrismaClient = Any
@@ -43,7 +43,7 @@ async def spend_key_fn():
     ```
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     try:
         if prisma_client is None:
@@ -88,7 +88,7 @@ async def spend_user_fn(
     -H "Authorization: Bearer sk-1234"
     ```
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     try:
         if prisma_client is None:
@@ -150,7 +150,7 @@ async def view_spend_tags(
     """
     from enterprise.utils import get_spend_by_tags
 
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     try:
         if prisma_client is None:
@@ -193,7 +193,7 @@ async def view_spend_tags(
 async def get_global_activity_internal_user(
     user_api_key_dict: UserAPIKeyAuth, start_date: datetime, end_date: datetime
 ):
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})
@@ -270,7 +270,7 @@ async def get_global_activity(
     start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
     end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
 
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     try:
         if prisma_client is None:
@@ -335,7 +335,7 @@ async def get_global_activity(
 async def get_global_activity_model_internal_user(
     user_api_key_dict: UserAPIKeyAuth, start_date: datetime, end_date: datetime
 ):
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})
@@ -436,7 +436,7 @@ async def get_global_activity_model(
     start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
     end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
 
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     try:
         if prisma_client is None:
@@ -588,7 +588,7 @@ async def get_global_activity_exceptions_per_deployment(
     start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
     end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
 
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     try:
         if prisma_client is None:
@@ -721,7 +721,7 @@ async def get_global_activity_exceptions(
     start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
     end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
 
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     try:
         if prisma_client is None:
@@ -827,7 +827,7 @@ async def get_global_spend_provider(
     start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
     end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
 
-    from litellm.proxy.proxy_server import llm_router, prisma_client
+    from litellm_proxy.proxy_server import llm_router, prisma_client
 
     try:
         if prisma_client is None:
@@ -986,7 +986,7 @@ async def get_global_spend_report(
     start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
     end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
 
-    from litellm.proxy.proxy_server import premium_user, prisma_client
+    from litellm_proxy.proxy_server import premium_user, prisma_client
 
     try:
         if prisma_client is None:
@@ -1278,7 +1278,7 @@ async def get_global_spend_report(
 )
 async def global_get_all_tag_names():
     try:
-        from litellm.proxy.proxy_server import prisma_client
+        from litellm_proxy.proxy_server import prisma_client
 
         if prisma_client is None:
             raise Exception(
@@ -1358,7 +1358,7 @@ async def global_view_spend_tags(
     """
     import traceback
 
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     try:
         if prisma_client is None:
@@ -1405,7 +1405,7 @@ async def _get_spend_report_for_time_range(
     start_date: str,
     end_date: str,
 ):
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         verbose_proxy_logger.error(
@@ -1519,7 +1519,7 @@ async def calculate_spend(request: SpendCalculateRequest):
     try:
         from litellm import completion_cost
         from litellm.cost_calculator import CostPerToken
-        from litellm.proxy.proxy_server import llm_router
+        from litellm_proxy.proxy_server import llm_router
 
         _cost = None
         if request.model is not None:
@@ -1667,7 +1667,7 @@ async def ui_view_spend_logs(  # noqa: PLR0915
         "total_pages": int  # Total number of pages
     }
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         raise ProxyException(
@@ -1864,7 +1864,7 @@ async def view_spend_logs(  # noqa: PLR0915
     -H "Authorization: Bearer sk-1234"
     ```
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if (
         user_api_key_dict.user_role == LitellmUserRoles.INTERNAL_USER
@@ -2037,7 +2037,7 @@ async def global_spend_reset():
     3. LiteLLM_TeamTable spend will be set = 0
 
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         raise ProxyException(
@@ -2070,7 +2070,7 @@ async def global_spend_refresh():
 
     Globally refresh spend MonthlyGlobalSpend view
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         raise ProxyException(
@@ -2107,9 +2107,9 @@ async def global_spend_refresh():
     REFRESH MATERIALIZED VIEW "MonthlyGlobalSpend";
     """
     try:
-        from litellm.proxy._types import CommonProxyErrors
-        from litellm.proxy.proxy_server import proxy_logging_obj
-        from litellm.proxy.utils import PrismaClient
+        from litellm_proxy._types import CommonProxyErrors
+        from litellm_proxy.proxy_server import proxy_logging_obj
+        from litellm_proxy.utils import PrismaClient
 
         db_url = os.getenv("DATABASE_URL")
         if db_url is None:
@@ -2143,7 +2143,7 @@ async def global_spend_for_internal_user(
     api_key: Optional[str] = None,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         raise ProxyException(
@@ -2203,7 +2203,7 @@ async def global_spend_logs(
         get_daily_spend_from_prometheus,
         is_prometheus_connected,
     )
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     try:
         if prisma_client is None:
@@ -2282,7 +2282,7 @@ async def global_spend():
     """
     import traceback
 
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     try:
         total_spend = 0.0
@@ -2319,7 +2319,7 @@ async def global_spend():
 async def global_spend_key_internal_user(
     user_api_key_dict: UserAPIKeyAuth, limit: int = 10
 ):
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})
@@ -2380,7 +2380,7 @@ async def global_spend_keys(
 
     Use this to get the top 'n' keys with the highest spend, ordered by spend.
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if (
         user_api_key_dict.user_role == LitellmUserRoles.INTERNAL_USER
@@ -2424,7 +2424,7 @@ async def global_spend_per_team():
 
     Use this to get daily spend, grouped by `team_id` and `date`
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})
@@ -2517,7 +2517,7 @@ async def global_view_all_end_users():
 
     Use this to just get all the unique `end_users`
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})
@@ -2549,7 +2549,7 @@ async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None):
 
     Use this to get the top 'n' keys with the highest spend, ordered by spend.
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})
@@ -2593,7 +2593,7 @@ LIMIT 100
 async def global_spend_models_internal_user(
     user_api_key_dict: UserAPIKeyAuth, limit: int = 10
 ):
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})
@@ -2641,7 +2641,7 @@ async def global_spend_models(
 
     Use this to get the top 'n' models with the highest spend, ordered by spend.
     """
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     if (
         user_api_key_dict.user_role == LitellmUserRoles.INTERNAL_USER
@@ -2711,7 +2711,7 @@ async def provider_budgets() -> ProviderBudgetResponse:
     ```
 
     """
-    from litellm.proxy.proxy_server import llm_router
+    from litellm_proxy.proxy_server import llm_router
 
     try:
         if llm_router is None:
diff --git a/litellm/proxy/spend_tracking/spend_tracking_utils.py b/litellm_proxy/spend_tracking/spend_tracking_utils.py
similarity index 98%
rename from litellm/proxy/spend_tracking/spend_tracking_utils.py
rename to litellm_proxy/spend_tracking/spend_tracking_utils.py
index ecd3963a92..5c1f3af301 100644
--- a/litellm/proxy/spend_tracking/spend_tracking_utils.py
+++ b/litellm_proxy/spend_tracking/spend_tracking_utils.py
@@ -11,8 +11,8 @@ from pydantic import BaseModel
 import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.litellm_core_utils.core_helpers import get_litellm_metadata_from_kwargs
-from litellm.proxy._types import SpendLogsMetadata, SpendLogsPayload
-from litellm.proxy.utils import PrismaClient, hash_token
+from litellm_proxy._types import SpendLogsMetadata, SpendLogsPayload
+from litellm_proxy.utils import PrismaClient, hash_token
 from litellm.types.utils import (
     StandardLoggingMCPToolCall,
     StandardLoggingModelInformation,
@@ -126,7 +126,7 @@ def get_spend_logs_id(
 def get_logging_payload(  # noqa: PLR0915
     kwargs, response_obj, start_time, end_time
 ) -> SpendLogsPayload:
-    from litellm.proxy.proxy_server import general_settings, master_key
+    from litellm_proxy.proxy_server import general_settings, master_key
 
     if kwargs is None:
         kwargs = {}
@@ -457,6 +457,6 @@ def _get_response_for_spend_logs_payload(
 
 
 def _should_store_prompts_and_responses_in_spend_logs() -> bool:
-    from litellm.proxy.proxy_server import general_settings
+    from litellm_proxy.proxy_server import general_settings
 
     return general_settings.get("store_prompts_in_spend_logs") is True
diff --git a/litellm/proxy/start.sh b/litellm_proxy/start.sh
similarity index 100%
rename from litellm/proxy/start.sh
rename to litellm_proxy/start.sh
diff --git a/litellm/proxy/swagger/favicon.png b/litellm_proxy/swagger/favicon.png
similarity index 100%
rename from litellm/proxy/swagger/favicon.png
rename to litellm_proxy/swagger/favicon.png
diff --git a/litellm/proxy/swagger/swagger-ui-bundle.js b/litellm_proxy/swagger/swagger-ui-bundle.js
similarity index 100%
rename from litellm/proxy/swagger/swagger-ui-bundle.js
rename to litellm_proxy/swagger/swagger-ui-bundle.js
diff --git a/litellm/proxy/swagger/swagger-ui.css b/litellm_proxy/swagger/swagger-ui.css
similarity index 100%
rename from litellm/proxy/swagger/swagger-ui.css
rename to litellm_proxy/swagger/swagger-ui.css
diff --git a/litellm/proxy/types_utils/README.md b/litellm_proxy/types_utils/README.md
similarity index 100%
rename from litellm/proxy/types_utils/README.md
rename to litellm_proxy/types_utils/README.md
diff --git a/litellm/proxy/types_utils/utils.py b/litellm_proxy/types_utils/utils.py
similarity index 100%
rename from litellm/proxy/types_utils/utils.py
rename to litellm_proxy/types_utils/utils.py
diff --git a/litellm/proxy/ui_crud_endpoints/proxy_setting_endpoints.py b/litellm_proxy/ui_crud_endpoints/proxy_setting_endpoints.py
similarity index 95%
rename from litellm/proxy/ui_crud_endpoints/proxy_setting_endpoints.py
rename to litellm_proxy/ui_crud_endpoints/proxy_setting_endpoints.py
index 6f1b2bfb8c..4f9faf85ef 100644
--- a/litellm/proxy/ui_crud_endpoints/proxy_setting_endpoints.py
+++ b/litellm_proxy/ui_crud_endpoints/proxy_setting_endpoints.py
@@ -5,8 +5,8 @@ from fastapi import APIRouter, Depends, HTTPException
 
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import *
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy._types import *
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
 from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams
 
 router = APIRouter()
@@ -23,7 +23,7 @@ class IPAddress(BaseModel):
     include_in_schema=False,
 )
 async def get_allowed_ips():
-    from litellm.proxy.proxy_server import general_settings
+    from litellm_proxy.proxy_server import general_settings
 
     _allowed_ip = general_settings.get("allowed_ips")
     return {"data": _allowed_ip}
@@ -35,7 +35,7 @@ async def get_allowed_ips():
     dependencies=[Depends(user_api_key_auth)],
 )
 async def add_allowed_ip(ip_address: IPAddress):
-    from litellm.proxy.proxy_server import (
+    from litellm_proxy.proxy_server import (
         general_settings,
         prisma_client,
         proxy_config,
@@ -86,7 +86,7 @@ async def add_allowed_ip(ip_address: IPAddress):
     dependencies=[Depends(user_api_key_auth)],
 )
 async def delete_allowed_ip(ip_address: IPAddress):
-    from litellm.proxy.proxy_server import general_settings, proxy_config
+    from litellm_proxy.proxy_server import general_settings, proxy_config
 
     _allowed_ips: List = general_settings.get("allowed_ips", [])
     if ip_address.ip in _allowed_ips:
@@ -174,7 +174,7 @@ async def get_sso_settings():
     Get all SSO settings from the litellm_settings configuration.
     Returns a structured object with values and descriptions for UI display.
     """
-    from litellm.proxy.proxy_server import proxy_config
+    from litellm_proxy.proxy_server import proxy_config
 
     # Load existing config
     config = await proxy_config.get_config()
@@ -196,7 +196,7 @@ async def get_default_team_settings():
     Get all SSO settings from the litellm_settings configuration.
     Returns a structured object with values and descriptions for UI display.
     """
-    from litellm.proxy.proxy_server import proxy_config
+    from litellm_proxy.proxy_server import proxy_config
 
     # Load existing config
     config = await proxy_config.get_config()
@@ -223,7 +223,7 @@ async def _update_litellm_setting(
         in_memory_var: The in-memory variable to update
         success_message: Message to return on success
     """
-    from litellm.proxy.proxy_server import proxy_config
+    from litellm_proxy.proxy_server import proxy_config
 
     # Update the in-memory settings
     in_memory_var = settings.model_dump(exclude_none=True)
diff --git a/litellm/proxy/utils.py b/litellm_proxy/utils.py
similarity index 99%
rename from litellm/proxy/utils.py
rename to litellm_proxy/utils.py
index 6e8c65710d..851fe86410 100644
--- a/litellm/proxy/utils.py
+++ b/litellm_proxy/utils.py
@@ -23,7 +23,7 @@ from typing import (
 )
 
 from litellm.constants import MAX_TEAM_LIST_LIMIT
-from litellm.proxy._types import (
+from litellm_proxy._types import (
     DB_CONNECTION_ERROR_TYPES,
     CommonProxyErrors,
     ProxyErrorTypes,
@@ -62,27 +62,27 @@ from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
 from litellm.integrations.SlackAlerting.utils import _add_langfuse_trace_id_to_alert
 from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
-from litellm.proxy._types import (
+from litellm_proxy._types import (
     AlertType,
     CallInfo,
     LiteLLM_VerificationTokenView,
     Member,
     UserAPIKeyAuth,
 )
-from litellm.proxy.db.create_views import (
+from litellm_proxy.db.create_views import (
     create_missing_views,
     should_create_missing_views,
 )
-from litellm.proxy.db.db_spend_update_writer import DBSpendUpdateWriter
-from litellm.proxy.db.log_db_metrics import log_db_metrics
-from litellm.proxy.db.prisma_client import PrismaWrapper
-from litellm.proxy.hooks import PROXY_HOOKS, get_proxy_hook
-from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
-from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
-from litellm.proxy.hooks.parallel_request_limiter import (
+from litellm_proxy.db.db_spend_update_writer import DBSpendUpdateWriter
+from litellm_proxy.db.log_db_metrics import log_db_metrics
+from litellm_proxy.db.prisma_client import PrismaWrapper
+from litellm_proxy.hooks import PROXY_HOOKS, get_proxy_hook
+from litellm_proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
+from litellm_proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
+from litellm_proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler,
 )
-from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
+from litellm_proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
 from litellm.secret_managers.main import str_to_bool
 from litellm.types.integrations.slack_alerting import DEFAULT_ALERT_TYPES
 from litellm.types.utils import CallTypes, LLMResponseTypes, LoggedLiteLLMParams
@@ -358,7 +358,7 @@ class ProxyLogging:
         """
         Add proxy hooks to litellm.callbacks
         """
-        from litellm.proxy.proxy_server import prisma_client
+        from litellm_proxy.proxy_server import prisma_client
 
         for hook in PROXY_HOOKS:
             proxy_hook = get_proxy_hook(hook)
@@ -2331,7 +2331,7 @@ class PrismaClient:
 
         This is used later to determine if we should run expensive UI Usage queries.
         """
-        from litellm.proxy.proxy_server import proxy_state
+        from litellm_proxy.proxy_server import proxy_state
 
         _num_spend_logs_rows = await self._get_spend_logs_row_count()
         proxy_state.set_proxy_state_variable(
@@ -2558,7 +2558,7 @@ class ProxyUpdateSpend:
         returns True if should not update spend in db
         Skips writing spend logs and updates to key, team, user spend to DB
         """
-        from litellm.proxy.proxy_server import general_settings
+        from litellm_proxy.proxy_server import general_settings
 
         if general_settings.get("disable_spend_updates") is True:
             return True
@@ -2787,7 +2787,7 @@ def _premium_user_check():
     """
     Raises an HTTPException if the user is not a premium user
     """
-    from litellm.proxy.proxy_server import premium_user
+    from litellm_proxy.proxy_server import premium_user
 
     if not premium_user:
         raise HTTPException(
diff --git a/litellm/proxy/vertex_ai_endpoints/langfuse_endpoints.py b/litellm_proxy/vertex_ai_endpoints/langfuse_endpoints.py
similarity index 93%
rename from litellm/proxy/vertex_ai_endpoints/langfuse_endpoints.py
rename to litellm_proxy/vertex_ai_endpoints/langfuse_endpoints.py
index 684e2ad061..7c98598ddb 100644
--- a/litellm/proxy/vertex_ai_endpoints/langfuse_endpoints.py
+++ b/litellm_proxy/vertex_ai_endpoints/langfuse_endpoints.py
@@ -17,10 +17,10 @@ import httpx
 from fastapi import APIRouter, Request, Response
 
 import litellm
-from litellm.proxy._types import *
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-from litellm.proxy.litellm_pre_call_utils import _get_dynamic_logging_metadata
-from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
+from litellm_proxy._types import *
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy.litellm_pre_call_utils import _get_dynamic_logging_metadata
+from litellm_proxy.pass_through_endpoints.pass_through_endpoints import (
     create_pass_through_route,
 )
@@ -53,7 +53,7 @@ async def langfuse_proxy_route(
 
     [Docs](https://docs.litellm.ai/docs/pass_through/langfuse)
     """
-    from litellm.proxy.proxy_server import proxy_config
+    from litellm_proxy.proxy_server import proxy_config
 
     ## CHECK FOR LITELLM API KEY IN THE QUERY PARAMS - ?..key=LITELLM_API_KEY
     api_key = request.headers.get("Authorization") or ""
diff --git a/tests/documentation_tests/test_api_docs.py b/tests/documentation_tests/test_api_docs.py
index 2faac371c3..eb2fe426f1 100644
--- a/tests/documentation_tests/test_api_docs.py
+++ b/tests/documentation_tests/test_api_docs.py
@@ -109,7 +109,7 @@ def analyze_function(func_info: FunctionInfo) -> Dict:
 
     for name, type_name in func_info.parameters:
         if type_name.endswith("Request") or type_name.endswith("Response"):
-            pydantic_model = getattr(litellm.proxy._types, type_name, None)
+            pydantic_model = getattr(litellm_proxy._types, type_name, None)
             if pydantic_model is not None:
                 for param in pydantic_model.model_fields.keys():
                     pydantic_params.add(param)
diff --git a/tests/litellm/conftest.py b/tests/litellm/conftest.py
index b3561d8a62..e4083f1933 100644
--- a/tests/litellm/conftest.py
+++ b/tests/litellm/conftest.py
@@ -29,11 +29,11 @@ def setup_and_teardown():
 
     try:
         if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
-            import litellm.proxy.proxy_server
+            import litellm_proxy.proxy_server
 
-            importlib.reload(litellm.proxy.proxy_server)
+            importlib.reload(litellm_proxy.proxy_server)
     except Exception as e:
-        print(f"Error reloading litellm.proxy.proxy_server: {e}")
+        print(f"Error reloading litellm_proxy.proxy_server: {e}")
 
     import asyncio
diff --git a/tests/litellm/integrations/gcs_pubsub/test_pub_sub.py b/tests/litellm/integrations/gcs_pubsub/test_pub_sub.py
index 7ff28bfe83..d8fa054863 100644
--- a/tests/litellm/integrations/gcs_pubsub/test_pub_sub.py
+++ b/tests/litellm/integrations/gcs_pubsub/test_pub_sub.py
@@ -24,7 +24,7 @@ async def test_construct_request_headers_project_id_from_env(monkeypatch):
     test_project_id = "test-project-123"
     monkeypatch.setenv("GCS_PUBSUB_PROJECT_ID", test_project_id)
     monkeypatch.setattr(
-        "litellm.proxy.proxy_server.premium_user",
+        "litellm_proxy.proxy_server.premium_user",
         True,
     )
 
diff --git a/tests/litellm/proxy/auth/test_auth_exception_handler.py b/tests/litellm/proxy/auth/test_auth_exception_handler.py
index 3e780c6ee9..8eb309d4aa 100644
--- a/tests/litellm/proxy/auth/test_auth_exception_handler.py
+++ b/tests/litellm/proxy/auth/test_auth_exception_handler.py
@@ -25,8 +25,8 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import ProxyErrorTypes, ProxyException
-from litellm.proxy.auth.auth_exception_handler import UserAPIKeyAuthExceptionHandler
+from litellm_proxy._types import ProxyErrorTypes, ProxyException
+from litellm_proxy.auth.auth_exception_handler import UserAPIKeyAuthExceptionHandler
 
 
 @pytest.mark.asyncio
@@ -67,7 +67,7 @@ async def test_handle_authentication_error_db_unavailable(prisma_error):
 
     # Test with DB connection error when requests are allowed
     with patch(
-        "litellm.proxy.proxy_server.general_settings",
+        "litellm_proxy.proxy_server.general_settings",
        {"allow_requests_on_db_unavailable": True},
    ):
         result = await handler._handle_authentication_error(
@@ -128,12 +128,12 @@ async def test_route_passed_to_post_call_failure_hook():
 
     # Mock proxy_logging_obj.post_call_failure_hook
     with patch(
-        "litellm.proxy.proxy_server.proxy_logging_obj.post_call_failure_hook",
+        "litellm_proxy.proxy_server.proxy_logging_obj.post_call_failure_hook",
         new_callable=AsyncMock,
     ) as mock_post_call_failure_hook:
         # Test with DB connection error
         with patch(
-            "litellm.proxy.proxy_server.general_settings",
+            "litellm_proxy.proxy_server.general_settings",
             {"allow_requests_on_db_unavailable": False},
         ):
             try:
diff --git a/tests/litellm/proxy/common_utils/test_http_parsing_utils.py b/tests/litellm/proxy/common_utils/test_http_parsing_utils.py
index 38624422c6..406d9016a3 100644
--- a/tests/litellm/proxy/common_utils/test_http_parsing_utils.py
+++ b/tests/litellm/proxy/common_utils/test_http_parsing_utils.py
@@ -14,7 +14,7 @@ sys.path.insert(
 
 import litellm
-from litellm.proxy.common_utils.http_parsing_utils import (
+from litellm_proxy.common_utils.http_parsing_utils import (
     _read_request_body,
     _safe_get_request_parsed_body,
     _safe_set_request_parsed_body,
diff --git a/tests/litellm/proxy/common_utils/test_reset_budget_job.py b/tests/litellm/proxy/common_utils/test_reset_budget_job.py
index bb4af00d78..87c3fddcd4 100644
--- a/tests/litellm/proxy/common_utils/test_reset_budget_job.py
+++ b/tests/litellm/proxy/common_utils/test_reset_budget_job.py
@@ -13,8 +13,8 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy.common_utils.reset_budget_job import ResetBudgetJob
-from litellm.proxy.utils import ProxyLogging
+from litellm_proxy.common_utils.reset_budget_job import ResetBudgetJob
+from litellm_proxy.utils import ProxyLogging
 
 
 # Mock classes for testing
diff --git a/tests/litellm/proxy/db/db_transaction_queue/test_base_update_queue.py b/tests/litellm/proxy/db/db_transaction_queue/test_base_update_queue.py
index e1d4cb0541..9d85a96675 100644
--- a/tests/litellm/proxy/db/db_transaction_queue/test_base_update_queue.py
+++ b/tests/litellm/proxy/db/db_transaction_queue/test_base_update_queue.py
@@ -11,7 +11,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 
 from litellm.constants import MAX_IN_MEMORY_QUEUE_FLUSH_COUNT
-from litellm.proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
+from litellm_proxy.db.db_transaction_queue.base_update_queue import BaseUpdateQueue
 
 
 @pytest.mark.asyncio
diff --git a/tests/litellm/proxy/db/db_transaction_queue/test_daily_spend_update_queue.py b/tests/litellm/proxy/db/db_transaction_queue/test_daily_spend_update_queue.py
index 86bd9b71d0..3b71989077 100644
--- a/tests/litellm/proxy/db/db_transaction_queue/test_daily_spend_update_queue.py
+++ b/tests/litellm/proxy/db/db_transaction_queue/test_daily_spend_update_queue.py
@@ -11,15 +11,15 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 
 import litellm
 from litellm.constants import MAX_SIZE_IN_MEMORY_QUEUE
-from litellm.proxy._types import (
+from litellm_proxy._types import (
     DailyUserSpendTransaction,
     Litellm_EntityType,
     SpendUpdateQueueItem,
 )
-from litellm.proxy.db.db_transaction_queue.daily_spend_update_queue import (
+from litellm_proxy.db.db_transaction_queue.daily_spend_update_queue import (
     DailySpendUpdateQueue,
 )
-from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
+from litellm_proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
 
 
 @pytest.fixture
diff --git a/tests/litellm/proxy/db/db_transaction_queue/test_pod_lock_manager.py b/tests/litellm/proxy/db/db_transaction_queue/test_pod_lock_manager.py
index e83fd75c3a..36f864f7b2 100644
--- a/tests/litellm/proxy/db/db_transaction_queue/test_pod_lock_manager.py
+++ b/tests/litellm/proxy/db/db_transaction_queue/test_pod_lock_manager.py
@@ -12,7 +12,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 
 from litellm.constants import DEFAULT_CRON_JOB_LOCK_TTL_SECONDS
-from litellm.proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager
+from litellm_proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager
 
 
 class MockRedisCache:
diff --git a/tests/litellm/proxy/db/db_transaction_queue/test_spend_update_queue.py b/tests/litellm/proxy/db/db_transaction_queue/test_spend_update_queue.py
index 9993b25dfd..1435d1b8db 100644
--- a/tests/litellm/proxy/db/db_transaction_queue/test_spend_update_queue.py
+++ b/tests/litellm/proxy/db/db_transaction_queue/test_spend_update_queue.py
@@ -7,8 +7,8 @@ import pytest
 from fastapi.testclient import TestClient
 
 from litellm.constants import MAX_SIZE_IN_MEMORY_QUEUE
-from litellm.proxy._types import Litellm_EntityType, SpendUpdateQueueItem
-from litellm.proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
+from litellm_proxy._types import Litellm_EntityType, SpendUpdateQueueItem
+from litellm_proxy.db.db_transaction_queue.spend_update_queue import SpendUpdateQueue
 
 sys.path.insert(
     0, os.path.abspath("../../..")
diff --git a/tests/litellm/proxy/db/test_check_migration.py b/tests/litellm/proxy/db/test_check_migration.py
index ad72a0d119..6ac96ec843 100644
--- a/tests/litellm/proxy/db/test_check_migration.py
+++ b/tests/litellm/proxy/db/test_check_migration.py
@@ -31,11 +31,11 @@ def test_check_migration_out_of_sync(mocker):
     mock_logger = mocker.patch("litellm._logging.verbose_logger")
 
     # Import the function after mocking the logger
-    from litellm.proxy.db.check_migration import check_prisma_schema_diff
+    from litellm_proxy.db.check_migration import check_prisma_schema_diff
 
     # Mock the helper function to simulate out-of-sync state
     mock_diff_helper = mocker.patch(
-        "litellm.proxy.db.check_migration.check_prisma_schema_diff_helper",
+        "litellm_proxy.db.check_migration.check_prisma_schema_diff_helper",
         return_value=(True, ["ALTER TABLE users ADD COLUMN new_field TEXT;"]),
     )
diff --git a/tests/litellm/proxy/db/test_db_spend_update_writer.py b/tests/litellm/proxy/db/test_db_spend_update_writer.py
index 02b94c44ec..fa5d535248 100644
--- a/tests/litellm/proxy/db/test_db_spend_update_writer.py
+++ b/tests/litellm/proxy/db/test_db_spend_update_writer.py
@@ -12,7 +12,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-from litellm.proxy.db.db_spend_update_writer import DBSpendUpdateWriter
+from litellm_proxy.db.db_spend_update_writer import DBSpendUpdateWriter
 
 
 @pytest.mark.asyncio
@@ -29,10 +29,10 @@ async def test_daily_spend_tracking_with_disabled_spend_logs():
     db_writer.add_spend_log_transaction_to_daily_user_transaction = AsyncMock()
 
     # Mock the imported modules/variables
-    with patch("litellm.proxy.proxy_server.disable_spend_logs", True), patch(
-        "litellm.proxy.proxy_server.prisma_client", MagicMock()
-    ), patch("litellm.proxy.proxy_server.user_api_key_cache", MagicMock()), patch(
-        "litellm.proxy.proxy_server.litellm_proxy_budget_name", "test-budget"
+    with patch("litellm_proxy.proxy_server.disable_spend_logs", True), patch(
+        "litellm_proxy.proxy_server.prisma_client", MagicMock()
+    ), patch("litellm_proxy.proxy_server.user_api_key_cache", MagicMock()), patch(
+        "litellm_proxy.proxy_server.litellm_proxy_budget_name", "test-budget"
     ):
         # Test data
         test_data = {
diff --git a/tests/litellm/proxy/db/test_exception_handler.py b/tests/litellm/proxy/db/test_exception_handler.py
index e68c9b6a99..0458f3780f 100644
--- a/tests/litellm/proxy/db/test_exception_handler.py
+++ b/tests/litellm/proxy/db/test_exception_handler.py
@@ -26,8 +26,8 @@ sys.path.insert(
 
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy._types import ProxyErrorTypes, ProxyException
-from litellm.proxy.db.exception_handler import PrismaDBExceptionHandler
+from litellm_proxy._types import ProxyErrorTypes, ProxyException
+from litellm_proxy.db.exception_handler import PrismaDBExceptionHandler
 
 
 # Test is_database_connection_error method
@@ -95,7 +95,7 @@ def test_is_database_connection_generic_errors():
 
 # Test should_allow_request_on_db_unavailable method
 @patch(
-    "litellm.proxy.proxy_server.general_settings",
+    "litellm_proxy.proxy_server.general_settings",
     {"allow_requests_on_db_unavailable": True},
 )
 def test_should_allow_request_on_db_unavailable_true():
@@ -103,7 +103,7 @@ def test_should_allow_request_on_db_unavailable_true():
 
 
 @patch(
-    "litellm.proxy.proxy_server.general_settings",
+    "litellm_proxy.proxy_server.general_settings",
     {"allow_requests_on_db_unavailable": False},
 )
 def test_should_allow_request_on_db_unavailable_false():
@@ -111,7 +111,7 @@ def test_should_allow_request_on_db_unavailable_false():
 
 
 @patch(
-    "litellm.proxy.proxy_server.general_settings",
+    "litellm_proxy.proxy_server.general_settings",
     {"allow_requests_on_db_unavailable": True},
 )
 def test_handle_db_exception_with_connection_error():
@@ -124,7 +124,7 @@ def test_handle_db_exception_with_connection_error():
 
 
 @patch(
-    "litellm.proxy.proxy_server.general_settings",
+    "litellm_proxy.proxy_server.general_settings",
     {"allow_requests_on_db_unavailable": False},
 )
 def test_handle_db_exception_raises_error():
diff --git a/tests/litellm/proxy/db/test_prisma_client.py b/tests/litellm/proxy/db/test_prisma_client.py
index c7e99aa754..274574ab43 100644
--- a/tests/litellm/proxy/db/test_prisma_client.py
+++ b/tests/litellm/proxy/db/test_prisma_client.py
@@ -10,7 +10,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 
 
-from litellm.proxy.db.prisma_client import should_update_prisma_schema
+from litellm_proxy.db.prisma_client import should_update_prisma_schema
 
 
 def test_should_update_prisma_schema(monkeypatch):
diff --git a/tests/litellm/proxy/experimental/mcp_server/test_tool_registry.py b/tests/litellm/proxy/experimental/mcp_server/test_tool_registry.py
index d5ba9744c7..6c35604ed8 100644
--- a/tests/litellm/proxy/experimental/mcp_server/test_tool_registry.py
+++ b/tests/litellm/proxy/experimental/mcp_server/test_tool_registry.py
@@ -8,7 +8,7 @@ sys.path.insert(
     0, os.path.abspath("../../..")
 )  # Adds the parent directory to the system path
 
-from litellm.proxy._experimental.mcp_server.tool_registry import MCPToolRegistry
+from litellm_proxy._experimental.mcp_server.tool_registry import MCPToolRegistry
 
 
 # Test handler function
diff --git a/tests/litellm/proxy/health_endpoints/test_health_endpoints.py b/tests/litellm/proxy/health_endpoints/test_health_endpoints.py
index e2dd429357..8c43cb1a96 100644
--- a/tests/litellm/proxy/health_endpoints/test_health_endpoints.py
+++ b/tests/litellm/proxy/health_endpoints/test_health_endpoints.py
@@ -12,8 +12,8 @@ sys.path.insert(
 
 import pytest
 from prisma.errors import ClientNotConnectedError, HTTPClientClosedError, PrismaError
 
-from litellm.proxy._types import ProxyErrorTypes, ProxyException
-from litellm.proxy.health_endpoints._health_endpoints import (
+from litellm_proxy._types import ProxyErrorTypes, ProxyException
+from litellm_proxy.health_endpoints._health_endpoints import (
     _db_health_readiness_check,
     db_health_cache,
 )
@@ -46,8 +46,8 @@ async def test_db_health_readiness_check_with_prisma_error(prisma_error):
     }
 
     # Patch the imports and general_settings
-    with patch("litellm.proxy.proxy_server.prisma_client", mock_prisma_client), patch(
-        "litellm.proxy.proxy_server.general_settings",
+    with patch("litellm_proxy.proxy_server.prisma_client", mock_prisma_client), patch(
+        "litellm_proxy.proxy_server.general_settings",
         {"allow_requests_on_db_unavailable": True},
     ):
 
@@ -88,8 +88,8 @@ async def test_db_health_readiness_check_with_error_and_flag_off(prisma_error):
     }
 
     # Patch the imports and general_settings where the flag is False
-    with patch("litellm.proxy.proxy_server.prisma_client", mock_prisma_client), patch(
-        "litellm.proxy.proxy_server.general_settings",
+    with patch("litellm_proxy.proxy_server.prisma_client", mock_prisma_client), patch(
+        "litellm_proxy.proxy_server.general_settings",
         {"allow_requests_on_db_unavailable": False},
     ):
 
diff --git a/tests/litellm/proxy/hooks/test_managed_files.py b/tests/litellm/proxy/hooks/test_managed_files.py
index b76e6c76d5..04d4263e8c 100644
--- a/tests/litellm/proxy/hooks/test_managed_files.py
+++ b/tests/litellm/proxy/hooks/test_managed_files.py
@@ -12,8 +12,8 @@ sys.path.insert(
 from unittest.mock import MagicMock
 
 from litellm.caching import DualCache
-from litellm.proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles
 from litellm.types.utils import SpecialEnums
+from litellm_proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles
 
 
 def test_get_file_ids_from_messages():
diff --git a/tests/litellm/proxy/hooks/test_proxy_track_cost_callback.py b/tests/litellm/proxy/hooks/test_proxy_track_cost_callback.py
index cb6d90103f..8ab9d8fd9e 100644
--- a/tests/litellm/proxy/hooks/test_proxy_track_cost_callback.py
+++ b/tests/litellm/proxy/hooks/test_proxy_track_cost_callback.py
@@ -12,9 +12,9 @@ sys.path.insert(
 from datetime import datetime
 from unittest.mock import AsyncMock, MagicMock, patch
 
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.hooks.proxy_track_cost_callback import _ProxyDBLogger
 from litellm.types.utils import StandardLoggingPayload
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.hooks.proxy_track_cost_callback import _ProxyDBLogger
 
 
 @pytest.mark.asyncio
@@ -47,7 +47,7 @@ async def test_async_post_call_failure_hook():
 
     # Mock update_database function
     with patch(
-        "litellm.proxy.db.db_spend_update_writer.DBSpendUpdateWriter.update_database",
+        "litellm_proxy.db.db_spend_update_writer.DBSpendUpdateWriter.update_database",
         new_callable=AsyncMock,
     ) as mock_update_database:
         # Call the method
@@ -114,7 +114,7 @@ async def test_async_post_call_failure_hook_non_llm_route():
 
     # Mock update_database function
     with patch(
-        "litellm.proxy.db.db_spend_update_writer.DBSpendUpdateWriter.update_database",
+        "litellm_proxy.db.db_spend_update_writer.DBSpendUpdateWriter.update_database",
         new_callable=AsyncMock,
     ) as mock_update_database:
         # Call the method
diff --git a/tests/litellm/proxy/management_endpoints/scim/test_scim_transformations.py b/tests/litellm/proxy/management_endpoints/scim/test_scim_transformations.py
index 9432fab68f..042b161611 100644
--- a/tests/litellm/proxy/management_endpoints/scim/test_scim_transformations.py
+++ b/tests/litellm/proxy/management_endpoints/scim/test_scim_transformations.py
@@ -14,11 +14,11 @@ sys.path.insert(
     0, os.path.abspath("../../../")
 )  # Adds the parent directory to the system path
 
-from litellm.proxy._types import LiteLLM_TeamTable, LiteLLM_UserTable, Member
-from litellm.proxy.management_endpoints.scim.scim_transformations import (
+from litellm.types.proxy.management_endpoints.scim_v2 import SCIMGroup, SCIMUser
+from litellm_proxy._types import LiteLLM_TeamTable, LiteLLM_UserTable, Member
+from litellm_proxy.management_endpoints.scim.scim_transformations import (
     ScimTransformations,
 )
-from litellm.types.proxy.management_endpoints.scim_v2 import SCIMGroup, SCIMUser
 
 
 # Mock data
@@ -115,7 +115,7 @@ class TestScimTransformations:
 
         mock_find_unique.side_effect = [team1, team2]
 
-        with patch("litellm.proxy.proxy_server.prisma_client", mock_client):
+        with patch("litellm_proxy.proxy_server.prisma_client", mock_client):
             scim_user = await ScimTransformations.transform_litellm_user_to_scim_user(
                 mock_user
             )
@@ -145,7 +145,7 @@ class TestScimTransformations:
         )
         mock_find_unique.return_value = team1
 
-        with patch("litellm.proxy.proxy_server.prisma_client", mock_client):
+        with patch("litellm_proxy.proxy_server.prisma_client", mock_client):
             scim_user = await ScimTransformations.transform_litellm_user_to_scim_user(
                 mock_user_with_scim_metadata
             )
@@ -159,7 +159,7 @@ class TestScimTransformations:
     ):
         mock_client, _ = mock_prisma_client
 
-        with patch("litellm.proxy.proxy_server.prisma_client", mock_client):
+        with patch("litellm_proxy.proxy_server.prisma_client", mock_client):
             scim_group = await ScimTransformations.transform_litellm_team_to_scim_group(
                 mock_team
            )
diff --git a/tests/litellm/proxy/management_endpoints/test_common_daily_activity.py b/tests/litellm/proxy/management_endpoints/test_common_daily_activity.py
index ffaed2d88f..a623e5bc98 100644
--- a/tests/litellm/proxy/management_endpoints/test_common_daily_activity.py
+++ b/tests/litellm/proxy/management_endpoints/test_common_daily_activity.py
@@ -11,8 +11,8 @@ sys.path.insert(
     0, os.path.abspath("../../../..")
 )  # Adds the parent directory to the system path
 
-from litellm.proxy.management_endpoints.common_daily_activity import get_daily_activity
-from litellm.proxy.proxy_server import app
+from litellm_proxy.management_endpoints.common_daily_activity import get_daily_activity
+from litellm_proxy.proxy_server import app
 
 client = TestClient(app)
 
diff --git a/tests/litellm/proxy/management_endpoints/test_internal_user_endpoints.py b/tests/litellm/proxy/management_endpoints/test_internal_user_endpoints.py
index 360f21f171..107bc4a1fc 100644
--- a/tests/litellm/proxy/management_endpoints/test_internal_user_endpoints.py
+++ b/tests/litellm/proxy/management_endpoints/test_internal_user_endpoints.py
@@ -10,14 +10,14 @@ sys.path.insert(
     0, os.path.abspath("../../../..")
 )  # Adds the parent directory to the system path
 
-from litellm.proxy._types import LiteLLM_UserTableFiltered, UserAPIKeyAuth
-from litellm.proxy.management_endpoints.internal_user_endpoints import (
+from litellm_proxy._types import LiteLLM_UserTableFiltered, UserAPIKeyAuth
+from litellm_proxy.management_endpoints.internal_user_endpoints import (
     LiteLLM_UserTableWithKeyCount,
     get_user_key_counts,
     get_users,
     ui_view_users,
 )
-from litellm.proxy.proxy_server import app
+from litellm_proxy.proxy_server import app
 
 client = TestClient(app)
 
@@ -47,7 +47,7 @@ async def test_ui_view_users_with_null_email(mocker, caplog):
     mock_prisma_client.db.litellm_usertable.find_many = mock_find_many
 
     # Patch the prisma client import in the endpoint
-    mocker.patch("litellm.proxy.proxy_server.prisma_client", mock_prisma_client)
+    mocker.patch("litellm_proxy.proxy_server.prisma_client", mock_prisma_client)
 
     # Call ui_view_users function directly
     response = await ui_view_users(
@@ -67,7 +67,7 @@ def test_user_daily_activity_types():
     """
    Assert all fiels in SpendMetrics are reported in DailySpendMetadata as "total_"
    """
-    from litellm.proxy.management_endpoints.common_daily_activity import (
+    from litellm_proxy.management_endpoints.common_daily_activity import (
         DailySpendMetadata,
         SpendMetrics,
     )
@@ -121,14 +121,14 @@ async def test_get_users_includes_timestamps(mocker):
     mock_prisma_client.db.litellm_usertable.count = mock_count
 
     # Patch the prisma client import in the endpoint
-    mocker.patch("litellm.proxy.proxy_server.prisma_client", mock_prisma_client)
+    mocker.patch("litellm_proxy.proxy_server.prisma_client", mock_prisma_client)
 
     # Mock the helper function get_user_key_counts
     async def mock_get_user_key_counts(*args, **kwargs):
         return {"test-user-timestamps": 0}
 
     mocker.patch(
-        "litellm.proxy.management_endpoints.internal_user_endpoints.get_user_key_counts",
+        "litellm_proxy.management_endpoints.internal_user_endpoints.get_user_key_counts",
         mock_get_user_key_counts,
     )
 
@@ -159,7 +159,7 @@ def test_validate_sort_params():
     """
     Test that validate_sort_params returns None if sort_by is None
     """
-    from litellm.proxy.management_endpoints.internal_user_endpoints import (
+    from litellm_proxy.management_endpoints.internal_user_endpoints import (
         _validate_sort_params,
     )
diff --git a/tests/litellm/proxy/management_endpoints/test_key_management_endpoints.py b/tests/litellm/proxy/management_endpoints/test_key_management_endpoints.py
index c436e08901..0da566fa53 100644
--- a/tests/litellm/proxy/management_endpoints/test_key_management_endpoints.py
+++ b/tests/litellm/proxy/management_endpoints/test_key_management_endpoints.py
@@ -11,8 +11,8 @@ sys.path.insert(
 
 from unittest.mock import AsyncMock, MagicMock
 
-from litellm.proxy.management_endpoints.key_management_endpoints import _list_key_helper
-from litellm.proxy.proxy_server import app
+from litellm_proxy.management_endpoints.key_management_endpoints import _list_key_helper
+from litellm_proxy.proxy_server import app
 
 client = TestClient(app)
 
@@ -73,15 +73,15 @@ async def test_key_token_handling(monkeypatch):
         return_value=MagicMock(token="hashed_token_123", litellm_budget_table=None)
     )
 
-    from litellm.proxy._types import GenerateKeyRequest, LitellmUserRoles
-    from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth
-    from litellm.proxy.management_endpoints.key_management_endpoints import (
+    from litellm_proxy._types import GenerateKeyRequest, LitellmUserRoles
+    from litellm_proxy.auth.user_api_key_auth import UserAPIKeyAuth
+    from litellm_proxy.management_endpoints.key_management_endpoints import (
         generate_key_fn,
     )
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
 
     # Use monkeypatch to set the prisma_client
-    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", mock_prisma_client)
+    monkeypatch.setattr("litellm_proxy.proxy_server.prisma_client", mock_prisma_client)
 
     # Test key generation
     response = await generate_key_fn(
diff --git a/tests/litellm/proxy/management_endpoints/test_model_management_endpoints.py b/tests/litellm/proxy/management_endpoints/test_model_management_endpoints.py
index 3bed3fc761..70f81b7e70 100644
--- a/tests/litellm/proxy/management_endpoints/test_model_management_endpoints.py
+++ b/tests/litellm/proxy/management_endpoints/test_model_management_endpoints.py
@@ -9,17 +9,17 @@ from fastapi.testclient import TestClient
 sys.path.insert(
     0, os.path.abspath("../../../..")
 )  # Adds the parent directory to the system path
 
-from litellm.proxy._types import (
+from litellm.types.router import Deployment, LiteLLM_Params, updateDeployment
+from litellm_proxy._types import (
     LiteLLM_TeamTable,
     LitellmUserRoles,
     Member,
     UserAPIKeyAuth,
 )
-from litellm.proxy.management_endpoints.model_management_endpoints import (
+from litellm_proxy.management_endpoints.model_management_endpoints import (
     ModelManagementAuthChecks,
 )
-from litellm.proxy.utils import PrismaClient
-from litellm.types.router import Deployment, LiteLLM_Params, updateDeployment
+from litellm_proxy.utils import PrismaClient
 
 
 class MockPrismaClient:
diff --git a/tests/litellm/proxy/management_endpoints/test_tag_management_endpoints.py b/tests/litellm/proxy/management_endpoints/test_tag_management_endpoints.py
index 8c2da0cc8a..de2c15485a 100644
--- a/tests/litellm/proxy/management_endpoints/test_tag_management_endpoints.py
+++ b/tests/litellm/proxy/management_endpoints/test_tag_management_endpoints.py
@@ -13,8 +13,8 @@ sys.path.insert(
 
 from unittest.mock import patch
 
 import litellm
-from litellm.proxy.proxy_server import app
 from litellm.types.tag_management import TagDeleteRequest, TagInfoRequest, TagNewRequest
+from litellm_proxy.proxy_server import app
 
 client = TestClient(app)
 
@@ -25,14 +25,14 @@ async def test_create_and_get_tag():
     Test creation of a new tag and retrieving its information
     """
     # Mock the prisma client and _get_tags_config and _save_tags_config
-    with patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma, patch(
-        "litellm.proxy.management_endpoints.tag_management_endpoints._get_tags_config"
+    with patch("litellm_proxy.proxy_server.prisma_client") as mock_prisma, patch(
+        "litellm_proxy.management_endpoints.tag_management_endpoints._get_tags_config"
     ) as mock_get_tags, patch(
-        "litellm.proxy.management_endpoints.tag_management_endpoints._save_tags_config"
+        "litellm_proxy.management_endpoints.tag_management_endpoints._save_tags_config"
     ) as mock_save_tags, patch(
-        "litellm.proxy.management_endpoints.tag_management_endpoints._add_tag_to_deployment"
+        "litellm_proxy.management_endpoints.tag_management_endpoints._add_tag_to_deployment"
     ) as mock_add_tag, patch(
-        "litellm.proxy.management_endpoints.tag_management_endpoints._get_model_names"
+        "litellm_proxy.management_endpoints.tag_management_endpoints._get_model_names"
     ) as mock_get_models:
         # Setup mocks
         mock_get_tags.return_value = {}
@@ -81,12 +81,12 @@ async def test_update_tag():
     Test updating an existing tag
     """
     # Mock the prisma client and _get_tags_config and _save_tags_config
-    with patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma, patch(
-        "litellm.proxy.management_endpoints.tag_management_endpoints._get_tags_config"
+    with patch("litellm_proxy.proxy_server.prisma_client") as mock_prisma, patch(
+        "litellm_proxy.management_endpoints.tag_management_endpoints._get_tags_config"
     ) as mock_get_tags, patch(
-        "litellm.proxy.management_endpoints.tag_management_endpoints._save_tags_config"
+        "litellm_proxy.management_endpoints.tag_management_endpoints._save_tags_config"
     ) as mock_save_tags, patch(
-        "litellm.proxy.management_endpoints.tag_management_endpoints._get_model_names"
+        "litellm_proxy.management_endpoints.tag_management_endpoints._get_model_names"
     ) as mock_get_models:
         # Setup mocks for existing tag
         mock_get_tags.return_value = {
@@ -127,10 +127,10 @@ async def test_delete_tag():
     Test deleting a tag
     """
     # Mock the prisma client and _get_tags_config and _save_tags_config
-    with patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma, patch(
-        "litellm.proxy.management_endpoints.tag_management_endpoints._get_tags_config"
+    with patch("litellm_proxy.proxy_server.prisma_client") as mock_prisma, patch(
+        "litellm_proxy.management_endpoints.tag_management_endpoints._get_tags_config"
    ) as mock_get_tags, patch(
-        "litellm.proxy.management_endpoints.tag_management_endpoints._save_tags_config"
+        "litellm_proxy.management_endpoints.tag_management_endpoints._save_tags_config"
     ) as mock_save_tags:
         # Setup mocks for existing tag
         mock_get_tags.return_value = {
diff --git a/tests/litellm/proxy/management_endpoints/test_team_endpoints.py b/tests/litellm/proxy/management_endpoints/test_team_endpoints.py
index ec47196ae0..33bf54ac34 100644
--- a/tests/litellm/proxy/management_endpoints/test_team_endpoints.py
+++ b/tests/litellm/proxy/management_endpoints/test_team_endpoints.py
@@ -13,20 +13,20 @@ from fastapi.testclient import TestClient
 sys.path.insert(
     0, os.path.abspath("../../../")
 )  # Adds the parent directory to the system path
 
-from litellm.proxy._types import UserAPIKeyAuth  # Import UserAPIKeyAuth
-from litellm.proxy._types import LiteLLM_TeamTable, LitellmUserRoles
-from litellm.proxy.management_endpoints.team_endpoints import (
+from litellm_proxy._types import UserAPIKeyAuth  # Import UserAPIKeyAuth
+from litellm_proxy._types import LiteLLM_TeamTable, LitellmUserRoles
+from litellm_proxy.management_endpoints.team_endpoints import (
     user_api_key_auth,  # Assuming this dependency is needed
 )
-from litellm.proxy.management_endpoints.team_endpoints import (
diff --git a/tests/litellm/proxy/management_endpoints/test_ui_sso.py b/tests/litellm/proxy/management_endpoints/test_ui_sso.py
index d266cd0f61..af1ac4fa08 100644
--- a/tests/litellm/proxy/management_endpoints/test_ui_sso.py
+++ b/tests/litellm/proxy/management_endpoints/test_ui_sso.py
@@ -15,20 +15,20 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 
 import litellm
-from litellm.proxy._types import NewTeamRequest
-from litellm.proxy.auth.handle_jwt import JWTHandler
-from litellm.proxy.management_endpoints.types import CustomOpenID
-from litellm.proxy.management_endpoints.ui_sso import (
-    DefaultTeamSSOParams,
-    GoogleSSOHandler,
-    MicrosoftSSOHandler,
-    SSOAuthenticationHandler,
-)
 from litellm.types.proxy.management_endpoints.ui_sso import (
     MicrosoftGraphAPIUserGroupDirectoryObject,
     MicrosoftGraphAPIUserGroupResponse,
     MicrosoftServicePrincipalTeam,
 )
+from litellm_proxy._types import NewTeamRequest
+from litellm_proxy.auth.handle_jwt import JWTHandler
+from litellm_proxy.management_endpoints.types import CustomOpenID
+from litellm_proxy.management_endpoints.ui_sso import (
+    DefaultTeamSSOParams,
+    GoogleSSOHandler,
+    MicrosoftSSOHandler,
+    SSOAuthenticationHandler,
+)
 
 
 def test_microsoft_sso_handler_openid_from_response_user_principal_name():
@@ -254,7 +254,7 @@ async def test_get_user_groups_from_graph_api():
         return mock
 
     with patch(
-        "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client"
+        "litellm_proxy.management_endpoints.ui_sso.get_async_httpx_client"
     ) as mock_client:
         mock_client.return_value = MagicMock()
         mock_client.return_value.get = mock_get
@@ -306,7 +306,7 @@ async def test_get_user_groups_pagination():
         return mock
 
     with patch(
-        "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client"
+        "litellm_proxy.management_endpoints.ui_sso.get_async_httpx_client"
     ) as mock_client:
         mock_client.return_value = MagicMock()
         mock_client.return_value.get = mock_get
@@ -338,7 +338,7 @@ async def test_get_user_groups_empty_response():
         return mock
 
     with patch(
-        "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client"
+        "litellm_proxy.management_endpoints.ui_sso.get_async_httpx_client"
     ) as mock_client:
         mock_client.return_value = MagicMock()
         mock_client.return_value.get = mock_get
@@ -360,7 +360,7 @@ async def test_get_user_groups_error_handling():
         raise Exception("API Error")
 
     with patch(
-        "litellm.proxy.management_endpoints.ui_sso.get_async_httpx_client"
+        "litellm_proxy.management_endpoints.ui_sso.get_async_httpx_client"
     ) as mock_client:
         mock_client.return_value = MagicMock()
         mock_client.return_value.get = mock_get
@@ -447,7 +447,7 @@ async def test_default_team_params(team_params):
     mock_prisma.get_data = AsyncMock(return_value=None)
     mock_prisma.jsonify_team_object = MagicMock(side_effect=mock_jsonify_team_object)
 
-    with patch("litellm.proxy.proxy_server.prisma_client", mock_prisma):
+    with patch("litellm_proxy.proxy_server.prisma_client", mock_prisma):
        # Act
         team_id = str(uuid.uuid4())
         await MicrosoftSSOHandler.create_litellm_teams_from_service_principal_team_ids(
@@ -495,7 +495,7 @@ async def test_create_team_without_default_params():
     mock_prisma.get_data = AsyncMock(return_value=None)
     mock_prisma.jsonify_team_object = MagicMock(side_effect=mock_jsonify_team_object)
 
-    with patch("litellm.proxy.proxy_server.prisma_client", mock_prisma):
+    with patch("litellm_proxy.proxy_server.prisma_client", mock_prisma):
         # Act
         team_id = str(uuid.uuid4())
         await MicrosoftSSOHandler.create_litellm_teams_from_service_principal_team_ids(
diff --git a/tests/litellm/proxy/middleware/test_prometheus_auth_middleware.py b/tests/litellm/proxy/middleware/test_prometheus_auth_middleware.py
index b72ff75002..f493a947b3 100644
--- a/tests/litellm/proxy/middleware/test_prometheus_auth_middleware.py
+++ b/tests/litellm/proxy/middleware/test_prometheus_auth_middleware.py
@@ -16,8 +16,8 @@ from fastapi.responses import JSONResponse
 from fastapi.testclient import TestClient
 
 import litellm
-from litellm.proxy._types import SpecialHeaders
-from litellm.proxy.middleware.prometheus_auth_middleware import PrometheusAuthMiddleware
+from litellm_proxy._types import SpecialHeaders
+from litellm_proxy.middleware.prometheus_auth_middleware import PrometheusAuthMiddleware
 
 
 # Fake auth functions to simulate valid and invalid auth behavior.
@@ -32,7 +32,7 @@ async def fake_invalid_auth(request, api_key):
     raise Exception("Invalid API key")
 
 
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
 
 
 @pytest.fixture
@@ -70,7 +70,7 @@ def test_valid_auth_metrics(app_with_middleware, monkeypatch):
     litellm.require_auth_for_metrics_endpoint = True
     # Patch the auth function to simulate a valid authentication.
     monkeypatch.setattr(
-        "litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
+        "litellm_proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
         fake_valid_auth,
     )
@@ -95,7 +95,7 @@ def test_invalid_auth_metrics(app_with_middleware, monkeypatch):
     litellm.require_auth_for_metrics_endpoint = True
     # Patch the auth function to simulate a failed authentication.
     monkeypatch.setattr(
-        "litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
+        "litellm_proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
         fake_invalid_auth,
     )
@@ -119,7 +119,7 @@ def test_no_auth_metrics_when_disabled(app_with_middleware, monkeypatch):
         raise Exception("Auth should not be called")
 
     monkeypatch.setattr(
-        "litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
+        "litellm_proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
         should_not_be_called,
     )
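A practical consequence of the rename, visible in the middleware hunks above: any test that still patches the old `litellm.proxy.*` dotted path fails at patch time, because the target module no longer resolves. A minimal sketch of the updated `monkeypatch` usage; the fake auth function is illustrative:

```python
async def fake_valid_auth(request, api_key):
    # Pretend the API key is valid; real tests return a UserAPIKeyAuth object.
    return {"user_id": "test-user"}


def test_metrics_auth(monkeypatch):
    # String targets must use the new litellm_proxy module path; the old
    # "litellm.proxy.middleware..." path would raise at setattr time.
    monkeypatch.setattr(
        "litellm_proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
        fake_valid_auth,
    )
```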
diff --git a/tests/litellm/proxy/openai_files_endpoint/test_files_endpoint.py b/tests/litellm/proxy/openai_files_endpoint/test_files_endpoint.py
index 40f914d726..03be6304a8 100644
--- a/tests/litellm/proxy/openai_files_endpoint/test_files_endpoint.py
+++ b/tests/litellm/proxy/openai_files_endpoint/test_files_endpoint.py
@@ -14,15 +14,15 @@ sys.path.insert(
 
 import litellm
 from litellm import Router
-from litellm.proxy._types import LiteLLM_UserTableFiltered, UserAPIKeyAuth
-from litellm.proxy.hooks import get_proxy_hook
-from litellm.proxy.management_endpoints.internal_user_endpoints import ui_view_users
-from litellm.proxy.proxy_server import app
+from litellm_proxy._types import LiteLLM_UserTableFiltered, UserAPIKeyAuth
+from litellm_proxy.hooks import get_proxy_hook
+from litellm_proxy.management_endpoints.internal_user_endpoints import ui_view_users
+from litellm_proxy.proxy_server import app
 
 client = TestClient(app)
 from litellm.caching.caching import DualCache
-from litellm.proxy.proxy_server import hash_token
-from litellm.proxy.utils import ProxyLogging
+from litellm_proxy.proxy_server import hash_token
+from litellm_proxy.utils import ProxyLogging
 
 
 @pytest.fixture
@@ -71,7 +71,7 @@ def setup_proxy_logging_object(monkeypatch, llm_router: Router) -> ProxyLogging:
     )
     proxy_logging_object._add_proxy_hooks(llm_router)
     monkeypatch.setattr(
-        "litellm.proxy.proxy_server.proxy_logging_obj", proxy_logging_object
+        "litellm_proxy.proxy_server.proxy_logging_obj", proxy_logging_object
     )
     return proxy_logging_object
 
@@ -104,7 +104,7 @@ def test_mock_create_audio_file(mocker: MockerFixture, monkeypatch, llm_router:
     Asserts 'create_file' is called with the correct arguments
     """
     from litellm import Router
-    from litellm.proxy.utils import ProxyLogging
+    from litellm_proxy.utils import ProxyLogging
 
     mock_create_file = mocker.patch("litellm.files.main.create_file")
 
@@ -114,9 +114,9 @@ def test_mock_create_audio_file(mocker: MockerFixture, monkeypatch, llm_router:
 
     proxy_logging_obj._add_proxy_hooks(llm_router)
 
-    monkeypatch.setattr("litellm.proxy.proxy_server.llm_router", llm_router)
+    monkeypatch.setattr("litellm_proxy.proxy_server.llm_router", llm_router)
     monkeypatch.setattr(
-        "litellm.proxy.proxy_server.proxy_logging_obj", proxy_logging_obj
+        "litellm_proxy.proxy_server.proxy_logging_obj", proxy_logging_obj
     )
 
     # Create a simple test file content
@@ -183,7 +183,7 @@ def test_create_file_and_call_chat_completion_e2e(
     try:
         from litellm.types.llms.openai import OpenAIFileObject
 
-        monkeypatch.setattr("litellm.proxy.proxy_server.llm_router", llm_router)
+        monkeypatch.setattr("litellm_proxy.proxy_server.llm_router", llm_router)
         proxy_logging_object = setup_proxy_logging_object(monkeypatch, llm_router)
 
         # Create a simple test file content
@@ -326,12 +326,12 @@ def test_create_file_for_each_model(
     import asyncio
 
     from litellm import CreateFileRequest
-    from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth
-    from litellm.proxy.openai_files_endpoints.files_endpoints import (
+    from litellm.types.llms.openai import OpenAIFileObject, OpenAIFilesPurpose
+    from litellm_proxy._types import LitellmUserRoles, UserAPIKeyAuth
+    from litellm_proxy.openai_files_endpoints.files_endpoints import (
         create_file_for_each_model,
     )
-    from litellm.proxy.utils import ProxyLogging
-    from litellm.types.llms.openai import OpenAIFileObject, OpenAIFilesPurpose
+    from litellm_proxy.utils import ProxyLogging
 
     # Setup proxy logging
     proxy_logging_obj = ProxyLogging(
@@ -339,7 +339,7 @@ def test_create_file_for_each_model(
     )
     proxy_logging_obj._add_proxy_hooks(llm_router)
     monkeypatch.setattr(
-        "litellm.proxy.proxy_server.proxy_logging_obj", proxy_logging_obj
+        "litellm_proxy.proxy_server.proxy_logging_obj", proxy_logging_obj
    )
 
     # Mock user API key dict
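The files-endpoint tests above override module-level singletons (`llm_router`, `proxy_logging_obj`) on the renamed `proxy_server` module. A minimal sketch of that wiring, assuming `litellm_proxy` is importable; the empty router is a placeholder for a configured one:

```python
from litellm import Router


def test_with_router(monkeypatch):
    llm_router = Router(model_list=[])  # minimal router, for illustration only
    # Route all proxy request handling through this router for the test.
    monkeypatch.setattr("litellm_proxy.proxy_server.llm_router", llm_router)
```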
"litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" ) as mock_create_route: mock_ensure_token.return_value = ("test-auth-header", test_project) mock_get_token.return_value = (test_token, "") @@ -281,7 +281,7 @@ class TestVertexAIPassThroughHandler: """ Test that when no passthrough credentials are set, default credentials are used in the request """ - from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import ( + from litellm_proxy.pass_through_endpoints.passthrough_endpoint_router import ( PassthroughEndpointRouter, ) @@ -298,7 +298,7 @@ class TestVertexAIPassThroughHandler: ) monkeypatch.setattr( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", pass_through_router, ) @@ -316,11 +316,11 @@ class TestVertexAIPassThroughHandler: mock_response = Response() with mock.patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" ) as mock_ensure_token, mock.patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" ) as mock_get_token, mock.patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" ) as mock_create_route: mock_ensure_token.return_value = ("test-auth-header", default_project) mock_get_token.return_value = (default_credentials, "") @@ -350,7 +350,7 @@ class TestVertexAIPassThroughHandler: """ Test that when passthrough credentials are set, they are correctly used in the request """ - from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import ( + from litellm_proxy.pass_through_endpoints.passthrough_endpoint_router import ( PassthroughEndpointRouter, ) @@ -371,7 +371,7 @@ class TestVertexAIPassThroughHandler: ) monkeypatch.setattr( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", pass_through_router, ) @@ -393,11 +393,11 @@ class TestVertexAIPassThroughHandler: mock_response = Response() with mock.patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" ) as mock_ensure_token, mock.patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" ) as mock_get_token, mock.patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" ) as mock_create_route: mock_ensure_token.return_value = ("test-auth-header", test_project) mock_get_token.return_value = (test_token, "") @@ -434,12 +434,12 @@ class TestVertexAIPassThroughHandler: mock_response = Mock() with patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.user_api_key_auth" 
+ "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.user_api_key_auth" ) as mock_auth: mock_auth.return_value = {"api_key": "test-key-123"} with patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" ) as mock_pass_through: mock_pass_through.return_value = AsyncMock( return_value={"status": "success"} @@ -468,7 +468,7 @@ class TestVertexAIDiscoveryPassThroughHandler: """ Test that when passthrough credentials are set, they are correctly used in the request """ - from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import ( + from litellm_proxy.pass_through_endpoints.passthrough_endpoint_router import ( PassthroughEndpointRouter, ) @@ -485,7 +485,7 @@ class TestVertexAIDiscoveryPassThroughHandler: ) monkeypatch.setattr( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router", pass_through_router, ) @@ -513,11 +513,11 @@ class TestVertexAIDiscoveryPassThroughHandler: test_token = vertex_credentials with mock.patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._ensure_access_token_async" ) as mock_ensure_token, mock.patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.vertex_llm_base._get_token_and_url" ) as mock_get_token, mock.patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" ) as mock_create_route: mock_ensure_token.return_value = ("test-auth-header", test_project) mock_get_token.return_value = (test_token, "") @@ -551,12 +551,12 @@ class TestVertexAIDiscoveryPassThroughHandler: mock_response = Mock() with patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.user_api_key_auth" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.user_api_key_auth" ) as mock_auth: mock_auth.return_value = {"api_key": "test-key-123"} with patch( - "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" + "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route" ) as mock_pass_through: mock_pass_through.return_value = AsyncMock( return_value={"status": "success"} diff --git a/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py b/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py index 43d4dd9cd8..be516307ac 100644 --- a/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py +++ b/tests/litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py @@ -15,7 +15,7 @@ sys.path.insert( 0, os.path.abspath("../../..") ) # Adds the parent directory to the system path -from litellm.proxy.pass_through_endpoints.pass_through_endpoints import ( +from litellm_proxy.pass_through_endpoints.pass_through_endpoints import ( HttpPassThroughEndpointHelpers, ) diff --git a/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py b/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py index 
diff --git a/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py b/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py
index bd8c5f5a99..296872f0e4 100644
--- a/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py
+++ b/tests/litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py
@@ -16,7 +16,7 @@ sys.path.insert(
 
 from unittest.mock import Mock
 
-from litellm.proxy.pass_through_endpoints.common_utils import get_litellm_virtual_key
+from litellm_proxy.pass_through_endpoints.common_utils import get_litellm_virtual_key
 
 
 @pytest.mark.asyncio
diff --git a/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py b/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py
index 181ca59aac..f8b8c53161 100644
--- a/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py
+++ b/tests/litellm/proxy/spend_tracking/test_spend_management_endpoints.py
@@ -15,10 +15,11 @@ sys.path.insert(
 from unittest.mock import MagicMock, patch
 
 import litellm
-from litellm.proxy._types import SpendLogsPayload
-from litellm.proxy.hooks.proxy_track_cost_callback import _ProxyDBLogger
-from litellm.proxy.proxy_server import app, prisma_client
+import litellm_proxy
 from litellm.router import Router
+from litellm_proxy._types import SpendLogsPayload
+from litellm_proxy.hooks.proxy_track_cost_callback import _ProxyDBLogger
+from litellm_proxy.proxy_server import app, prisma_client
 
 ignored_keys = [
     "request_id",
@@ -97,7 +98,7 @@ async def test_ui_view_spend_logs_with_user_id(client, monkeypatch):
 
     # Apply the monkeypatch to replace the prisma_client
     mock_prisma_client = MockPrismaClient()
-    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", mock_prisma_client)
+    monkeypatch.setattr("litellm_proxy.proxy_server.prisma_client", mock_prisma_client)
 
     # Set up test dates
     start_date = (
@@ -188,7 +189,7 @@ async def test_ui_view_spend_logs_with_team_id(client, monkeypatch):
 
     # Apply the monkeypatch
     mock_prisma_client = MockPrismaClient()
-    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", mock_prisma_client)
+    monkeypatch.setattr("litellm_proxy.proxy_server.prisma_client", mock_prisma_client)
 
     # Set up test dates
     start_date = (
@@ -252,7 +253,7 @@ async def test_ui_view_spend_logs_pagination(client, monkeypatch):
 
     # Apply the monkeypatch
     mock_prisma_client = MockPrismaClient()
-    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", mock_prisma_client)
+    monkeypatch.setattr("litellm_proxy.proxy_server.prisma_client", mock_prisma_client)
 
     # Set up test dates
     start_date = (
@@ -388,7 +389,7 @@ async def test_ui_view_spend_logs_date_range_filter(client, monkeypatch):
 
     # Apply the monkeypatch
     mock_prisma_client = MockPrismaClient()
-    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", mock_prisma_client)
+    monkeypatch.setattr("litellm_proxy.proxy_server.prisma_client", mock_prisma_client)
 
     # Test with a date range that should only include the second log
     start_date = (today - datetime.timedelta(days=5)).strftime("%Y-%m-%d %H:%M:%S")
@@ -431,9 +432,9 @@ class TestSpendLogsPayload:
         # litellm._turn_on_debug()
 
         with patch.object(
-            litellm.proxy.db.db_spend_update_writer.DBSpendUpdateWriter,
+            litellm_proxy.db.db_spend_update_writer.DBSpendUpdateWriter,
             "_insert_spend_log_to_db",
-        ) as mock_client, patch.object(litellm.proxy.proxy_server, "prisma_client"):
+        ) as mock_client, patch.object(litellm_proxy.proxy_server, "prisma_client"):
             response = await litellm.acompletion(
                 model="gpt-4o",
                 messages=[{"role": "user", "content": "Hello, world!"}],
@@ -517,10 +518,10 @@ class TestSpendLogsPayload:
             client = AsyncHTTPHandler()
 
             with patch.object(
-                litellm.proxy.db.db_spend_update_writer.DBSpendUpdateWriter,
+                litellm_proxy.db.db_spend_update_writer.DBSpendUpdateWriter,
                 "_insert_spend_log_to_db",
             ) as mock_client, patch.object(
-                litellm.proxy.proxy_server, "prisma_client"
+                litellm_proxy.proxy_server, "prisma_client"
             ), patch.object(
                 client, "post", side_effect=self.mock_anthropic_response
             ):
@@ -605,10 +606,10 @@ class TestSpendLogsPayload:
             )
 
             with patch.object(
-                litellm.proxy.db.db_spend_update_writer.DBSpendUpdateWriter,
+                litellm_proxy.db.db_spend_update_writer.DBSpendUpdateWriter,
                 "_insert_spend_log_to_db",
             ) as mock_client, patch.object(
-                litellm.proxy.proxy_server, "prisma_client"
+                litellm_proxy.proxy_server, "prisma_client"
             ), patch.object(
                 client, "post", side_effect=self.mock_anthropic_response
             ):
@@ -748,7 +749,7 @@ async def test_global_spend_keys_endpoint_limit_validation(client, monkeypatch):
     mock_db.query_raw = mock_query_raw
     mock_prisma_client.db = mock_db
     # Apply the mock to the prisma_client module
-    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", mock_prisma_client)
+    monkeypatch.setattr("litellm_proxy.proxy_server.prisma_client", mock_prisma_client)
 
     # Call the endpoint without specifying a limit
     no_limit_response = client.get("/global/spend/keys")
diff --git a/tests/litellm/proxy/spend_tracking/test_spend_tracking_utils.py b/tests/litellm/proxy/spend_tracking/test_spend_tracking_utils.py
index 2bef2512f3..441d6f6b20 100644
--- a/tests/litellm/proxy/spend_tracking/test_spend_tracking_utils.py
+++ b/tests/litellm/proxy/spend_tracking/test_spend_tracking_utils.py
@@ -16,7 +16,7 @@ sys.path.insert(
 from unittest.mock import MagicMock, patch
 
 import litellm
-from litellm.proxy.spend_tracking.spend_tracking_utils import (
+from litellm_proxy.spend_tracking.spend_tracking_utils import (
     _sanitize_request_body_for_spend_logs_payload,
 )
diff --git a/tests/litellm/proxy/test_caching_routes.py b/tests/litellm/proxy/test_caching_routes.py
index 6f22f66501..fd0e493ac7 100644
--- a/tests/litellm/proxy/test_caching_routes.py
+++ b/tests/litellm/proxy/test_caching_routes.py
@@ -12,7 +12,7 @@ sys.path.insert(
 
 import litellm
 from litellm.caching import RedisCache
-from litellm.proxy.proxy_server import app
+from litellm_proxy.proxy_server import app
 
 client = TestClient(app)
diff --git a/tests/litellm/proxy/test_common_request_processing.py b/tests/litellm/proxy/test_common_request_processing.py
index 8e795f8b3b..33b28a8cc1 100644
--- a/tests/litellm/proxy/test_common_request_processing.py
+++ b/tests/litellm/proxy/test_common_request_processing.py
@@ -1,16 +1,25 @@
 import copy
+import os
+import sys
 import uuid
-import pytest
-import litellm
 from unittest.mock import AsyncMock, MagicMock
+
+import pytest
 from fastapi import Request
 
+sys.path.insert(
+    0, os.path.abspath("../../../..")
+)  # Adds the parent directory to the system path
+
+
+import litellm
+import litellm_proxy
 from litellm.integrations.opentelemetry import UserAPIKeyAuth
-from litellm.proxy.common_request_processing import (
+from litellm_proxy.common_request_processing import (
     ProxyBaseLLMRequestProcessing,
     ProxyConfig,
 )
-from litellm.proxy.utils import ProxyLogging
+from litellm_proxy.utils import ProxyLogging
 
 
 class TestProxyBaseLLMRequestProcessing:
@@ -38,7 +47,7 @@ class TestProxyBaseLLMRequestProcessing:
             side_effect=mock_common_processing_pre_call_logic
         )
         monkeypatch.setattr(
-            litellm.proxy.common_request_processing,
+            litellm_proxy.common_request_processing,
             "add_litellm_data_to_request",
             mock_add_litellm_data_to_request,
        )
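Note the second style introduced above: because `litellm_proxy` is now a top-level package, tests `import litellm_proxy` and pass the module object itself to `patch.object` or `monkeypatch.setattr`, instead of a `"litellm.proxy..."` string. A minimal sketch, assuming the relevant submodule has already been imported (as the from-imports above guarantee):

```python
import litellm_proxy
from unittest.mock import AsyncMock


def test_pre_call(monkeypatch):
    mock_add = AsyncMock()
    # Patch the attribute on the module object directly; no dotted-string
    # lookup, so a stale "litellm.proxy..." path can no longer sneak in.
    monkeypatch.setattr(
        litellm_proxy.common_request_processing,
        "add_litellm_data_to_request",
        mock_add,
    )
```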
diff --git a/tests/litellm/proxy/test_litellm_pre_call_utils.py b/tests/litellm/proxy/test_litellm_pre_call_utils.py
index 94f2c512ea..ba84ed8b25 100644
--- a/tests/litellm/proxy/test_litellm_pre_call_utils.py
+++ b/tests/litellm/proxy/test_litellm_pre_call_utils.py
@@ -5,8 +5,8 @@ from unittest.mock import MagicMock, patch
 
 import pytest
 
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.litellm_pre_call_utils import (
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.litellm_pre_call_utils import (
     _get_enforced_params,
     check_if_token_is_service_account,
 )
@@ -99,7 +99,7 @@ def test_get_enforced_params_for_service_account_settings():
 def test_get_enforced_params(
     general_settings, user_api_key_dict, expected_enforced_params
 ):
-    from litellm.proxy.litellm_pre_call_utils import _get_enforced_params
+    from litellm_proxy.litellm_pre_call_utils import _get_enforced_params
 
     enforced_params = _get_enforced_params(general_settings, user_api_key_dict)
     assert enforced_params == expected_enforced_params
diff --git a/tests/litellm/proxy/test_proxy_cli.py b/tests/litellm/proxy/test_proxy_cli.py
index 6e1d70553f..eb724d349d 100644
--- a/tests/litellm/proxy/test_proxy_cli.py
+++ b/tests/litellm/proxy/test_proxy_cli.py
@@ -17,7 +17,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system-path
 
 import litellm
-from litellm.proxy.proxy_cli import ProxyInitializationHelpers
+from litellm_proxy.proxy_cli import ProxyInitializationHelpers
 
 
 class TestProxyInitializationHelpers:
@@ -93,7 +93,7 @@ class TestProxyInitializationHelpers:
         args = ProxyInitializationHelpers._get_default_unvicorn_init_args(
             "localhost", 8000
         )
-        assert args["app"] == "litellm.proxy.proxy_server:app"
+        assert args["app"] == "litellm_proxy.proxy_server:app"
         assert args["host"] == "localhost"
         assert args["port"] == 8000
diff --git a/tests/litellm/proxy/test_proxy_server.py b/tests/litellm/proxy/test_proxy_server.py
index 919a00d670..41861b6e68 100644
--- a/tests/litellm/proxy/test_proxy_server.py
+++ b/tests/litellm/proxy/test_proxy_server.py
@@ -27,8 +27,8 @@ async def test_initialize_scheduled_jobs_credentials(monkeypatch):
     """
     monkeypatch.delenv("DISABLE_PRISMA_SCHEMA_UPDATE", raising=False)
     monkeypatch.delenv("STORE_MODEL_IN_DB", raising=False)
-    from litellm.proxy.proxy_server import ProxyStartupEvent
-    from litellm.proxy.utils import ProxyLogging
+    from litellm_proxy.proxy_server import ProxyStartupEvent
+    from litellm_proxy.utils import ProxyLogging
 
     # Mock dependencies
     mock_prisma_client = MagicMock()
@@ -36,8 +36,8 @@ async def test_initialize_scheduled_jobs_credentials(monkeypatch):
     mock_proxy_logging.slack_alerting_instance = MagicMock()
     mock_proxy_config = AsyncMock()
 
-    with patch("litellm.proxy.proxy_server.proxy_config", mock_proxy_config), patch(
-        "litellm.proxy.proxy_server.store_model_in_db", False
+    with patch("litellm_proxy.proxy_server.proxy_config", mock_proxy_config), patch(
+        "litellm_proxy.proxy_server.store_model_in_db", False
     ):  # set store_model_in_db to False
         # Test when store_model_in_db is False
         await ProxyStartupEvent.initialize_scheduled_background_jobs(
@@ -53,9 +53,9 @@ async def test_initialize_scheduled_jobs_credentials(monkeypatch):
         mock_proxy_config.get_credentials.assert_not_called()
 
     # Now test with store_model_in_db = True
-    with patch("litellm.proxy.proxy_server.proxy_config", mock_proxy_config), patch(
-        "litellm.proxy.proxy_server.store_model_in_db", True
-    ), patch("litellm.proxy.proxy_server.get_secret_bool", return_value=True):
+    with patch("litellm_proxy.proxy_server.proxy_config", mock_proxy_config), patch(
+        "litellm_proxy.proxy_server.store_model_in_db", True
+    ), patch("litellm_proxy.proxy_server.get_secret_bool", return_value=True):
         await ProxyStartupEvent.initialize_scheduled_background_jobs(
             general_settings={},
             prisma_client=mock_prisma_client,
@@ -93,7 +93,7 @@ mock_prisma = MockPrisma()
 
 
 @patch(
-    "litellm.proxy.proxy_server.ProxyStartupEvent._setup_prisma_client",
+    "litellm_proxy.proxy_server.ProxyStartupEvent._setup_prisma_client",
     return_value=mock_prisma,
 )
 @pytest.mark.asyncio
@@ -105,10 +105,10 @@ async def test_aaaproxy_startup_master_key(mock_prisma, monkeypatch, tmp_path):
     from fastapi import FastAPI
 
     # Import happens here - this is when the module probably reads the config path
-    from litellm.proxy.proxy_server import proxy_startup_event
+    from litellm_proxy.proxy_server import proxy_startup_event
 
     # Mock the Prisma import
-    monkeypatch.setattr("litellm.proxy.proxy_server.PrismaClient", MockPrisma)
+    monkeypatch.setattr("litellm_proxy.proxy_server.PrismaClient", MockPrisma)
 
     # Create test app
     app = FastAPI()
@@ -128,7 +128,7 @@ async def test_aaaproxy_startup_master_key(mock_prisma, monkeypatch, tmp_path):
     print(f"config_path: {config_path}")
     print(f"os.getenv('CONFIG_FILE_PATH'): {os.getenv('CONFIG_FILE_PATH')}")
     async with proxy_startup_event(app):
-        from litellm.proxy.proxy_server import master_key
+        from litellm_proxy.proxy_server import master_key
 
         assert master_key == test_master_key
 
@@ -143,7 +143,7 @@ async def test_aaaproxy_startup_master_key(mock_prisma, monkeypatch, tmp_path):
     monkeypatch.setenv("LITELLM_MASTER_KEY", test_env_master_key)
     print("test_env_master_key: {}".format(test_env_master_key))
     async with proxy_startup_event(app):
-        from litellm.proxy.proxy_server import master_key
+        from litellm_proxy.proxy_server import master_key
 
         assert master_key == test_env_master_key
 
@@ -159,7 +159,7 @@ async def test_aaaproxy_startup_master_key(mock_prisma, monkeypatch, tmp_path):
     monkeypatch.setenv("CUSTOM_MASTER_KEY", test_resolved_key)
 
     async with proxy_startup_event(app):
-        from litellm.proxy.proxy_server import master_key
+        from litellm_proxy.proxy_server import master_key
 
         assert master_key == test_resolved_key
 
@@ -170,7 +170,7 @@ def test_team_info_masking():
 
     Ref: https://huntr.com/bounties/661b388a-44d8-4ad5-862b-4dc5b80be30a
     """
-    from litellm.proxy.proxy_server import ProxyConfig
+    from litellm_proxy.proxy_server import ProxyConfig
 
     proxy_config = ProxyConfig()
     # Test team object with sensitive data
diff --git a/tests/litellm/proxy/test_route_llm_request.py b/tests/litellm/proxy/test_route_llm_request.py
index 6e8fedf0cb..f4acef8410 100644
--- a/tests/litellm/proxy/test_route_llm_request.py
+++ b/tests/litellm/proxy/test_route_llm_request.py
@@ -12,7 +12,7 @@ sys.path.insert(
 
 from unittest.mock import MagicMock
 
-from litellm.proxy.route_llm_request import route_request
+from litellm_proxy.route_llm_request import route_request
 
 
 @pytest.mark.parametrize(
diff --git a/tests/litellm/proxy/types_utils/test_utils.py b/tests/litellm/proxy/types_utils/test_utils.py
index 5685489bfc..25cded10d0 100644
--- a/tests/litellm/proxy/types_utils/test_utils.py
+++ b/tests/litellm/proxy/types_utils/test_utils.py
@@ -5,7 +5,7 @@ import sys
 
 import pytest
 from fastapi.testclient import TestClient
 
-from litellm.proxy.types_utils.utils import security_checks
+from litellm_proxy.types_utils.utils import security_checks
 
 sys.path.insert(
     0, os.path.abspath("../../..")
diff --git a/tests/litellm/proxy/ui_crud_endpoints/test_proxy_setting_endpoints.py b/tests/litellm/proxy/ui_crud_endpoints/test_proxy_setting_endpoints.py
index 0b29ef6c7a..592dcfac36 100644
--- a/tests/litellm/proxy/ui_crud_endpoints/test_proxy_setting_endpoints.py
+++ b/tests/litellm/proxy/ui_crud_endpoints/test_proxy_setting_endpoints.py
@@ -9,9 +9,9 @@ sys.path.insert(
     0, os.path.abspath("../../..")
 )  # Adds the parent directory to the system path
 
-from litellm.proxy._types import DefaultInternalUserParams, LitellmUserRoles
-from litellm.proxy.proxy_server import app
 from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams
+from litellm_proxy._types import DefaultInternalUserParams, LitellmUserRoles
+from litellm_proxy.proxy_server import app
 
 client = TestClient(app)
 
@@ -50,7 +50,7 @@ def mock_proxy_config(monkeypatch):
         mock_config = new_config
         return mock_config
 
-    from litellm.proxy.proxy_server import proxy_config
+    from litellm_proxy.proxy_server import proxy_config
 
     monkeypatch.setattr(proxy_config, "get_config", mock_get_config)
     monkeypatch.setattr(proxy_config, "save_config", mock_save_config)
@@ -66,12 +66,12 @@ def mock_auth(monkeypatch):
     async def mock_user_api_key_auth():
         return {"user_id": "test_user"}
 
-    from litellm.proxy.ui_crud_endpoints.proxy_setting_endpoints import (
+    from litellm_proxy.ui_crud_endpoints.proxy_setting_endpoints import (
         user_api_key_auth,
     )
 
     monkeypatch.setattr(
-        "litellm.proxy.ui_crud_endpoints.proxy_setting_endpoints.user_api_key_auth",
+        "litellm_proxy.ui_crud_endpoints.proxy_setting_endpoints.user_api_key_auth",
         mock_user_api_key_auth,
     )
diff --git a/tests/litellm_utils_tests/test_get_secret.py b/tests/litellm_utils_tests/test_get_secret.py
index eec67b5d76..e7d2dbb720 100644
--- a/tests/litellm_utils_tests/test_get_secret.py
+++ b/tests/litellm_utils_tests/test_get_secret.py
@@ -10,7 +10,7 @@ sys.path.insert(
 import pytest
 
 import litellm
-from litellm.proxy._types import KeyManagementSystem
+from litellm_proxy._types import KeyManagementSystem
 from litellm.secret_managers.main import get_secret
diff --git a/tests/litellm_utils_tests/test_proxy_budget_reset.py b/tests/litellm_utils_tests/test_proxy_budget_reset.py
index 1fbe493d8d..4601ce1558 100644
--- a/tests/litellm_utils_tests/test_proxy_budget_reset.py
+++ b/tests/litellm_utils_tests/test_proxy_budget_reset.py
@@ -20,8 +20,8 @@ sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
 
-from litellm.proxy.common_utils.reset_budget_job import ResetBudgetJob
-from litellm.proxy._types import (
+from litellm_proxy.common_utils.reset_budget_job import ResetBudgetJob
+from litellm_proxy._types import (
     LiteLLM_VerificationToken,
     LiteLLM_UserTable,
     LiteLLM_TeamTable,
@@ -386,7 +386,7 @@ async def test_service_logger_keys_success():
         side_effect=fake_reset_key,
     ):
         with patch(
-            "litellm.proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
+            "litellm_proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
         ) as mock_verbose_exc:
             await job.reset_budget_for_litellm_keys()
             # Allow async logging task to complete
@@ -443,7 +443,7 @@ async def test_service_logger_keys_failure():
         side_effect=fake_reset_key,
     ):
         with patch(
-            "litellm.proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
+            "litellm_proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
         ) as mock_verbose_exc:
             await job.reset_budget_for_litellm_keys()
             await asyncio.sleep(0.1)
@@ -501,7 +501,7 @@ async def test_service_logger_users_success():
         side_effect=fake_reset_user,
     ):
         with patch(
-            "litellm.proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
+            "litellm_proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
         ) as mock_verbose_exc:
             await job.reset_budget_for_litellm_users()
             await asyncio.sleep(0.1)
@@ -554,7 +554,7 @@ async def test_service_logger_users_failure():
         side_effect=fake_reset_user,
     ):
         with patch(
-            "litellm.proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
+            "litellm_proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
         ) as mock_verbose_exc:
             await job.reset_budget_for_litellm_users()
             await asyncio.sleep(0.1)
@@ -611,7 +611,7 @@ async def test_service_logger_teams_success():
         side_effect=fake_reset_team,
     ):
         with patch(
-            "litellm.proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
+            "litellm_proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
         ) as mock_verbose_exc:
             await job.reset_budget_for_litellm_teams()
             await asyncio.sleep(0.1)
@@ -664,7 +664,7 @@ async def test_service_logger_teams_failure():
         side_effect=fake_reset_team,
     ):
         with patch(
-            "litellm.proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
+            "litellm_proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
         ) as mock_verbose_exc:
             await job.reset_budget_for_litellm_teams()
             await asyncio.sleep(0.1)
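The budget-reset tests above assert on logging rather than return values. A condensed sketch of that pattern under the new module path; the body of the `with` block is a placeholder for the code under test:

```python
from unittest.mock import patch

with patch(
    "litellm_proxy.common_utils.reset_budget_job.verbose_proxy_logger.exception"
) as mock_exc:
    ...  # run the reset job here, e.g. await job.reset_budget_for_litellm_keys()
    # On the success path, no exception should have been logged.
    mock_exc.assert_not_called()
```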
diff --git a/tests/litellm_utils_tests/test_secret_manager.py b/tests/litellm_utils_tests/test_secret_manager.py
index fd1adeb964..1f5d5b4283 100644
--- a/tests/litellm_utils_tests/test_secret_manager.py
+++ b/tests/litellm_utils_tests/test_secret_manager.py
@@ -292,7 +292,7 @@ def test_should_read_secret_from_secret_manager():
     """
     Test that _should_read_secret_from_secret_manager returns correct values based on access mode
     """
-    from litellm.proxy._types import KeyManagementSettings
+    from litellm_proxy._types import KeyManagementSettings
 
     # Test when secret manager client is None
     litellm.secret_manager_client = None
@@ -323,7 +323,7 @@ def test_get_secret_with_access_mode():
     """
     Test that get_secret respects access mode settings
     """
-    from litellm.proxy._types import KeyManagementSettings
+    from litellm_proxy._types import KeyManagementSettings
 
     # Set up test environment
     test_secret_name = "TEST_SECRET_KEY"
diff --git a/tests/llm_responses_api_testing/conftest.py b/tests/llm_responses_api_testing/conftest.py
index b3561d8a62..e4083f1933 100644
--- a/tests/llm_responses_api_testing/conftest.py
+++ b/tests/llm_responses_api_testing/conftest.py
@@ -29,11 +29,11 @@ def setup_and_teardown():
 
     try:
         if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
-            import litellm.proxy.proxy_server
+            import litellm_proxy.proxy_server
 
-            importlib.reload(litellm.proxy.proxy_server)
+            importlib.reload(litellm_proxy.proxy_server)
     except Exception as e:
-        print(f"Error reloading litellm.proxy.proxy_server: {e}")
+        print(f"Error reloading litellm_proxy.proxy_server: {e}")
 
     import asyncio
diff --git a/tests/load_tests/test_memory_usage.py b/tests/load_tests/test_memory_usage.py
index f273865a29..a2e1db5a66 100644
--- a/tests/load_tests/test_memory_usage.py
+++ b/tests/load_tests/test_memory_usage.py
@@ -32,7 +32,7 @@ import gc
 from typing import Type
 from pydantic import BaseModel
 
-from litellm.proxy.proxy_server import app
+from litellm_proxy.proxy_server import app
 
 
 async def get_memory_usage() -> float:
diff --git a/tests/local_testing/conftest.py b/tests/local_testing/conftest.py
index b3561d8a62..e4083f1933 100644
--- a/tests/local_testing/conftest.py
+++ b/tests/local_testing/conftest.py
@@ -29,11 +29,11 @@ def setup_and_teardown():
 
     try:
         if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
-            import litellm.proxy.proxy_server
+            import litellm_proxy.proxy_server
 
-            importlib.reload(litellm.proxy.proxy_server)
+            importlib.reload(litellm_proxy.proxy_server)
     except Exception as e:
-        print(f"Error reloading litellm.proxy.proxy_server: {e}")
+        print(f"Error reloading litellm_proxy.proxy_server: {e}")
 
     import asyncio
diff --git a/tests/local_testing/test_add_update_models.py b/tests/local_testing/test_add_update_models.py
index 163687c1c2..3efbbb3c29 100644
--- a/tests/local_testing/test_add_update_models.py
+++ b/tests/local_testing/test_add_update_models.py
@@ -16,14 +16,14 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import pytest, logging, asyncio
 import litellm
-from litellm.proxy.management_endpoints.model_management_endpoints import (
+from litellm_proxy.management_endpoints.model_management_endpoints import (
     add_new_model,
     update_model,
 )
-from litellm.proxy._types import LitellmUserRoles
+from litellm_proxy._types import LitellmUserRoles
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy.utils import PrismaClient, ProxyLogging
-from litellm.proxy.management_endpoints.team_endpoints import new_team
+from litellm_proxy.utils import PrismaClient, ProxyLogging
+from litellm_proxy.management_endpoints.team_endpoints import new_team
 
 verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from litellm.caching.caching import DualCache
@@ -33,14 +33,14 @@ from litellm.router import (
 )
 from litellm.types.router import ModelInfo, updateDeployment, updateLiteLLMParams
 
-from litellm.proxy._types import UserAPIKeyAuth, NewTeamRequest, LiteLLM_TeamTable
+from litellm_proxy._types import UserAPIKeyAuth, NewTeamRequest, LiteLLM_TeamTable
 
 proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
 
 
 @pytest.fixture
 def prisma_client():
-    from litellm.proxy.proxy_cli import append_query_params
+    from litellm_proxy.proxy_cli import append_query_params
 
     ### add connection pool + pool timeout args
     params = {"connection_limit": 100, "pool_timeout": 60}
@@ -54,11 +54,11 @@ def prisma_client():
         database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
     )
 
-    # Reset litellm.proxy.proxy_server.prisma_client to None
-    litellm.proxy.proxy_server.litellm_proxy_budget_name = (
+    # Reset litellm_proxy.proxy_server.prisma_client to None
+    litellm_proxy.proxy_server.litellm_proxy_budget_name = (
         f"litellm-proxy-budget-{time.time()}"
     )
-    litellm.proxy.proxy_server.user_custom_key_generate = None
+    litellm_proxy.proxy_server.user_custom_key_generate = None
 
     return prisma_client
 
@@ -66,12 +66,12 @@ def prisma_client():
 @pytest.mark.asyncio
 @pytest.mark.skip(reason="new feature, tests passing locally")
 async def test_add_new_model(prisma_client):
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm.proxy.proxy_server, "store_model_in_db", True)
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "store_model_in_db", True)
 
-    await litellm.proxy.proxy_server.prisma_client.connect()
-    from litellm.proxy.proxy_server import user_api_key_cache
+    await litellm_proxy.proxy_server.prisma_client.connect()
+    from litellm_proxy.proxy_server import user_api_key_cache
 
     import uuid
 
     _new_model_id = f"local-test-{uuid.uuid4().hex}"
@@ -115,12 +115,12 @@ async def test_add_new_model(prisma_client):
 async def test_add_update_model(prisma_client):
     # test that existing litellm_params are not updated
     # only new / updated params get updated
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm.proxy.proxy_server, "store_model_in_db", True)
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "store_model_in_db", True)
 
-    await litellm.proxy.proxy_server.prisma_client.connect()
-    from litellm.proxy.proxy_server import user_api_key_cache
+    await litellm_proxy.proxy_server.prisma_client.connect()
+    from litellm_proxy.proxy_server import user_api_key_cache
 
     import uuid
 
     _new_model_id = f"local-test-{uuid.uuid4().hex}"
@@ -221,13 +221,13 @@ async def test_add_team_model_to_db(prisma_client):
     """
     Test adding a team model and verifying the team_public_model_name is stored correctly
     """
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm.proxy.proxy_server, "store_model_in_db", True)
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "store_model_in_db", True)
 
-    await litellm.proxy.proxy_server.prisma_client.connect()
+    await litellm_proxy.proxy_server.prisma_client.connect()
 
-    from litellm.proxy.management_endpoints.model_management_endpoints import (
+    from litellm_proxy.management_endpoints.model_management_endpoints import (
         _add_team_model_to_db,
     )
     import uuid
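The DB-backed tests above all perform the same module-global wiring before touching the database. A minimal sketch of that setup as a helper, assuming `litellm_proxy.proxy_server` has been imported; the key value mirrors the test fixtures:

```python
import litellm_proxy.proxy_server


def wire_proxy_globals(prisma_client):
    # Point the proxy's module-level state at the test's Prisma client.
    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
    setattr(litellm_proxy.proxy_server, "store_model_in_db", True)
```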
diff --git a/tests/local_testing/test_aim_guardrails.py b/tests/local_testing/test_aim_guardrails.py
index 4e33bcda7c..61744731a5 100644
--- a/tests/local_testing/test_aim_guardrails.py
+++ b/tests/local_testing/test_aim_guardrails.py
@@ -10,13 +10,13 @@ from fastapi.exceptions import HTTPException
 from httpx import Request, Response
 
 from litellm import DualCache
-from litellm.proxy.guardrails.guardrail_hooks.aim import AimGuardrail, AimGuardrailMissingSecrets
-from litellm.proxy.proxy_server import StreamingCallbackError, UserAPIKeyAuth
+from litellm_proxy.guardrails.guardrail_hooks.aim import AimGuardrail, AimGuardrailMissingSecrets
+from litellm_proxy.proxy_server import StreamingCallbackError, UserAPIKeyAuth
 from litellm.types.utils import ModelResponseStream
 
 sys.path.insert(0, os.path.abspath("../.."))  # Adds the parent directory to the system path
 import litellm
-from litellm.proxy.guardrails.init_guardrails import init_guardrails_v2
+from litellm_proxy.guardrails.init_guardrails import init_guardrails_v2
 
 
 class ReceiveMock:
@@ -158,7 +158,7 @@ async def test_post_call_stream__all_chunks_are_valid(monkeypatch, length: int):
     async def connect_mock(*args, **kwargs):
         yield websocket_mock
 
-    monkeypatch.setattr("litellm.proxy.guardrails.guardrail_hooks.aim.connect", connect_mock)
+    monkeypatch.setattr("litellm_proxy.guardrails.guardrail_hooks.aim.connect", connect_mock)
 
     results = []
     async for result in aim_guardrail.async_post_call_streaming_iterator_hook(
@@ -213,7 +213,7 @@ async def test_post_call_stream__blocked_chunks(monkeypatch):
     async def connect_mock(*args, **kwargs):
         yield websocket_mock
 
-    monkeypatch.setattr("litellm.proxy.guardrails.guardrail_hooks.aim.connect", connect_mock)
+    monkeypatch.setattr("litellm_proxy.guardrails.guardrail_hooks.aim.connect", connect_mock)
 
     results = []
     with pytest.raises(StreamingCallbackError, match="Jailbreak detected"):
diff --git a/tests/local_testing/test_auth_utils.py b/tests/local_testing/test_auth_utils.py
index 73abedb3f0..f27247ca7e 100644
--- a/tests/local_testing/test_auth_utils.py
+++ b/tests/local_testing/test_auth_utils.py
@@ -13,7 +13,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import pytest
 import litellm
-from litellm.proxy.auth.auth_utils import (
+from litellm_proxy.auth.auth_utils import (
     _allow_model_level_clientside_configurable_parameters,
 )
 from litellm.router import Router
@@ -71,7 +71,7 @@ def test_configurable_clientside_parameters(
 
 
 def test_get_end_user_id_from_request_body_always_returns_str():
-    from litellm.proxy.auth.auth_utils import get_end_user_id_from_request_body
+    from litellm_proxy.auth.auth_utils import get_end_user_id_from_request_body
 
     request_body = {"user": 123}
     end_user_id = get_end_user_id_from_request_body(request_body)
diff --git a/tests/local_testing/test_azure_content_safety.py b/tests/local_testing/test_azure_content_safety.py
index 91eb92b745..54701afb89 100644
--- a/tests/local_testing/test_azure_content_safety.py
+++ b/tests/local_testing/test_azure_content_safety.py
@@ -22,8 +22,8 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.utils import ProxyLogging
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.utils import ProxyLogging
 
 
 @pytest.mark.asyncio
@@ -33,7 +33,7 @@ async def test_strict_input_filtering_01():
     - have a response with a filtered input
     - call the pre call hook
     """
-    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
+    from litellm_proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
 
     azure_content_safety = _PROXY_AzureContentSafety(
         endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
@@ -68,7 +68,7 @@ async def test_strict_input_filtering_02():
     - have a response with a filtered input
     - call the pre call hook
     """
-    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
+    from litellm_proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
 
     azure_content_safety = _PROXY_AzureContentSafety(
         endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
@@ -98,7 +98,7 @@ async def test_loose_input_filtering_01():
     - have a response with a filtered input
     - call the pre call hook
     """
-    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
+    from litellm_proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
 
     azure_content_safety = _PROXY_AzureContentSafety(
         endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
@@ -128,7 +128,7 @@ async def test_loose_input_filtering_02():
     - have a response with a filtered input
     - call the pre call hook
     """
-    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
+    from litellm_proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
 
     azure_content_safety = _PROXY_AzureContentSafety(
         endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
@@ -158,7 +158,7 @@ async def test_strict_output_filtering_01():
     - have a response with a filtered output
     - call the post call hook
     """
-    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
+    from litellm_proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
 
     azure_content_safety = _PROXY_AzureContentSafety(
         endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
@@ -204,7 +204,7 @@ async def test_strict_output_filtering_02():
     - have a response with a filtered output
     - call the post call hook
     """
-    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
+    from litellm_proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
 
     azure_content_safety = _PROXY_AzureContentSafety(
         endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
@@ -243,7 +243,7 @@ async def test_loose_output_filtering_01():
     - have a response with a filtered output
     - call the post call hook
     """
-    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
+    from litellm_proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
 
     azure_content_safety = _PROXY_AzureContentSafety(
         endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
@@ -282,7 +282,7 @@ async def test_loose_output_filtering_02():
     - have a response with a filtered output
     - call the post call hook
     """
-    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
+    from litellm_proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
 
     azure_content_safety = _PROXY_AzureContentSafety(
         endpoint=os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT"),
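Given how mechanical most of these hunks are, a bulk rewrite is one plausible way such a migration gets applied. A hypothetical helper (not part of this diff) that rewrites remaining `litellm.proxy.` references in a tree; note it would miss deliberate exceptions such as the `enterprise.enterprise_hooks` import below, which this PR moves off the `litellm.proxy` prefix entirely:

```python
import pathlib
import re


def migrate(path: str) -> None:
    """Rewrite litellm.proxy.* dotted paths to litellm_proxy.* in .py files."""
    for py in pathlib.Path(path).rglob("*.py"):
        text = py.read_text()
        new = re.sub(r"\blitellm\.proxy\.", "litellm_proxy.", text)
        if new != text:
            py.write_text(new)
```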
diff --git a/tests/local_testing/test_basic_python_version.py b/tests/local_testing/test_basic_python_version.py
index c629ef3df8..38bb3fce0f 100644
--- a/tests/local_testing/test_basic_python_version.py
+++ b/tests/local_testing/test_basic_python_version.py
@@ -29,7 +29,7 @@ def test_litellm_proxy_server():
 
     # Import the proxy_server module
     try:
-        import litellm.proxy.proxy_server
+        import litellm_proxy.proxy_server
     except ImportError:
         pytest.fail("Failed to import litellm.proxy_server")
 
@@ -102,7 +102,7 @@ def test_litellm_proxy_server_config_no_general_settings():
             [
                 "python",
                 "-m",
-                "litellm.proxy.proxy_cli",
+                "litellm_proxy.proxy_cli",
                 "--config",
                 config_fp,
             ]
diff --git a/tests/local_testing/test_blocked_user_list.py b/tests/local_testing/test_blocked_user_list.py
index 172d6e85eb..0819a09450 100644
--- a/tests/local_testing/test_blocked_user_list.py
+++ b/tests/local_testing/test_blocked_user_list.py
@@ -28,37 +28,37 @@ import litellm
 from litellm import Router, mock_completion
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.enterprise.enterprise_hooks.blocked_user_list import (
+from litellm_proxy._types import UserAPIKeyAuth
+from enterprise.enterprise_hooks.blocked_user_list import (
     _ENTERPRISE_BlockedUserList,
 )
-from litellm.proxy.management_endpoints.internal_user_endpoints import (
+from litellm_proxy.management_endpoints.internal_user_endpoints import (
     new_user,
     user_info,
     user_update,
 )
-from litellm.proxy.management_endpoints.key_management_endpoints import (
+from litellm_proxy.management_endpoints.key_management_endpoints import (
     delete_key_fn,
     generate_key_fn,
     generate_key_helper_fn,
     info_key_fn,
     update_key_fn,
 )
-from litellm.proxy.proxy_server import user_api_key_auth
-from litellm.proxy.management_endpoints.customer_endpoints import block_user
-from litellm.proxy.spend_tracking.spend_management_endpoints import (
+from litellm_proxy.proxy_server import user_api_key_auth
+from litellm_proxy.management_endpoints.customer_endpoints import block_user
+from litellm_proxy.spend_tracking.spend_management_endpoints import (
     spend_key_fn,
     spend_user_fn,
     view_spend_logs,
 )
-from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token
+from litellm_proxy.utils import PrismaClient, ProxyLogging, hash_token
 
 verbose_proxy_logger.setLevel(level=logging.DEBUG)
 
 from starlette.datastructures import URL
 
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import (
+from litellm_proxy._types import (
     BlockUsers,
     DynamoDBArgs,
     GenerateKeyRequest,
@@ -72,7 +72,7 @@ proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
 
 @pytest.fixture
 def prisma_client():
-    from litellm.proxy.proxy_cli import append_query_params
+    from litellm_proxy.proxy_cli import append_query_params
 
     ### add connection pool + pool timeout args
     params = {"connection_limit": 100, "pool_timeout": 60}
@@ -85,11 +85,11 @@ def prisma_client():
         database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
     )
 
-    # Reset litellm.proxy.proxy_server.prisma_client to None
-    litellm.proxy.proxy_server.litellm_proxy_budget_name = (
+    # Reset litellm_proxy.proxy_server.prisma_client to None
+    litellm_proxy.proxy_server.litellm_proxy_budget_name = (
         f"litellm-proxy-budget-{time.time()}"
     )
-    litellm.proxy.proxy_server.user_custom_key_generate = None
+    litellm_proxy.proxy_server.user_custom_key_generate = None
 
     return prisma_client
 
@@ -101,13 +101,13 @@ async def test_block_user_check(prisma_client):
     - Test to see if a call with that user id is made, an error is raised
     - Test to see if a call without that user is passes
     """
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
 
     litellm.blocked_user_list = ["user_id_1"]
 
     blocked_user_obj = _ENTERPRISE_BlockedUserList(
-        prisma_client=litellm.proxy.proxy_server.prisma_client
+        prisma_client=litellm_proxy.proxy_server.prisma_client
     )
 
     _api_key = "sk-12345"
@@ -145,9 +145,9 @@ async def test_block_user_db_check(prisma_client):
     - Block end user via "/user/block"
     - Check returned value
     """
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    await litellm.proxy.proxy_server.prisma_client.connect()
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    await litellm_proxy.proxy_server.prisma_client.connect()
 
     _block_users = BlockUsers(user_ids=["user_id_1"])
     result = await block_user(data=_block_users)
     result = result["blocked_users"]
diff --git a/tests/local_testing/test_caching.py b/tests/local_testing/test_caching.py
index 8c12f3fd9b..32b639110d 100644
--- a/tests/local_testing/test_caching.py
+++ b/tests/local_testing/test_caching.py
@@ -2087,8 +2087,8 @@ async def test_redis_proxy_batch_redis_get_cache():
     """
     from litellm.caching.caching import Cache, DualCache
 
-    from litellm.proxy._types import UserAPIKeyAuth
-    from litellm.proxy.hooks.batch_redis_get import _PROXY_BatchRedisRequests
+    from litellm_proxy._types import UserAPIKeyAuth
+    from litellm_proxy.hooks.batch_redis_get import _PROXY_BatchRedisRequests
 
     litellm.cache = Cache(
         type="redis",
diff --git a/tests/local_testing/test_config.py b/tests/local_testing/test_config.py
index b56be32881..15cf762543 100644
--- a/tests/local_testing/test_config.py
+++ b/tests/local_testing/test_config.py
@@ -21,9 +21,10 @@ import pytest
 from pydantic import BaseModel, ConfigDict
 
 import litellm
-from litellm.proxy.common_utils.encrypt_decrypt_utils import encrypt_value
-from litellm.proxy.proxy_server import ProxyConfig
-from litellm.proxy.utils import DualCache, ProxyLogging
+import litellm_proxy
+from litellm_proxy.common_utils.encrypt_decrypt_utils import encrypt_value
+from litellm_proxy.proxy_server import ProxyConfig
+from litellm_proxy.utils import DualCache, ProxyLogging
 from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo
 
@@ -55,7 +56,7 @@ async def test_delete_deployment():
 
     master_key = "sk-1234"
 
-    setattr(litellm.proxy.proxy_server, "master_key", master_key)
+    setattr(litellm_proxy.proxy_server, "master_key", master_key)
 
     for k, v in encrypted_litellm_params.items():
         if isinstance(v, str):
@@ -75,7 +76,7 @@ async def test_delete_deployment():
             deployment_2.to_json(exclude_none=True),
         ]
     )
-    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
+    setattr(litellm_proxy.proxy_server, "llm_router", llm_router)
     print(f"llm_router: {llm_router}")
 
     pc = ProxyConfig()
@@ -104,7 +105,7 @@ async def test_delete_deployment():
         ]
     )
     print(f"llm_router: {llm_router}")
-    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
+    setattr(litellm_proxy.proxy_server, "llm_router", llm_router)
     pc = ProxyConfig()
 
     db_model = DBModel(
@@ -150,8 +151,8 @@ async def test_add_existing_deployment():
     init_len_list = len(llm_router.model_list)
     print(f"llm_router: {llm_router}")
     master_key = "sk-1234"
-    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
-    setattr(litellm.proxy.proxy_server, "master_key", master_key)
+    setattr(litellm_proxy.proxy_server, "llm_router", llm_router)
+    setattr(litellm_proxy.proxy_server, "master_key", master_key)
     pc = ProxyConfig()
 
     encrypted_litellm_params = litellm_params.dict(exclude_none=True)
@@ -205,8 +206,8 @@ async def test_db_error_new_model_check():
     init_len_list = len(llm_router.model_list)
     print(f"llm_router: {llm_router}")
     master_key = "sk-1234"
-    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
-    setattr(litellm.proxy.proxy_server, "master_key", master_key)
+    setattr(litellm_proxy.proxy_server, "llm_router", llm_router)
+    setattr(litellm_proxy.proxy_server, "master_key", master_key)
     pc = ProxyConfig()
 
     encrypted_litellm_params = litellm_params.dict(exclude_none=True)
@@ -306,8 +307,8 @@ async def test_add_and_delete_deployments(llm_router, model_list_flag_value):
     """
     master_key = "sk-1234"
 
-    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
-    setattr(litellm.proxy.proxy_server, "master_key", master_key)
+    setattr(litellm_proxy.proxy_server, "llm_router", llm_router)
+    setattr(litellm_proxy.proxy_server, "master_key", master_key)
     pc = ProxyConfig()
     pl = ProxyLogging(DualCache())
 
@@ -332,7 +333,7 @@ async def test_add_and_delete_deployments(llm_router, model_list_flag_value):
 
     await pc._update_llm_router(new_models=model_list, proxy_logging_obj=pl)
 
-    llm_router = getattr(litellm.proxy.proxy_server, "llm_router")
+    llm_router = getattr(litellm_proxy.proxy_server, "llm_router")
 
     if model_list_flag_value == 0:
         if prev_llm_router_val is None:
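The `test_config.py` hunks above exercise `ProxyConfig` against a router installed on the renamed module. A minimal sketch of that setup under stated assumptions (`litellm_proxy` importable; the empty router and key are placeholders):

```python
import litellm_proxy.proxy_server
from litellm import Router
from litellm_proxy.proxy_server import ProxyConfig

# Install a router and master key on the proxy module, as the tests do,
# before driving ProxyConfig through add/delete-deployment flows.
llm_router = Router(model_list=[])
setattr(litellm_proxy.proxy_server, "llm_router", llm_router)
setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
pc = ProxyConfig()
```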
import Request from dotenv import load_dotenv import os diff --git a/tests/local_testing/test_dynamic_rate_limit_handler.py b/tests/local_testing/test_dynamic_rate_limit_handler.py index 3f52e25389..6cede1f4f5 100644 --- a/tests/local_testing/test_dynamic_rate_limit_handler.py +++ b/tests/local_testing/test_dynamic_rate_limit_handler.py @@ -22,8 +22,8 @@ import pytest import litellm from litellm import DualCache, Router -from litellm.proxy._types import UserAPIKeyAuth -from litellm.proxy.hooks.dynamic_rate_limiter import ( +from litellm_proxy._types import UserAPIKeyAuth +from litellm_proxy.hooks.dynamic_rate_limiter import ( _PROXY_DynamicRateLimitHandler as DynamicRateLimitHandler, ) diff --git a/tests/local_testing/test_exceptions.py b/tests/local_testing/test_exceptions.py index be7710f58a..5b2f9487ca 100644 --- a/tests/local_testing/test_exceptions.py +++ b/tests/local_testing/test_exceptions.py @@ -946,7 +946,7 @@ async def test_exception_with_headers(sync_mode, provider, model, call_type, str but Azure says to retry in at most 9s ``` - {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"} + {"message": "litellm_proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"} ``` """ print(f"Received args: {locals()}") @@ -1067,7 +1067,7 @@ async def test_exception_with_headers_httpx( but Azure says to retry in at most 9s ``` - {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. 
Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"} + {"message": "litellm_proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"} ``` """ print(f"Received args: {locals()}") diff --git a/tests/local_testing/test_guardrails_ai.py b/tests/local_testing/test_guardrails_ai.py index 004ffa0b9e..aeba956285 100644 --- a/tests/local_testing/test_guardrails_ai.py +++ b/tests/local_testing/test_guardrails_ai.py @@ -6,7 +6,7 @@ sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path import litellm -from litellm.proxy.guardrails.init_guardrails import init_guardrails_v2 +from litellm_proxy.guardrails.init_guardrails import init_guardrails_v2 def test_guardrails_ai(): diff --git a/tests/local_testing/test_health_check.py b/tests/local_testing/test_health_check.py index bf326d884b..104d103bbd 100644 --- a/tests/local_testing/test_health_check.py +++ b/tests/local_testing/test_health_check.py @@ -230,7 +230,7 @@ def test_update_litellm_params_for_health_check(): 1. Updates messages with a random message 2. Updates model name when health_check_model is provided """ - from litellm.proxy.health_check import _update_litellm_params_for_health_check + from litellm_proxy.health_check import _update_litellm_params_for_health_check # Test with health_check_model model_info = {"health_check_model": "gpt-3.5-turbo"} @@ -266,7 +266,7 @@ async def test_perform_health_check_with_health_check_model(): 1. Verifies that health_check_model overrides the original model when model=`openai/*` 2. 
Ensures the health check is performed with the override model """ - from litellm.proxy.health_check import _perform_health_check + from litellm_proxy.health_check import _perform_health_check # Mock model list with health_check_model specified model_list = [ @@ -301,7 +301,7 @@ async def test_perform_health_check_with_health_check_model(): @pytest.mark.asyncio async def test_health_check_bad_model(): - from litellm.proxy.health_check import _perform_health_check + from litellm_proxy.health_check import _perform_health_check import time model_list = [ diff --git a/tests/local_testing/test_http_parsing_utils.py b/tests/local_testing/test_http_parsing_utils.py index 4d509fc16d..bae027aadd 100644 --- a/tests/local_testing/test_http_parsing_utils.py +++ b/tests/local_testing/test_http_parsing_utils.py @@ -10,7 +10,7 @@ sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system-path -from litellm.proxy.common_utils.http_parsing_utils import _read_request_body +from litellm_proxy.common_utils.http_parsing_utils import _read_request_body @pytest.mark.asyncio diff --git a/tests/local_testing/test_lakera_ai_prompt_injection.py b/tests/local_testing/test_lakera_ai_prompt_injection.py index 0d6cc20846..efee1ff33c 100644 --- a/tests/local_testing/test_lakera_ai_prompt_injection.py +++ b/tests/local_testing/test_lakera_ai_prompt_injection.py @@ -26,10 +26,10 @@ import pytest import litellm from litellm._logging import verbose_proxy_logger from litellm.caching.caching import DualCache -from litellm.proxy._types import UserAPIKeyAuth -from litellm.proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation -from litellm.proxy.proxy_server import embeddings -from litellm.proxy.utils import ProxyLogging, hash_token +from litellm_proxy._types import UserAPIKeyAuth +from litellm_proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation +from litellm_proxy.proxy_server import embeddings +from litellm_proxy.utils import ProxyLogging, hash_token verbose_proxy_logger.setLevel(logging.DEBUG) @@ -164,7 +164,7 @@ async def test_moderations_on_embeddings(): ] ) - setattr(litellm.proxy.proxy_server, "llm_router", temp_router) + setattr(litellm_proxy.proxy_server, "llm_router", temp_router) api_route = APIRoute(path="/embeddings", endpoint=embeddings) litellm.callbacks = [lakeraAI_Moderation()] @@ -375,8 +375,8 @@ async def test_callback_specific_param_run_pre_call_check_lakera(): from typing import Dict, List, Optional, Union import litellm - from litellm.proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation - from litellm.proxy.guardrails.init_guardrails import initialize_guardrails + from litellm_proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation + from litellm_proxy.guardrails.init_guardrails import initialize_guardrails from litellm.types.guardrails import GuardrailItem, GuardrailItemSpec guardrails_config: List[Dict[str, GuardrailItemSpec]] = [ @@ -422,8 +422,8 @@ async def test_callback_specific_thresholds(): from typing import Dict, List, Optional, Union import litellm - from litellm.proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation - from litellm.proxy.guardrails.init_guardrails import initialize_guardrails + from litellm_proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation + from litellm_proxy.guardrails.init_guardrails import initialize_guardrails from litellm.types.guardrails import GuardrailItem, GuardrailItemSpec guardrails_config: List[Dict[str, GuardrailItemSpec]] = [ diff --git 
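Note: the hunks above all apply one mechanical rule — proxy state that tests used to patch on `litellm.proxy.proxy_server` now lives on `litellm_proxy.proxy_server`. A minimal sketch of the new pattern, assuming the renamed module still exposes the same module-level globals (`install_test_router` is a hypothetical helper, not part of this patch):

    import litellm_proxy.proxy_server  # ensures the submodule is actually imported

    def install_test_router(router, master_key="sk-1234"):
        # proxy_server reads these module-level globals at request time,
        # so tests override them directly on the renamed module
        setattr(litellm_proxy.proxy_server, "llm_router", router)
        setattr(litellm_proxy.proxy_server, "master_key", master_key)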
diff --git a/tests/local_testing/test_llm_guard.py b/tests/local_testing/test_llm_guard.py
index ff380b74df..97f3cf18dd 100644
--- a/tests/local_testing/test_llm_guard.py
+++ b/tests/local_testing/test_llm_guard.py
@@ -16,10 +16,10 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import pytest
 import litellm
-from litellm.proxy.enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMGuard
+from enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMGuard
 from litellm import Router, mock_completion
-from litellm.proxy.utils import ProxyLogging, hash_token
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy.utils import ProxyLogging, hash_token
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.caching.caching import DualCache
 
 ### UNIT TESTS FOR LLM GUARD ###
diff --git a/tests/local_testing/test_max_tpm_rpm_limiter.py b/tests/local_testing/test_max_tpm_rpm_limiter.py
index 29f9a85c4d..56dcf0f6b5 100644
--- a/tests/local_testing/test_max_tpm_rpm_limiter.py
+++ b/tests/local_testing/test_max_tpm_rpm_limiter.py
@@ -17,10 +17,10 @@
 # import pytest
 # import litellm
 # from litellm import Router
-# from litellm.proxy.utils import ProxyLogging, hash_token
-# from litellm.proxy._types import UserAPIKeyAuth
+# from litellm_proxy.utils import ProxyLogging, hash_token
+# from litellm_proxy._types import UserAPIKeyAuth
 # from litellm.caching.caching import DualCache, RedisCache
-# from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
+# from litellm_proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
 # from datetime import datetime
@@ -133,19 +133,19 @@
 #     - test if default namespace set via `proxyconfig._init_cache`
 #     - respected for tpm/rpm caching
 #     """
-#     from litellm.proxy.proxy_server import ProxyConfig
+#     from litellm_proxy.proxy_server import ProxyConfig
 
 #     redis_usage_cache: Optional[RedisCache] = None
 #     cache_params = {"type": "redis", "namespace": "litellm_default"}
 
 #     ## INIT CACHE ##
 #     proxy_config = ProxyConfig()
-#     setattr(litellm.proxy.proxy_server, "proxy_config", proxy_config)
+#     setattr(litellm_proxy.proxy_server, "proxy_config", proxy_config)
 
 #     proxy_config._init_cache(cache_params=cache_params)
 
 #     redis_cache: Optional[RedisCache] = getattr(
-#         litellm.proxy.proxy_server, "redis_usage_cache"
+#         litellm_proxy.proxy_server, "redis_usage_cache"
 #     )
 
 #     ## CHECK IF NAMESPACE SET ##
diff --git a/tests/local_testing/test_openai_moderations_hook.py b/tests/local_testing/test_openai_moderations_hook.py
index 2ab8669951..46e2d9b6a9 100644
--- a/tests/local_testing/test_openai_moderations_hook.py
+++ b/tests/local_testing/test_openai_moderations_hook.py
@@ -16,12 +16,12 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import pytest
 import litellm
-from litellm.proxy.enterprise.enterprise_hooks.openai_moderation import (
+from enterprise.enterprise_hooks.openai_moderation import (
     _ENTERPRISE_OpenAI_Moderation,
 )
 from litellm import Router, mock_completion
-from litellm.proxy.utils import ProxyLogging, hash_token
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy.utils import ProxyLogging, hash_token
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.caching.caching import DualCache
 
 ### UNIT TESTS FOR OpenAI Moderation ###
@@ -40,7 +40,7 @@ async def test_openai_moderation_error_raising():
     user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
     local_cache = DualCache()
 
-    from litellm.proxy.proxy_server import llm_router
+    from litellm_proxy.proxy_server import llm_router
 
     llm_router = litellm.Router(
         model_list=[
@@ -54,7 +54,7 @@ async def test_openai_moderation_error_raising():
         ]
     )
 
-    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
+    setattr(litellm_proxy.proxy_server, "llm_router", llm_router)
 
     try:
         await openai_mod.async_moderation_hook(
diff --git a/tests/local_testing/test_parallel_request_limiter.py b/tests/local_testing/test_parallel_request_limiter.py
index 8b34e03454..ad0985fd35 100644
--- a/tests/local_testing/test_parallel_request_limiter.py
+++ b/tests/local_testing/test_parallel_request_limiter.py
@@ -24,11 +24,11 @@ import pytest
 import litellm
 from litellm import Router
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.hooks.parallel_request_limiter import (
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler as MaxParallelRequestsHandler,
 )
-from litellm.proxy.utils import InternalUsageCache, ProxyLogging, hash_token
+from litellm_proxy.utils import InternalUsageCache, ProxyLogging, hash_token
 
 ## On Request received
 ## On Request success
diff --git a/tests/local_testing/test_pass_through_endpoints.py b/tests/local_testing/test_pass_through_endpoints.py
index ae9644afb8..cc2c5b9b28 100644
--- a/tests/local_testing/test_pass_through_endpoints.py
+++ b/tests/local_testing/test_pass_through_endpoints.py
@@ -15,7 +15,7 @@ from unittest.mock import Mock
 
 import httpx
 
-from litellm.proxy.proxy_server import initialize_pass_through_endpoints
+from litellm_proxy.proxy_server import initialize_pass_through_endpoints
 
 
 # Mock the async_client used in the pass_through_request function
@@ -36,7 +36,7 @@ def remove_rerank_route(app):
 
 @pytest.fixture
 def client():
-    from litellm.proxy.proxy_server import app
+    from litellm_proxy.proxy_server import app
 
     remove_rerank_route(
         app=app
@@ -61,10 +61,10 @@ async def test_pass_through_endpoint_no_headers(client, monkeypatch):
     # Initialize the pass-through endpoint
     await initialize_pass_through_endpoints(pass_through_endpoints)
     general_settings: dict = (
-        getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+        getattr(litellm_proxy.proxy_server, "general_settings", {}) or {}
     )
     general_settings.update({"pass_through_endpoints": pass_through_endpoints})
-    setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
+    setattr(litellm_proxy.proxy_server, "general_settings", general_settings)
 
     # Make a request to the pass-through endpoint
     response = client.post("/test-endpoint", json={"prompt": "Hello, world!"})
@@ -92,10 +92,10 @@ async def test_pass_through_endpoint(client, monkeypatch):
     # Initialize the pass-through endpoint
     await initialize_pass_through_endpoints(pass_through_endpoints)
     general_settings: Optional[dict] = (
-        getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+        getattr(litellm_proxy.proxy_server, "general_settings", {}) or {}
     )
     general_settings.update({"pass_through_endpoints": pass_through_endpoints})
-    setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
+    setattr(litellm_proxy.proxy_server, "general_settings", general_settings)
 
     # Make a request to the pass-through endpoint
     response = client.post("/test-endpoint", json={"prompt": "Hello, world!"})
@@ -122,10 +122,10 @@ async def test_pass_through_endpoint_rerank(client):
     # Initialize the pass-through endpoint
     await initialize_pass_through_endpoints(pass_through_endpoints)
     general_settings: Optional[dict] = (
-        getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+        getattr(litellm_proxy.proxy_server, "general_settings", {}) or {}
     )
     general_settings.update({"pass_through_endpoints": pass_through_endpoints})
-    setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
+    setattr(litellm_proxy.proxy_server, "general_settings", general_settings)
 
     _json_data = {
         "model": "rerank-english-v3.0",
@@ -154,8 +154,8 @@ async def test_pass_through_endpoint_rpm_limit(
     client, auth, expected_error_code, rpm_limit
 ):
     import litellm
-    from litellm.proxy._types import UserAPIKeyAuth
-    from litellm.proxy.proxy_server import ProxyLogging, hash_token, user_api_key_cache
+    from litellm_proxy._types import UserAPIKeyAuth
+    from litellm_proxy.proxy_server import ProxyLogging, hash_token, user_api_key_cache
 
     mock_api_key = "sk-my-test-key"
     cache_value = UserAPIKeyAuth(token=hash_token(mock_api_key), rpm_limit=rpm_limit)
@@ -167,10 +167,10 @@ async def test_pass_through_endpoint_rpm_limit(
     proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
     proxy_logging_obj._init_litellm_callbacks()
 
-    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm.proxy.proxy_server, "prisma_client", "FAKE-VAR")
-    setattr(litellm.proxy.proxy_server, "proxy_logging_obj", proxy_logging_obj)
+    setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "prisma_client", "FAKE-VAR")
+    setattr(litellm_proxy.proxy_server, "proxy_logging_obj", proxy_logging_obj)
 
     # Define a pass-through endpoint
     pass_through_endpoints = [
@@ -185,10 +185,10 @@ async def test_pass_through_endpoint_rpm_limit(
     # Initialize the pass-through endpoint
     await initialize_pass_through_endpoints(pass_through_endpoints)
     general_settings: Optional[dict] = (
-        getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+        getattr(litellm_proxy.proxy_server, "general_settings", {}) or {}
     )
     general_settings.update({"pass_through_endpoints": pass_through_endpoints})
-    setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
+    setattr(litellm_proxy.proxy_server, "general_settings", general_settings)
 
     _json_data = {
         "model": "rerank-english-v3.0",
@@ -220,22 +220,22 @@ async def test_pass_through_endpoint_rpm_limit(
 async def test_aaapass_through_endpoint_pass_through_keys_langfuse(
     auth, expected_error_code, rpm_limit
 ):
-    from litellm.proxy.proxy_server import app
+    from litellm_proxy.proxy_server import app
 
     client = TestClient(app)
 
     import litellm
-    from litellm.proxy._types import UserAPIKeyAuth
-    from litellm.proxy.proxy_server import ProxyLogging, hash_token, user_api_key_cache
+    from litellm_proxy._types import UserAPIKeyAuth
+    from litellm_proxy.proxy_server import ProxyLogging, hash_token, user_api_key_cache
 
     # Store original values
     original_user_api_key_cache = getattr(
-        litellm.proxy.proxy_server, "user_api_key_cache", None
+        litellm_proxy.proxy_server, "user_api_key_cache", None
     )
-    original_master_key = getattr(litellm.proxy.proxy_server, "master_key", None)
-    original_prisma_client = getattr(litellm.proxy.proxy_server, "prisma_client", None)
+    original_master_key = getattr(litellm_proxy.proxy_server, "master_key", None)
+    original_prisma_client = getattr(litellm_proxy.proxy_server, "prisma_client", None)
     original_proxy_logging_obj = getattr(
-        litellm.proxy.proxy_server, "proxy_logging_obj", None
+        litellm_proxy.proxy_server, "proxy_logging_obj", None
     )
 
     try:
@@ -252,10 +252,10 @@ async def test_aaapass_through_endpoint_pass_through_keys_langfuse(
         proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
         proxy_logging_obj._init_litellm_callbacks()
 
-        setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
-        setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-        setattr(litellm.proxy.proxy_server, "prisma_client", "FAKE-VAR")
-        setattr(litellm.proxy.proxy_server, "proxy_logging_obj", proxy_logging_obj)
+        setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+        setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+        setattr(litellm_proxy.proxy_server, "prisma_client", "FAKE-VAR")
+        setattr(litellm_proxy.proxy_server, "proxy_logging_obj", proxy_logging_obj)
 
         # Define a pass-through endpoint
         pass_through_endpoints = [
@@ -274,11 +274,11 @@ async def test_aaapass_through_endpoint_pass_through_keys_langfuse(
         # Initialize the pass-through endpoint
         await initialize_pass_through_endpoints(pass_through_endpoints)
         general_settings: Optional[dict] = (
-            getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+            getattr(litellm_proxy.proxy_server, "general_settings", {}) or {}
         )
         old_general_settings = general_settings
         general_settings.update({"pass_through_endpoints": pass_through_endpoints})
-        setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
+        setattr(litellm_proxy.proxy_server, "general_settings", general_settings)
 
         _json_data = {
             "batch": [
@@ -316,18 +316,18 @@ async def test_aaapass_through_endpoint_pass_through_keys_langfuse(
         # Assert the response
         assert response.status_code == expected_error_code
 
-        setattr(litellm.proxy.proxy_server, "general_settings", old_general_settings)
+        setattr(litellm_proxy.proxy_server, "general_settings", old_general_settings)
     finally:
         # Reset to original values
         setattr(
-            litellm.proxy.proxy_server,
+            litellm_proxy.proxy_server,
             "user_api_key_cache",
             original_user_api_key_cache,
         )
-        setattr(litellm.proxy.proxy_server, "master_key", original_master_key)
-        setattr(litellm.proxy.proxy_server, "prisma_client", original_prisma_client)
+        setattr(litellm_proxy.proxy_server, "master_key", original_master_key)
+        setattr(litellm_proxy.proxy_server, "prisma_client", original_prisma_client)
         setattr(
-            litellm.proxy.proxy_server, "proxy_logging_obj", original_proxy_logging_obj
+            litellm_proxy.proxy_server, "proxy_logging_obj", original_proxy_logging_obj
         )
 
 
 @pytest.mark.asyncio
@@ -378,10 +378,10 @@ async def test_pass_through_endpoint_bing(client, monkeypatch):
     # Initialize the pass-through endpoint
     await initialize_pass_through_endpoints(pass_through_endpoints)
     general_settings: Optional[dict] = (
-        getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
+        getattr(litellm_proxy.proxy_server, "general_settings", {}) or {}
    )
     general_settings.update({"pass_through_endpoints": pass_through_endpoints})
-    setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
+    setattr(litellm_proxy.proxy_server, "general_settings", general_settings)
 
     # Make 2 requests thru the pass-through endpoint
     client.get("/bing/search?q=bob+barker")
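The pass-through tests above repeat one more pattern: `general_settings` is read with a `{}` fallback, mutated, and written back to the renamed module. A sketch of that sequence, assuming the same module-level dict survives the rename (the helper name is hypothetical):

    import litellm_proxy.proxy_server

    def register_pass_through_endpoints(endpoints: list):
        # read-modify-write on the proxy's module-level settings dict;
        # `or {}` guards against the global having been explicitly set to None
        settings = getattr(litellm_proxy.proxy_server, "general_settings", {}) or {}
        settings.update({"pass_through_endpoints": endpoints})
        setattr(litellm_proxy.proxy_server, "general_settings", settings)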
diff --git a/tests/local_testing/test_presidio_masking.py b/tests/local_testing/test_presidio_masking.py
index c9d1adc9b2..69a165ed97 100644
--- a/tests/local_testing/test_presidio_masking.py
+++ b/tests/local_testing/test_presidio_masking.py
@@ -22,11 +22,11 @@ import pytest
 
 import litellm
 from litellm import Router, mock_completion
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.guardrails.guardrail_hooks.presidio import (
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.guardrails.guardrail_hooks.presidio import (
     _OPTIONAL_PresidioPIIMasking,
 )
-from litellm.proxy.utils import ProxyLogging
+from litellm_proxy.utils import ProxyLogging
 
 
 @pytest.mark.parametrize(
@@ -296,7 +296,7 @@ async def test_presidio_pii_masking_logging_output_only_logged_response_guardrai
     from typing import Dict, List, Optional
 
     import litellm
-    from litellm.proxy.guardrails.init_guardrails import initialize_guardrails
+    from litellm_proxy.guardrails.init_guardrails import initialize_guardrails
     from litellm.types.guardrails import (
         GuardrailItem,
         GuardrailItemSpec,
diff --git a/tests/local_testing/test_prometheus.py b/tests/local_testing/test_prometheus.py
index 8a4d03e630..6fea06671f 100644
--- a/tests/local_testing/test_prometheus.py
+++ b/tests/local_testing/test_prometheus.py
@@ -39,7 +39,7 @@ async def prometheus_logger():
     for collector in collectors:
         REGISTRY.unregister(collector)
 
-    with patch("litellm.proxy.proxy_server.premium_user", True):
+    with patch("litellm_proxy.proxy_server.premium_user", True):
         yield PrometheusLogger()
diff --git a/tests/local_testing/test_prometheus_service.py b/tests/local_testing/test_prometheus_service.py
index cfbd6a1a83..b7928a406b 100644
--- a/tests/local_testing/test_prometheus_service.py
+++ b/tests/local_testing/test_prometheus_service.py
@@ -11,7 +11,7 @@ import pytest
 from litellm import acompletion, Cache
 from litellm._service_logger import ServiceLogging
 from litellm.integrations.prometheus_services import PrometheusServicesLogger
-from litellm.proxy.utils import ServiceTypes
+from litellm_proxy.utils import ServiceTypes
 from unittest.mock import patch, AsyncMock
 import litellm
diff --git a/tests/local_testing/test_prompt_injection_detection.py b/tests/local_testing/test_prompt_injection_detection.py
index 8443aadcc6..fe8fcbf28b 100644
--- a/tests/local_testing/test_prompt_injection_detection.py
+++ b/tests/local_testing/test_prompt_injection_detection.py
@@ -14,12 +14,12 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import pytest
 import litellm
-from litellm.proxy.hooks.prompt_injection_detection import (
+from litellm_proxy.hooks.prompt_injection_detection import (
     _OPTIONAL_PromptInjectionDetection,
 )
 from litellm import Router, mock_completion
-from litellm.proxy.utils import ProxyLogging
-from litellm.proxy._types import UserAPIKeyAuth, LiteLLMPromptInjectionParams
+from litellm_proxy.utils import ProxyLogging
+from litellm_proxy._types import UserAPIKeyAuth, LiteLLMPromptInjectionParams
 from litellm.caching.caching import DualCache
diff --git a/tests/local_testing/test_router.py b/tests/local_testing/test_router.py
index eb845559e2..9129399157 100644
--- a/tests/local_testing/test_router.py
+++ b/tests/local_testing/test_router.py
@@ -2164,7 +2164,7 @@ def test_router_dynamic_cooldown_correct_retry_after_time():
     but Azure says to retry in at most 9s
     ```
-    {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
+    {"message": "litellm_proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
     ```
     """
     router = Router(
@@ -2248,7 +2248,7 @@ async def test_aaarouter_dynamic_cooldown_message_retry_time(sync_mode):
     but Azure says to retry in at most 9s
     ```
-    {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
+    {"message": "litellm_proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"}
     ```
     """
     litellm.set_verbose = True
diff --git a/tests/local_testing/test_secret_detect_hook.py b/tests/local_testing/test_secret_detect_hook.py
index f240e9b606..b6d768bb1a 100644
--- a/tests/local_testing/test_secret_detect_hook.py
+++ b/tests/local_testing/test_secret_detect_hook.py
@@ -28,12 +28,12 @@ import litellm
 from litellm import Router, mock_completion
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (
+from litellm_proxy._types import UserAPIKeyAuth
+from enterprise.enterprise_hooks.secret_detection import (
     _ENTERPRISE_SecretDetection,
 )
-from litellm.proxy.proxy_server import chat_completion
-from litellm.proxy.utils import ProxyLogging, hash_token
+from litellm_proxy.proxy_server import chat_completion
+from litellm_proxy.utils import ProxyLogging, hash_token
 from litellm.router import Router
 
 ### UNIT TESTS FOR OpenAI Moderation ###
@@ -53,7 +53,7 @@ async def test_basic_secret_detection_chat():
     user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
     local_cache = DualCache()
 
-    from litellm.proxy.proxy_server import llm_router
+    from litellm_proxy.proxy_server import llm_router
 
     test_data = {
         "messages": [
@@ -121,7 +121,7 @@ async def test_basic_secret_detection_text_completion():
     user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
     local_cache = DualCache()
 
-    from litellm.proxy.proxy_server import llm_router
+    from litellm_proxy.proxy_server import llm_router
 
     test_data = {
         "prompt": "Hey, how's it going, API_KEY = 'sk_1234567890abcdef', my OPENAI_API_KEY = 'sk_1234567890abcdef' and i want to know what is the weather",
@@ -159,7 +159,7 @@ async def test_basic_secret_detection_embeddings():
     user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
     local_cache = DualCache()
 
-    from litellm.proxy.proxy_server import llm_router
+    from litellm_proxy.proxy_server import llm_router
 
     test_data = {
         "input": "Hey, how's it going, API_KEY = 'sk_1234567890abcdef', my OPENAI_API_KEY = 'sk_1234567890abcdef' and i want to know what is the weather",
@@ -197,7 +197,7 @@ async def test_basic_secret_detection_embeddings_list():
     user_api_key_dict = UserAPIKeyAuth(api_key=_api_key)
     local_cache = DualCache()
 
-    from litellm.proxy.proxy_server import llm_router
+    from litellm_proxy.proxy_server import llm_router
 
     test_data = {
         "input": [
@@ -262,7 +262,7 @@ async def test_chat_completion_request_with_redaction():
     Ensures that the secret is redacted EVEN on the callback
     """
-    from litellm.proxy import proxy_server
+    from litellm_proxy import proxy_server
 
     setattr(proxy_server, "llm_router", router)
     _test_logger = testLogger()
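Note that the enterprise hooks follow a different rule from everything else in this patch: they move out of the proxy package entirely, to the top-level `enterprise` package, rather than to `litellm_proxy`. Restating the hunks above as a before/after sketch:

    # before: reached through the proxy package
    # from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (
    #     _ENTERPRISE_SecretDetection,
    # )

    # after: imported from the top-level enterprise package
    from enterprise.enterprise_hooks.secret_detection import (
        _ENTERPRISE_SecretDetection,
    )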
diff --git a/tests/local_testing/test_spend_calculate_endpoint.py b/tests/local_testing/test_spend_calculate_endpoint.py
index 8bdd4a54d8..7ee2a6782e 100644
--- a/tests/local_testing/test_spend_calculate_endpoint.py
+++ b/tests/local_testing/test_spend_calculate_endpoint.py
@@ -7,8 +7,8 @@ from fastapi import Request
 from fastapi.routing import APIRoute
 
 import litellm
-from litellm.proxy._types import SpendCalculateRequest
-from litellm.proxy.spend_tracking.spend_management_endpoints import calculate_spend
+from litellm_proxy._types import SpendCalculateRequest
+from litellm_proxy.spend_tracking.spend_management_endpoints import calculate_spend
 from litellm.router import Router
 
 # this file is to test litellm/proxy
@@ -36,7 +36,7 @@ async def test_spend_calc_model_messages():
 
 @pytest.mark.asyncio
 async def test_spend_calc_model_on_router_messages():
-    from litellm.proxy.proxy_server import llm_router as init_llm_router
+    from litellm_proxy.proxy_server import llm_router as init_llm_router
 
     temp_llm_router = Router(
         model_list=[
@@ -49,7 +49,7 @@ async def test_spend_calc_model_on_router_messages():
         ]
     )
 
-    setattr(litellm.proxy.proxy_server, "llm_router", temp_llm_router)
+    setattr(litellm_proxy.proxy_server, "llm_router", temp_llm_router)
 
     cost_obj = await calculate_spend(
         request=SpendCalculateRequest(
@@ -66,7 +66,7 @@ async def test_spend_calc_model_on_router_messages():
     assert _cost > 0.0
 
     # set router to init value
-    setattr(litellm.proxy.proxy_server, "llm_router", init_llm_router)
+    setattr(litellm_proxy.proxy_server, "llm_router", init_llm_router)
 
 
 @pytest.mark.asyncio
@@ -105,7 +105,7 @@ async def test_spend_calc_using_response():
 
 @pytest.mark.asyncio
 async def test_spend_calc_model_alias_on_router_messages():
-    from litellm.proxy.proxy_server import llm_router as init_llm_router
+    from litellm_proxy.proxy_server import llm_router as init_llm_router
 
     temp_llm_router = Router(
         model_list=[
@@ -121,7 +121,7 @@ async def test_spend_calc_model_alias_on_router_messages():
         },
     )
 
-    setattr(litellm.proxy.proxy_server, "llm_router", temp_llm_router)
+    setattr(litellm_proxy.proxy_server, "llm_router", temp_llm_router)
 
     cost_obj = await calculate_spend(
         request=SpendCalculateRequest(
@@ -138,4 +138,4 @@ async def test_spend_calc_model_alias_on_router_messages():
     assert _cost > 0.0
 
     # set router to init value
-    setattr(litellm.proxy.proxy_server, "llm_router", init_llm_router)
+    setattr(litellm_proxy.proxy_server, "llm_router", init_llm_router)
diff --git a/tests/local_testing/test_team_config.py b/tests/local_testing/test_team_config.py
index 8a5f8c8407..56f7a2e1a5 100644
--- a/tests/local_testing/test_team_config.py
+++ b/tests/local_testing/test_team_config.py
@@ -8,7 +8,7 @@
 #     0, os.path.abspath("../..")
 # )  # Adds the parent directory to the system path
 # import litellm
-# from litellm.proxy.proxy_server import ProxyConfig
+# from litellm_proxy.proxy_server import ProxyConfig
 
 
 # @pytest.mark.asyncio
diff --git a/tests/local_testing/test_ui_sso_helper_utils.py b/tests/local_testing/test_ui_sso_helper_utils.py
index c720636327..d55d4d566e 100644
--- a/tests/local_testing/test_ui_sso_helper_utils.py
+++ b/tests/local_testing/test_ui_sso_helper_utils.py
@@ -20,11 +20,11 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import logging
 
-from litellm.proxy.management_endpoints.sso_helper_utils import (
+from litellm_proxy.management_endpoints.sso_helper_utils import (
     check_is_admin_only_access,
     has_admin_ui_access,
 )
-from litellm.proxy._types import LitellmUserRoles
+from litellm_proxy._types import LitellmUserRoles
 
 
 def test_check_is_admin_only_access():
diff --git a/tests/local_testing/test_update_spend.py b/tests/local_testing/test_update_spend.py
index cc2c94af27..b287718834 100644
--- a/tests/local_testing/test_update_spend.py
+++ b/tests/local_testing/test_update_spend.py
@@ -28,34 +28,34 @@ import litellm
 from litellm import Router, mock_completion
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.management_endpoints.internal_user_endpoints import (
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.management_endpoints.internal_user_endpoints import (
     new_user,
     user_info,
     user_update,
 )
-from litellm.proxy.management_endpoints.key_management_endpoints import (
+from litellm_proxy.management_endpoints.key_management_endpoints import (
     delete_key_fn,
     generate_key_fn,
     generate_key_helper_fn,
     info_key_fn,
     update_key_fn,
 )
-from litellm.proxy.proxy_server import user_api_key_auth
-from litellm.proxy.management_endpoints.customer_endpoints import block_user
-from litellm.proxy.spend_tracking.spend_management_endpoints import (
+from litellm_proxy.proxy_server import user_api_key_auth
+from litellm_proxy.management_endpoints.customer_endpoints import block_user
+from litellm_proxy.spend_tracking.spend_management_endpoints import (
     spend_key_fn,
     spend_user_fn,
     view_spend_logs,
 )
-from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
+from litellm_proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
 
 verbose_proxy_logger.setLevel(level=logging.DEBUG)
 
 from starlette.datastructures import URL
 
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import (
+from litellm_proxy._types import (
     BlockUsers,
     DynamoDBArgs,
     GenerateKeyRequest,
@@ -71,7 +71,7 @@ proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())
 
 @pytest.fixture
 def prisma_client():
-    from litellm.proxy.proxy_cli import append_query_params
+    from litellm_proxy.proxy_cli import append_query_params
 
     ### add connection pool + pool timeout args
     params = {"connection_limit": 100, "pool_timeout": 60}
@@ -84,11 +84,11 @@ def prisma_client():
         database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj
     )
 
-    # Reset litellm.proxy.proxy_server.prisma_client to None
-    litellm.proxy.proxy_server.litellm_proxy_budget_name = (
+    # Reset litellm_proxy.proxy_server.prisma_client to None
+    litellm_proxy.proxy_server.litellm_proxy_budget_name = (
         f"litellm-proxy-budget-{time.time()}"
    )
-    litellm.proxy.proxy_server.user_custom_key_generate = None
+    litellm_proxy.proxy_server.user_custom_key_generate = None
 
     return prisma_client
 
@@ -102,11 +102,11 @@ async def test_batch_update_spend(prisma_client):
             response_cost=23,
         )
     )
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    await litellm.proxy.proxy_server.prisma_client.connect()
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    await litellm_proxy.proxy_server.prisma_client.connect()
     await update_spend(
-        prisma_client=litellm.proxy.proxy_server.prisma_client,
+        prisma_client=litellm_proxy.proxy_server.prisma_client,
         db_writer_client=None,
         proxy_logging_obj=proxy_logging_obj,
     )
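The string targets handed to `unittest.mock.patch` in the hunks below need the same care as the imports: `patch` resolves its dotted-path argument by importing it when the test runs, so a stale "litellm.proxy..." string would only fail (or patch a dead module) at runtime rather than at import. A sketch of the updated usage, mirroring the alerting test below:

    from unittest.mock import AsyncMock, patch

    # patch() imports "litellm_proxy.proxy_server" and replaces its
    # prisma_client attribute for the duration of the with-block
    with patch("litellm_proxy.proxy_server.prisma_client") as mock_prisma:
        mock_prisma.db.query_raw = AsyncMock(return_value=[])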
diff --git a/tests/logging_callback_tests/test_alerting.py b/tests/logging_callback_tests/test_alerting.py
index 26a5e0822f..b991fda0f5 100644
--- a/tests/logging_callback_tests/test_alerting.py
+++ b/tests/logging_callback_tests/test_alerting.py
@@ -33,8 +33,8 @@ from litellm.integrations.SlackAlerting.slack_alerting import (
     DeploymentMetrics,
     SlackAlerting,
 )
-from litellm.proxy._types import CallInfo
-from litellm.proxy.utils import ProxyLogging
+from litellm_proxy._types import CallInfo
+from litellm_proxy.utils import ProxyLogging
 from litellm.router import AlertingConfig, Router
 from litellm.utils import get_api_base
@@ -944,7 +944,7 @@ async def test_spend_report_cache(report_type):
         {"individual_request_tag": "tag2", "total_spend": 150.0},
     ]
 
-    with patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma:
+    with patch("litellm_proxy.proxy_server.prisma_client") as mock_prisma:
         # Setup mock for database query
         mock_prisma.db.query_raw = AsyncMock(
             side_effect=[mock_spend_data, mock_tag_data]
diff --git a/tests/logging_callback_tests/test_custom_guardrail.py b/tests/logging_callback_tests/test_custom_guardrail.py
index af1270756f..1fd33b59ee 100644
--- a/tests/logging_callback_tests/test_custom_guardrail.py
+++ b/tests/logging_callback_tests/test_custom_guardrail.py
@@ -30,10 +30,10 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
 from litellm.types.guardrails import GuardrailEventHooks
-from litellm.proxy.guardrails.guardrail_endpoints import _get_guardrails_list_response
+from litellm_proxy.guardrails.guardrail_endpoints import _get_guardrails_list_response
 from litellm.types.guardrails import GuardrailInfoResponse, ListGuardrailsResponse
diff --git a/tests/logging_callback_tests/test_log_db_redis_services.py b/tests/logging_callback_tests/test_log_db_redis_services.py
index fa0c3b595a..e92fce37fb 100644
--- a/tests/logging_callback_tests/test_log_db_redis_services.py
+++ b/tests/logging_callback_tests/test_log_db_redis_services.py
@@ -17,7 +17,7 @@ import pytest
 import litellm
 from litellm import completion
 from litellm._logging import verbose_logger
-from litellm.proxy.utils import log_db_metrics, ServiceTypes
+from litellm_proxy.utils import log_db_metrics, ServiceTypes
 from datetime import datetime
 import httpx
 from prisma.errors import ClientNotConnectedError
@@ -37,7 +37,7 @@ async def sample_proxy_function(*args, **kwargs):
 @pytest.mark.asyncio
 async def test_log_db_metrics_success():
     # Mock the proxy_logging_obj
-    with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
+    with patch("litellm_proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
         # Setup mock
         mock_proxy_logging.service_logging_obj.async_service_success_hook = AsyncMock()
@@ -65,7 +65,7 @@ async def test_log_db_metrics_success():
 @pytest.mark.asyncio
 async def test_log_db_metrics_duration():
     # Mock the proxy_logging_obj
-    with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
+    with patch("litellm_proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
         # Setup mock
         mock_proxy_logging.service_logging_obj.async_service_success_hook = AsyncMock()
@@ -104,7 +104,7 @@ async def test_log_db_metrics_failure():
     # Mock the proxy_logging_obj
     from prisma.errors import ClientNotConnectedError
 
-    with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
+    with patch("litellm_proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
         # Setup mock
         mock_proxy_logging.service_logging_obj.async_service_failure_hook = AsyncMock()
@@ -158,7 +158,7 @@ async def test_log_db_metrics_failure_error_types(exception, should_log):
     - DB-related errors (Prisma, httpx) are logged as service failures
     - Non-DB errors (ValueError, KeyError, etc.) are not logged
     """
-    with patch("litellm.proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
+    with patch("litellm_proxy.proxy_server.proxy_logging_obj") as mock_proxy_logging:
         mock_proxy_logging.service_logging_obj.async_service_failure_hook = AsyncMock()
 
         @log_db_metrics
diff --git a/tests/logging_callback_tests/test_opentelemetry_unit_tests.py b/tests/logging_callback_tests/test_opentelemetry_unit_tests.py
index b0d09562c5..a9a1652353 100644
--- a/tests/logging_callback_tests/test_opentelemetry_unit_tests.py
+++ b/tests/logging_callback_tests/test_opentelemetry_unit_tests.py
@@ -26,7 +26,7 @@ class TestOpentelemetryUnitTests(BaseLoggingCallbackTest):
     def test_parallel_tool_calls(self, mock_response_obj: ModelResponse):
         tool_calls = mock_response_obj.choices[0].message.tool_calls
         from litellm.integrations.opentelemetry import OpenTelemetry
-        from litellm.proxy._types import SpanAttributes
+        from litellm_proxy._types import SpanAttributes
 
         kv_pair_dict = OpenTelemetry._tool_calls_kv_pair(tool_calls)
diff --git a/tests/logging_callback_tests/test_otel_logging.py b/tests/logging_callback_tests/test_otel_logging.py
index ecbeef5d88..b8f0aba1d9 100644
--- a/tests/logging_callback_tests/test_otel_logging.py
+++ b/tests/logging_callback_tests/test_otel_logging.py
@@ -15,7 +15,7 @@ import asyncio
 import logging
 from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 from litellm._logging import verbose_logger
-from litellm.proxy._types import SpanAttributes
+from litellm_proxy._types import SpanAttributes
 
 verbose_logger.setLevel(logging.DEBUG)
diff --git a/tests/logging_callback_tests/test_pagerduty_alerting.py b/tests/logging_callback_tests/test_pagerduty_alerting.py
index 00e427d01d..4559b89d27 100644
--- a/tests/logging_callback_tests/test_pagerduty_alerting.py
+++ b/tests/logging_callback_tests/test_pagerduty_alerting.py
@@ -9,7 +9,7 @@ sys.path.insert(0, os.path.abspath("../.."))
 import pytest
 import litellm
 from litellm.integrations.pagerduty.pagerduty import PagerDutyAlerting, AlertingConfig
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 
 
 @pytest.mark.asyncio
diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py
index 0b58bc7aaf..fe4ee31aef 100644
--- a/tests/logging_callback_tests/test_prometheus_unit_tests.py
+++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py
@@ -30,7 +30,7 @@ import pytest
 from unittest.mock import MagicMock, patch, call
 from datetime import datetime, timedelta, timezone
 from litellm.integrations.prometheus import PrometheusLogger
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 
 verbose_logger.setLevel(logging.DEBUG)
@@ -274,9 +274,9 @@ async def test_increment_remaining_budget_metrics(prometheus_logger):
     future_reset_time_key = datetime.now() + timedelta(hours=12)
     # Mock the get_team_object and get_key_object functions to return objects with budget reset times
     with patch(
-        "litellm.proxy.auth.auth_checks.get_team_object"
+        "litellm_proxy.auth.auth_checks.get_team_object"
     ) as mock_get_team, patch(
-        "litellm.proxy.auth.auth_checks.get_key_object"
+        "litellm_proxy.auth.auth_checks.get_key_object"
     ) as mock_get_key:
         mock_get_team.return_value = MagicMock(budget_reset_at=future_reset_time_team)
@@ -1081,8 +1081,8 @@ async def test_initialize_remaining_budget_metrics(prometheus_logger):
     """
     litellm.prometheus_initialize_budget_metrics = True
     # Mock the prisma client and get_paginated_teams function
-    with patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma, patch(
-        "litellm.proxy.management_endpoints.team_endpoints.get_paginated_teams"
+    with patch("litellm_proxy.proxy_server.prisma_client") as mock_prisma, patch(
+        "litellm_proxy.management_endpoints.team_endpoints.get_paginated_teams"
     ) as mock_get_teams:
 
         # Create mock team data with proper datetime objects for budget_reset_at
@@ -1177,10 +1177,10 @@ async def test_initialize_remaining_budget_metrics_exception_handling(
     """
     litellm.prometheus_initialize_budget_metrics = True
    # Mock the prisma client and get_paginated_teams function to raise an exception
-    with patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma, patch(
-        "litellm.proxy.management_endpoints.team_endpoints.get_paginated_teams"
+    with patch("litellm_proxy.proxy_server.prisma_client") as mock_prisma, patch(
+        "litellm_proxy.management_endpoints.team_endpoints.get_paginated_teams"
     ) as mock_get_teams, patch(
-        "litellm.proxy.management_endpoints.key_management_endpoints._list_key_helper"
+        "litellm_proxy.management_endpoints.key_management_endpoints._list_key_helper"
     ) as mock_list_keys:
 
         # Make get_paginated_teams raise an exception
@@ -1219,8 +1219,8 @@ async def test_initialize_api_key_budget_metrics(prometheus_logger):
     """
     litellm.prometheus_initialize_budget_metrics = True
     # Mock the prisma client and _list_key_helper function
-    with patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma, patch(
-        "litellm.proxy.management_endpoints.key_management_endpoints._list_key_helper"
+    with patch("litellm_proxy.proxy_server.prisma_client") as mock_prisma, patch(
+        "litellm_proxy.management_endpoints.key_management_endpoints._list_key_helper"
     ) as mock_list_keys:
 
         # Create mock key data with proper datetime objects for budget_reset_at
diff --git a/tests/logging_callback_tests/test_spend_logs.py b/tests/logging_callback_tests/test_spend_logs.py
index d592931f25..6bca753c87 100644
--- a/tests/logging_callback_tests/test_spend_logs.py
+++ b/tests/logging_callback_tests/test_spend_logs.py
@@ -25,8 +25,8 @@ from typing import Optional
 
 import pytest
 import litellm
-from litellm.proxy.spend_tracking.spend_tracking_utils import get_logging_payload
-from litellm.proxy._types import SpendLogsMetadata, SpendLogsPayload
+from litellm_proxy.spend_tracking.spend_tracking_utils import get_logging_payload
+from litellm_proxy._types import SpendLogsMetadata, SpendLogsPayload
 
 
 @pytest.mark.parametrize(
@@ -323,7 +323,7 @@ def test_spend_logs_payload_with_prompts_enabled(monkeypatch):
     Test that messages and responses are logged in spend logs when store_prompts_in_spend_logs is enabled
     """
     # Mock general_settings
-    from litellm.proxy.proxy_server import general_settings
+    from litellm_proxy.proxy_server import general_settings
 
     general_settings["store_prompts_in_spend_logs"] = True
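One subtlety in the spend-logs hunk above: `from litellm_proxy.proxy_server import general_settings` binds the test to the same dict object the proxy reads, so mutating it in place is visible globally, while rebinding the local name would not be. An illustration, assuming the module-level dict survives the rename unchanged:

    from litellm_proxy.proxy_server import general_settings

    # in-place mutation: seen by every module holding a reference to the dict
    general_settings["store_prompts_in_spend_logs"] = True

    # rebinding would only change the local name; the proxy would still
    # see the old dict:
    # general_settings = {"store_prompts_in_spend_logs": True}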
diff --git a/tests/logging_callback_tests/test_unit_test_litellm_logging.py b/tests/logging_callback_tests/test_unit_test_litellm_logging.py
index 455d0dacb9..fa1917a8c4 100644
--- a/tests/logging_callback_tests/test_unit_test_litellm_logging.py
+++ b/tests/logging_callback_tests/test_unit_test_litellm_logging.py
@@ -13,8 +13,8 @@ from typing import Literal
 import pytest
 import litellm
 from litellm.litellm_core_utils.litellm_logging import Logging
-from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
-from litellm.proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
+from litellm_proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
+from litellm_proxy.hooks.cache_control_check import _PROXY_CacheControlCheck
 from litellm._service_logger import ServiceLogging
 import asyncio
diff --git a/tests/logging_callback_tests/test_unit_tests_init_callbacks.py b/tests/logging_callback_tests/test_unit_tests_init_callbacks.py
index 445c773d99..e3b40c6143 100644
--- a/tests/logging_callback_tests/test_unit_tests_init_callbacks.py
+++ b/tests/logging_callback_tests/test_unit_tests_init_callbacks.py
@@ -40,7 +40,7 @@ from litellm.integrations.langfuse.langfuse_prompt_management import (
 from litellm.integrations.azure_storage.azure_storage import AzureBlobStorageLogger
 from litellm.integrations.agentops import AgentOps
 from litellm.integrations.humanloop import HumanloopLogger
-from litellm.proxy.hooks.dynamic_rate_limiter import _PROXY_DynamicRateLimitHandler
+from litellm_proxy.hooks.dynamic_rate_limiter import _PROXY_DynamicRateLimitHandler
 from unittest.mock import patch
 
 # clear prometheus collectors / registry
diff --git a/tests/mcp_tests/test_mcp_server.py b/tests/mcp_tests/test_mcp_server.py
index 2cf9193871..d07506decb 100644
--- a/tests/mcp_tests/test_mcp_server.py
+++ b/tests/mcp_tests/test_mcp_server.py
@@ -7,7 +7,7 @@ sys.path.insert(
     0, os.path.abspath("../../..")
 )  # Adds the parent directory to the system path
 
-from litellm.proxy._experimental.mcp_server.mcp_server_manager import (
+from litellm_proxy._experimental.mcp_server.mcp_server_manager import (
     MCPServerManager,
     MCPSSEServer,
 )
diff --git a/tests/otel_tests/test_team_member_permissions.py b/tests/otel_tests/test_team_member_permissions.py
index 8c98fa3b92..d4dc61e6a2 100644
--- a/tests/otel_tests/test_team_member_permissions.py
+++ b/tests/otel_tests/test_team_member_permissions.py
@@ -47,7 +47,7 @@ import asyncio
 import aiohttp, openai
 import uuid
 import json
-from litellm.proxy._types import ProxyErrorTypes
+from litellm_proxy._types import ProxyErrorTypes
 from typing import Optional
 
 LITELLM_MASTER_KEY = "sk-1234"
diff --git a/tests/pass_through_unit_tests/test_assemblyai_unit_tests_passthrough.py b/tests/pass_through_unit_tests/test_assemblyai_unit_tests_passthrough.py
index 963f1ad6ef..67f23f55d2 100644
--- a/tests/pass_through_unit_tests/test_assemblyai_unit_tests_passthrough.py
+++ b/tests/pass_through_unit_tests/test_assemblyai_unit_tests_passthrough.py
@@ -29,11 +29,11 @@ import httpx
 import pytest
 import litellm
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
-from litellm.proxy.pass_through_endpoints.llm_provider_handlers.assembly_passthrough_logging_handler import (
+from litellm_proxy.pass_through_endpoints.llm_provider_handlers.assembly_passthrough_logging_handler import (
     AssemblyAIPassthroughLoggingHandler,
     AssemblyAITranscriptResponse,
 )
-from litellm.proxy.pass_through_endpoints.success_handler import (
+from litellm_proxy.pass_through_endpoints.success_handler import (
     PassThroughEndpointLogging,
 )
@@ -69,7 +69,7 @@ def test_get_assembly_transcript(assembly_handler, mock_transcript_response):
     """
     # Patch get_credentials to return "test-key"
     with patch(
-        "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router.get_credentials",
+        "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router.get_credentials",
         return_value="test-key",
     ):
         with patch("httpx.get") as mock_get:
@@ -95,7 +95,7 @@ def test_poll_assembly_for_transcript_response(
     Test that the _poll_assembly_for_transcript_response method returns the correct transcript response
     """
     with patch(
-        "litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router.get_credentials",
+        "litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router.get_credentials",
         return_value="test-key",
     ):
         with patch("httpx.get") as mock_get:
diff --git a/tests/pass_through_unit_tests/test_custom_logger_passthrough.py b/tests/pass_through_unit_tests/test_custom_logger_passthrough.py
index 747eb4bdbe..5d8e5f2223 100644
--- a/tests/pass_through_unit_tests/test_custom_logger_passthrough.py
+++ b/tests/pass_through_unit_tests/test_custom_logger_passthrough.py
@@ -13,10 +13,10 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 
 import litellm
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.types.passthrough_endpoints.pass_through_endpoints import PassthroughStandardLoggingPayload
 from litellm.integrations.custom_logger import CustomLogger
-from litellm.proxy.pass_through_endpoints.pass_through_endpoints import pass_through_request
+from litellm_proxy.pass_through_endpoints.pass_through_endpoints import pass_through_request
 
 class TestCustomLogger(CustomLogger):
     def __init__(self):
diff --git a/tests/pass_through_unit_tests/test_pass_through_unit_tests.py b/tests/pass_through_unit_tests/test_pass_through_unit_tests.py
index dd5fc4275e..0cf42ceeab 100644
--- a/tests/pass_through_unit_tests/test_pass_through_unit_tests.py
+++ b/tests/pass_through_unit_tests/test_pass_through_unit_tests.py
@@ -18,19 +18,19 @@ import litellm
 from typing import AsyncGenerator
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.types.passthrough_endpoints.pass_through_endpoints import EndpointType
-from litellm.proxy.pass_through_endpoints.success_handler import (
+from litellm_proxy.pass_through_endpoints.success_handler import (
     PassThroughEndpointLogging,
 )
-from litellm.proxy.pass_through_endpoints.streaming_handler import (
+from litellm_proxy.pass_through_endpoints.streaming_handler import (
     PassThroughStreamingHandler,
 )
-from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
+from litellm_proxy.pass_through_endpoints.pass_through_endpoints import (
     pass_through_request,
 )
 from fastapi import Request
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.pass_through_endpoints.pass_through_endpoints import (
     _init_kwargs_for_pass_through_endpoint,
     _update_metadata_with_tags_in_header,
 )
@@ -253,7 +253,7 @@ async def test_pass_through_request_logging_failure(
     # Patch both the logging handler and the httpx client
     with patch(
-        "litellm.proxy.pass_through_endpoints.pass_through_endpoints.PassThroughEndpointLogging.pass_through_async_success_handler",
+        "litellm_proxy.pass_through_endpoints.pass_through_endpoints.PassThroughEndpointLogging.pass_through_async_success_handler",
         new=mock_logging_failure,
     ), patch(
         "httpx.AsyncClient.send",
@@ -320,7 +320,7 @@ async def test_pass_through_request_logging_failure_with_stream(
     # Patch both the logging handler and the httpx client
     with patch(
-        "litellm.proxy.pass_through_endpoints.streaming_handler.PassThroughStreamingHandler._route_streaming_logging_to_handler",
+        "litellm_proxy.pass_through_endpoints.streaming_handler.PassThroughStreamingHandler._route_streaming_logging_to_handler",
         new=mock_logging_failure,
     ), patch(
         "httpx.AsyncClient.send",
@@ -363,7 +363,7 @@ def test_pass_through_routes_support_all_methods():
     Test that all pass-through routes support GET, POST, PUT, DELETE, PATCH methods
     """
     # Import the routers
-    from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
+    from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import (
         router as llm_router,
     )
@@ -391,7 +391,7 @@ def test_is_bedrock_agent_runtime_route():
     """
     Test that _is_bedrock_agent_runtime_route correctly identifies bedrock agent runtime endpoints
     """
-    from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
+    from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import (
         _is_bedrock_agent_runtime_route,
     )
diff --git a/tests/pass_through_unit_tests/test_unit_test_anthropic_pass_through.py b/tests/pass_through_unit_tests/test_unit_test_anthropic_pass_through.py
index bcd93de0bb..5df6806139 100644
--- a/tests/pass_through_unit_tests/test_unit_test_anthropic_pass_through.py
+++ b/tests/pass_through_unit_tests/test_unit_test_anthropic_pass_through.py
@@ -15,7 +15,7 @@ import litellm
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 
 # Import the class we're testing
-from litellm.proxy.pass_through_endpoints.llm_provider_handlers.anthropic_passthrough_logging_handler import (
+from litellm_proxy.pass_through_endpoints.llm_provider_handlers.anthropic_passthrough_logging_handler import (
     AnthropicPassthroughLoggingHandler,
 )
@@ -207,7 +207,7 @@ def test_create_anthropic_response_logging_payload(mock_logging_obj, metadata_pa
     [{"litellm_metadata": {"user": "test"}}, {"metadata": {"user_id": "test"}}],
 )
 def test_get_user_from_metadata(end_user_id):
-    from litellm.proxy.pass_through_endpoints.llm_provider_handlers.anthropic_passthrough_logging_handler import (
+    from litellm_proxy.pass_through_endpoints.llm_provider_handlers.anthropic_passthrough_logging_handler import (
         AnthropicPassthroughLoggingHandler,
         PassthroughStandardLoggingPayload,
     )
@@ -310,7 +310,7 @@ def all_chunks():
 
 def test_handle_logging_anthropic_collected_chunks(all_chunks):
-    from litellm.proxy.pass_through_endpoints.llm_provider_handlers.anthropic_passthrough_logging_handler import (
+    from litellm_proxy.pass_through_endpoints.llm_provider_handlers.anthropic_passthrough_logging_handler import (
         AnthropicPassthroughLoggingHandler,
         PassthroughStandardLoggingPayload,
         EndpointType,
@@ -357,7 +357,7 @@ def test_handle_logging_anthropic_collected_chunks(all_chunks):
 
 def test_build_complete_streaming_response(all_chunks):
-    from litellm.proxy.pass_through_endpoints.llm_provider_handlers.anthropic_passthrough_logging_handler import (
+    from litellm_proxy.pass_through_endpoints.llm_provider_handlers.anthropic_passthrough_logging_handler import (
         AnthropicPassthroughLoggingHandler,
     )
     from litellm.types.utils import ModelResponse
diff --git a/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py b/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py
index 8e016b68d0..820a0d5144 100644
--- a/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py
+++ b/tests/pass_through_unit_tests/test_unit_test_passthrough_router.py
@@ -8,7 +8,7 @@ sys.path.insert(0, os.path.abspath("../.."))  #
 import unittest
 from unittest.mock import patch
 
-from litellm.proxy.pass_through_endpoints.passthrough_endpoint_router import (
+from litellm_proxy.pass_through_endpoints.passthrough_endpoint_router import (
     PassthroughEndpointRouter,
 )
 from litellm.types.passthrough_endpoints.vertex_ai import VertexPassThroughCredentials
@@ -72,7 +72,7 @@ class TestPassthroughEndpointRouter(unittest.TestCase):
         """
         # Patch the get_secret_str function within the router's module.
         with patch(
-            "litellm.proxy.pass_through_endpoints.passthrough_endpoint_router.get_secret_str"
+            "litellm_proxy.pass_through_endpoints.passthrough_endpoint_router.get_secret_str"
         ) as mock_get_secret:
             mock_get_secret.return_value = "env_openai_key"
             # For "openai", if credentials are not set, it should fallback to the env variable.
             result = self.router.get_credentials("openai", None)
             mock_get_secret.assert_called_once_with("OPENAI_API_KEY")
 
         with patch(
-            "litellm.proxy.pass_through_endpoints.passthrough_endpoint_router.get_secret_str"
+            "litellm_proxy.pass_through_endpoints.passthrough_endpoint_router.get_secret_str"
         ) as mock_get_secret:
             mock_get_secret.return_value = "env_cohere_key"
             result = self.router.get_credentials("cohere", None)
             mock_get_secret.assert_called_once_with("COHERE_API_KEY")
 
         with patch(
-            "litellm.proxy.pass_through_endpoints.passthrough_endpoint_router.get_secret_str"
+            "litellm_proxy.pass_through_endpoints.passthrough_endpoint_router.get_secret_str"
         ) as mock_get_secret:
             mock_get_secret.return_value = "env_anthropic_key"
             result = self.router.get_credentials("anthropic", None)
             mock_get_secret.assert_called_once_with("ANTHROPIC_API_KEY")
 
         with patch(
-            "litellm.proxy.pass_through_endpoints.passthrough_endpoint_router.get_secret_str"
+            "litellm_proxy.pass_through_endpoints.passthrough_endpoint_router.get_secret_str"
         ) as mock_get_secret:
             mock_get_secret.return_value = "env_azure_key"
             result = self.router.get_credentials("azure", None)
diff --git a/tests/pass_through_unit_tests/test_unit_test_streaming.py b/tests/pass_through_unit_tests/test_unit_test_streaming.py
index d3e0b6b0b0..425e8d8bed 100644
--- a/tests/pass_through_unit_tests/test_unit_test_streaming.py
+++ b/tests/pass_through_unit_tests/test_unit_test_streaming.py
@@ -15,10 +15,10 @@ from typing import AsyncGenerator
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.types.passthrough_endpoints.pass_through_endpoints import EndpointType
 from litellm.types.passthrough_endpoints.pass_through_endpoints import PassthroughStandardLoggingPayload
-from litellm.proxy.pass_through_endpoints.success_handler import (
+from litellm_proxy.pass_through_endpoints.success_handler import (
     PassThroughEndpointLogging,
 )
-from litellm.proxy.pass_through_endpoints.streaming_handler import (
+from litellm_proxy.pass_through_endpoints.streaming_handler import (
     PassThroughStreamingHandler,
 )
diff --git a/tests/proxy_admin_ui_tests/test_key_management.py b/tests/proxy_admin_ui_tests/test_key_management.py
index b943b9591c..be30bbcb34 100644
--- a/tests/proxy_admin_ui_tests/test_key_management.py
+++ b/tests/proxy_admin_ui_tests/test_key_management.py
@@ -26,15 +26,15 @@ import logging
 
 import pytest
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy.management_endpoints.team_endpoints import list_team
-from litellm.proxy._types import *
-from
litellm.proxy.management_endpoints.internal_user_endpoints import ( +from litellm_proxy.management_endpoints.team_endpoints import list_team +from litellm_proxy._types import * +from litellm_proxy.management_endpoints.internal_user_endpoints import ( new_user, user_info, user_update, get_users, ) -from litellm.proxy.management_endpoints.key_management_endpoints import ( +from litellm_proxy.management_endpoints.key_management_endpoints import ( delete_key_fn, generate_key_fn, generate_key_helper_fn, @@ -42,12 +42,12 @@ from litellm.proxy.management_endpoints.key_management_endpoints import ( regenerate_key_fn, update_key_fn, ) -from litellm.proxy.management_endpoints.team_endpoints import ( +from litellm_proxy.management_endpoints.team_endpoints import ( new_team, team_info, update_team, ) -from litellm.proxy.proxy_server import ( +from litellm_proxy.proxy_server import ( LitellmUserRoles, audio_transcriptions, chat_completion, @@ -58,10 +58,10 @@ from litellm.proxy.proxy_server import ( moderations, user_api_key_auth, ) -from litellm.proxy.management_endpoints.customer_endpoints import ( +from litellm_proxy.management_endpoints.customer_endpoints import ( new_end_user, ) -from litellm.proxy.spend_tracking.spend_management_endpoints import ( +from litellm_proxy.spend_tracking.spend_management_endpoints import ( global_spend, global_spend_logs, global_spend_models, @@ -70,14 +70,14 @@ from litellm.proxy.spend_tracking.spend_management_endpoints import ( spend_user_fn, view_spend_logs, ) -from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend +from litellm_proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend verbose_proxy_logger.setLevel(level=logging.DEBUG) from starlette.datastructures import URL from litellm.caching.caching import DualCache -from litellm.proxy._types import ( +from litellm_proxy._types import ( DynamoDBArgs, GenerateKeyRequest, KeyRequest, @@ -99,7 +99,7 @@ proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache()) @pytest.fixture def prisma_client(): - from litellm.proxy.proxy_cli import append_query_params + from litellm_proxy.proxy_cli import append_query_params ### add connection pool + pool timeout args. 
params = {"connection_limit": 100, "pool_timeout": 60} @@ -112,11 +112,11 @@ def prisma_client(): database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj ) - # Reset litellm.proxy.proxy_server.prisma_client to None - litellm.proxy.proxy_server.litellm_proxy_budget_name = ( + # Reset litellm_proxy.proxy_server.prisma_client to None + litellm_proxy.proxy_server.litellm_proxy_budget_name = ( f"litellm-proxy-budget-{time.time()}" ) - litellm.proxy.proxy_server.user_custom_key_generate = None + litellm_proxy.proxy_server.user_custom_key_generate = None return prisma_client @@ -125,9 +125,9 @@ def prisma_client(): @pytest.mark.asyncio() async def test_regenerate_api_key(prisma_client): litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() # generate new key key_alias = f"test_alias_regenerate_key-{uuid.uuid4()}" @@ -226,9 +226,9 @@ async def test_regenerate_api_key(prisma_client): @pytest.mark.asyncio() async def test_regenerate_api_key_with_new_alias_and_expiration(prisma_client): litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() import uuid # generate new key @@ -277,9 +277,9 @@ async def test_regenerate_api_key_with_new_alias_and_expiration(prisma_client): @pytest.mark.asyncio() async def test_regenerate_key_ui(prisma_client): litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() import uuid # generate new key @@ -335,9 +335,9 @@ async def test_get_users(prisma_client): Admin UI calls this endpoint to list all Internal Users """ litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() # Create some test users test_users = [ @@ -386,9 +386,9 @@ async def test_get_users_filters_dashboard_keys(prisma_client): The dashboard keys should be filtered out from the response """ litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await 
litellm_proxy.proxy_server.prisma_client.connect() # Create a test user new_user_id = f"test_user_with_keys-{uuid.uuid4()}" @@ -470,9 +470,9 @@ async def test_get_users_key_count(prisma_client): Test that verifies the key_count in get_users increases when a new key is created for a user """ litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() # Get initial user list and select the first user initial_users = await get_users(role=None, page=1, page_size=20) @@ -523,9 +523,9 @@ async def test_list_teams(prisma_client): Tests /team/list endpoint to verify it returns both keys and members_with_roles """ litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() # Delete all existing teams first await cleanup_existing_teams(prisma_client) @@ -629,18 +629,18 @@ async def test_list_teams(prisma_client): def test_is_team_key(): - from litellm.proxy.management_endpoints.key_management_endpoints import _is_team_key + from litellm_proxy.management_endpoints.key_management_endpoints import _is_team_key assert _is_team_key(GenerateKeyRequest(team_id="test_team_id")) assert not _is_team_key(GenerateKeyRequest(user_id="test_user_id")) def test_team_key_generation_team_member_check(): - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( _team_key_generation_check, ) from fastapi import HTTPException - from litellm.proxy._types import LiteLLM_TeamTableCachedObj + from litellm_proxy._types import LiteLLM_TeamTableCachedObj litellm.key_generation_settings = { "team_key_generation": {"allowed_team_member_roles": ["admin"]} @@ -700,7 +700,7 @@ def test_team_key_generation_team_member_check(): def test_key_generation_required_params_check( team_key_generation_settings, input_data, expected_result, key_type ): - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( _team_key_generation_check, _personal_key_generation_check, ) @@ -709,7 +709,7 @@ def test_key_generation_required_params_check( StandardKeyGenerationConfig, PersonalUIKeyGenerationConfig, ) - from litellm.proxy._types import LiteLLM_TeamTableCachedObj + from litellm_proxy._types import LiteLLM_TeamTableCachedObj from fastapi import HTTPException user_api_key_dict = UserAPIKeyAuth( @@ -767,7 +767,7 @@ def test_key_generation_required_params_check( def test_personal_key_generation_check(): - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( _personal_key_generation_check, ) from fastapi import HTTPException @@ -820,7 +820,7 @@ def test_personal_key_generation_check(): def test_prepare_metadata_fields( update_request_data, non_default_values, existing_metadata, 
expected_result ): - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( prepare_metadata_fields, ) @@ -838,14 +838,14 @@ def test_prepare_metadata_fields( @pytest.mark.asyncio async def test_key_update_with_model_specific_params(prisma_client): - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( update_key_fn, ) - from litellm.proxy._types import UpdateKeyRequest + from litellm_proxy._types import UpdateKeyRequest new_key = await generate_key_fn( data=GenerateKeyRequest(models=["gpt-4"]), @@ -919,14 +919,14 @@ async def test_list_key_helper(prisma_client): 4. Filtering by key_alias 5. Return full object vs token only """ - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( _list_key_helper, ) # Setup - create multiple test keys - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() # Create test data test_user_id = f"test_user_{uuid.uuid4()}" @@ -1069,15 +1069,15 @@ async def test_list_key_helper_team_filtering(prisma_client): 3. Verify keys with team_id=None are included 4. Test with pagination to ensure behavior is consistent across pages """ - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( _list_key_helper, ) import uuid # Setup - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() # Create test data with different team_ids test_keys = [] @@ -1180,13 +1180,13 @@ async def test_list_key_helper_team_filtering(prisma_client): @pytest.mark.asyncio -@patch("litellm.proxy.management_endpoints.key_management_endpoints.get_team_object") +@patch("litellm_proxy.management_endpoints.key_management_endpoints.get_team_object") async def test_key_generate_always_db_team(mock_get_team_object): - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( generate_key_fn, ) - setattr(litellm.proxy.proxy_server, "prisma_client", MagicMock()) + setattr(litellm_proxy.proxy_server, "prisma_client", MagicMock()) mock_get_team_object.return_value = None try: await generate_key_fn( @@ -1222,9 +1222,9 @@ async def test_team_model_alias(prisma_client, requested_model, should_pass): 3. 
Verify chat completion request works with aliased model = `gpt-4o` """ litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() # Create team with model alias team_id = f"test_team_{uuid.uuid4()}" diff --git a/tests/proxy_admin_ui_tests/test_role_based_access.py b/tests/proxy_admin_ui_tests/test_role_based_access.py index ff73143bf4..603ef703e2 100644 --- a/tests/proxy_admin_ui_tests/test_role_based_access.py +++ b/tests/proxy_admin_ui_tests/test_role_based_access.py @@ -29,8 +29,8 @@ import pytest import litellm from litellm._logging import verbose_proxy_logger -from litellm.proxy.auth.auth_checks import get_user_object -from litellm.proxy.management_endpoints.key_management_endpoints import ( +from litellm_proxy.auth.auth_checks import get_user_object +from litellm_proxy.management_endpoints.key_management_endpoints import ( delete_key_fn, generate_key_fn, generate_key_helper_fn, @@ -38,18 +38,18 @@ from litellm.proxy.management_endpoints.key_management_endpoints import ( regenerate_key_fn, update_key_fn, ) -from litellm.proxy.management_endpoints.internal_user_endpoints import new_user -from litellm.proxy.management_endpoints.organization_endpoints import ( +from litellm_proxy.management_endpoints.internal_user_endpoints import new_user +from litellm_proxy.management_endpoints.organization_endpoints import ( new_organization, organization_member_add, ) -from litellm.proxy.management_endpoints.team_endpoints import ( +from litellm_proxy.management_endpoints.team_endpoints import ( new_team, team_info, update_team, ) -from litellm.proxy.proxy_server import ( +from litellm_proxy.proxy_server import ( LitellmUserRoles, audio_transcriptions, chat_completion, @@ -60,10 +60,10 @@ from litellm.proxy.proxy_server import ( moderations, user_api_key_auth, ) -from litellm.proxy.management_endpoints.customer_endpoints import ( +from litellm_proxy.management_endpoints.customer_endpoints import ( new_end_user, ) -from litellm.proxy.spend_tracking.spend_management_endpoints import ( +from litellm_proxy.spend_tracking.spend_management_endpoints import ( global_spend, global_spend_logs, global_spend_models, @@ -74,21 +74,21 @@ from litellm.proxy.spend_tracking.spend_management_endpoints import ( ) from starlette.datastructures import URL -from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend +from litellm_proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend verbose_proxy_logger.setLevel(level=logging.DEBUG) from starlette.datastructures import URL from litellm.caching.caching import DualCache -from litellm.proxy._types import * +from litellm_proxy._types import * proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache()) @pytest.fixture def prisma_client(): - from litellm.proxy.proxy_cli import append_query_params + from litellm_proxy.proxy_cli import append_query_params ### add connection pool + pool timeout args params = {"connection_limit": 100, "pool_timeout": 60} @@ -101,11 +101,11 @@ def prisma_client(): database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj ) - # Reset litellm.proxy.proxy_server.prisma_client to None - litellm.proxy.proxy_server.litellm_proxy_budget_name = 
( + # Reset litellm_proxy.proxy_server.prisma_client to None + litellm_proxy.proxy_server.litellm_proxy_budget_name = ( f"litellm-proxy-budget-{time.time()}" ) - litellm.proxy.proxy_server.user_custom_key_generate = None + litellm_proxy.proxy_server.user_custom_key_generate = None return prisma_client @@ -139,10 +139,10 @@ async def test_create_new_user_in_organization(prisma_client, user_role): Add a member to an organization and assert the user object is created with the correct organization memberships / roles """ master_key = "sk-1234" - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", master_key) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", master_key) - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() created_user_id = f"new-user-{uuid.uuid4()}" @@ -201,10 +201,10 @@ async def test_org_admin_create_team_permissions(prisma_client): import json master_key = "sk-1234" - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", master_key) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", master_key) - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() response = await new_organization( data=NewOrganizationRequest( @@ -272,10 +272,10 @@ async def test_org_admin_create_user_permissions(prisma_client): import json master_key = "sk-1234" - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", master_key) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", master_key) - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() # create new org response = await new_organization( @@ -343,10 +343,10 @@ async def test_org_admin_create_user_team_wrong_org_permissions(prisma_client): import json master_key = "sk-1234" - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", master_key) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", master_key) - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() created_user_id = f"new-user-{uuid.uuid4()}" response = await new_organization( data=NewOrganizationRequest( @@ -486,9 +486,9 @@ async def test_user_role_permissions(prisma_client, route, user_role, expected_r """Test user role based permissions for different routes""" try: # Setup - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() # Admin - admin creates a new user user_api_key_dict = UserAPIKeyAuth( diff --git a/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py b/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py index 937eb6f298..e731feb1a4 100644 --- 
a/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py +++ b/tests/proxy_admin_ui_tests/test_route_check_unit_tests.py @@ -25,14 +25,14 @@ import logging from fastapi import HTTPException, Request import pytest -from litellm.proxy.auth.route_checks import RouteChecks -from litellm.proxy._types import LiteLLM_UserTable, LitellmUserRoles, UserAPIKeyAuth -from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( +from litellm_proxy.auth.route_checks import RouteChecks +from litellm_proxy._types import LiteLLM_UserTable, LitellmUserRoles, UserAPIKeyAuth +from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import ( router as llm_passthrough_router, ) # Replace the actual hash_token function with our mock -import litellm.proxy.auth.route_checks +import litellm_proxy.auth.route_checks # Mock objects and functions @@ -45,7 +45,7 @@ def mock_hash_token(token): return token -litellm.proxy.auth.route_checks.hash_token = mock_hash_token +litellm_proxy.auth.route_checks.hash_token = mock_hash_token # Test is_llm_api_route diff --git a/tests/proxy_admin_ui_tests/test_sso_sign_in.py b/tests/proxy_admin_ui_tests/test_sso_sign_in.py index 5de198b04b..38e4a9be1c 100644 --- a/tests/proxy_admin_ui_tests/test_sso_sign_in.py +++ b/tests/proxy_admin_ui_tests/test_sso_sign_in.py @@ -10,10 +10,10 @@ sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path import litellm -from litellm.proxy.proxy_server import app -from litellm.proxy.utils import PrismaClient, ProxyLogging -from litellm.proxy.management_endpoints.ui_sso import auth_callback -from litellm.proxy._types import LitellmUserRoles +from litellm_proxy.proxy_server import app +from litellm_proxy.utils import PrismaClient, ProxyLogging +from litellm_proxy.management_endpoints.ui_sso import auth_callback +from litellm_proxy._types import LitellmUserRoles import os import jwt import time @@ -32,7 +32,7 @@ def mock_env_vars(monkeypatch): @pytest.fixture def prisma_client(): - from litellm.proxy.proxy_cli import append_query_params + from litellm_proxy.proxy_cli import append_query_params ### add connection pool + pool timeout args params = {"connection_limit": 100, "pool_timeout": 60} @@ -45,11 +45,11 @@ def prisma_client(): database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj ) - # Reset litellm.proxy.proxy_server.prisma_client to None - litellm.proxy.proxy_server.litellm_proxy_budget_name = ( + # Reset litellm_proxy.proxy_server.prisma_client to None + litellm_proxy.proxy_server.litellm_proxy_budget_name = ( f"litellm-proxy-budget-{time.time()}" ) - litellm.proxy.proxy_server.user_custom_key_generate = None + litellm_proxy.proxy_server.user_custom_key_generate = None return prisma_client @@ -71,11 +71,11 @@ async def test_auth_callback_new_user(mock_google_sso, mock_env_vars, prisma_cli try: # Set up the prisma client - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + await litellm_proxy.proxy_server.prisma_client.connect() # Set up the master key - litellm.proxy.proxy_server.master_key = "mock_master_key" + litellm_proxy.proxy_server.master_key = "mock_master_key" # Mock the GoogleSSO verify_and_process method mock_sso_result = MagicMock() @@ -141,14 +141,14 @@ async def test_auth_callback_new_user_with_sso_default( try: # Set up the prisma client - setattr(litellm.proxy.proxy_server, "prisma_client", 
prisma_client) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) litellm.default_internal_user_params = { "user_role": LitellmUserRoles.INTERNAL_USER.value } - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() # Set up the master key - litellm.proxy.proxy_server.master_key = "mock_master_key" + litellm_proxy.proxy_server.master_key = "mock_master_key" # Mock the GoogleSSO verify_and_process method mock_sso_result = MagicMock() diff --git a/tests/proxy_admin_ui_tests/test_usage_endpoints.py b/tests/proxy_admin_ui_tests/test_usage_endpoints.py index cd704e49cc..3a0dde5c36 100644 --- a/tests/proxy_admin_ui_tests/test_usage_endpoints.py +++ b/tests/proxy_admin_ui_tests/test_usage_endpoints.py @@ -40,12 +40,12 @@ import pytest import litellm from litellm._logging import verbose_proxy_logger -from litellm.proxy.management_endpoints.internal_user_endpoints import ( +from litellm_proxy.management_endpoints.internal_user_endpoints import ( new_user, user_info, user_update, ) -from litellm.proxy.management_endpoints.key_management_endpoints import ( +from litellm_proxy.management_endpoints.key_management_endpoints import ( delete_key_fn, generate_key_fn, generate_key_helper_fn, @@ -53,12 +53,12 @@ from litellm.proxy.management_endpoints.key_management_endpoints import ( regenerate_key_fn, update_key_fn, ) -from litellm.proxy.management_endpoints.team_endpoints import ( +from litellm_proxy.management_endpoints.team_endpoints import ( new_team, team_info, update_team, ) -from litellm.proxy.proxy_server import ( +from litellm_proxy.proxy_server import ( LitellmUserRoles, audio_transcriptions, chat_completion, @@ -69,10 +69,10 @@ from litellm.proxy.proxy_server import ( moderations, user_api_key_auth, ) -from litellm.proxy.management_endpoints.customer_endpoints import ( +from litellm_proxy.management_endpoints.customer_endpoints import ( new_end_user, ) -from litellm.proxy.spend_tracking.spend_management_endpoints import ( +from litellm_proxy.spend_tracking.spend_management_endpoints import ( global_spend, global_spend_logs, global_spend_models, @@ -81,14 +81,14 @@ from litellm.proxy.spend_tracking.spend_management_endpoints import ( spend_user_fn, view_spend_logs, ) -from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend +from litellm_proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend verbose_proxy_logger.setLevel(level=logging.DEBUG) from starlette.datastructures import URL from litellm.caching.caching import DualCache -from litellm.proxy._types import ( +from litellm_proxy._types import ( DynamoDBArgs, GenerateKeyRequest, RegenerateKeyRequest, @@ -110,7 +110,7 @@ proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache()) @pytest.fixture def prisma_client(): - from litellm.proxy.proxy_cli import append_query_params + from litellm_proxy.proxy_cli import append_query_params ### add connection pool + pool timeout args params = {"connection_limit": 100, "pool_timeout": 60} @@ -123,11 +123,11 @@ def prisma_client(): database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj ) - # Reset litellm.proxy.proxy_server.prisma_client to None - litellm.proxy.proxy_server.litellm_proxy_budget_name = ( + # Reset litellm_proxy.proxy_server.prisma_client to None + litellm_proxy.proxy_server.litellm_proxy_budget_name = ( f"litellm-proxy-budget-{time.time()}" ) - litellm.proxy.proxy_server.user_custom_key_generate = None + 
litellm_proxy.proxy_server.user_custom_key_generate = None return prisma_client @@ -135,11 +135,11 @@ def prisma_client(): @pytest.mark.asyncio() async def test_view_daily_spend_ui(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() - from litellm.proxy.proxy_server import user_api_key_cache + await litellm_proxy.proxy_server.prisma_client.connect() + from litellm_proxy.proxy_server import user_api_key_cache spend_logs_for_admin = await global_spend_logs( user_api_key_dict=UserAPIKeyAuth( @@ -179,10 +179,10 @@ async def test_view_daily_spend_ui(prisma_client): @pytest.mark.asyncio async def test_global_spend_models(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() # Test for admin user models_spend_for_admin = await global_spend_models( @@ -271,10 +271,10 @@ async def test_global_spend_models(prisma_client): @pytest.mark.asyncio async def test_global_spend_keys(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() # Test for admin user keys_spend_for_admin = await global_spend_keys( diff --git a/tests/proxy_security_tests/test_master_key_not_in_db.py b/tests/proxy_security_tests/test_master_key_not_in_db.py index e563b735a2..c32d14b04d 100644 --- a/tests/proxy_security_tests/test_master_key_not_in_db.py +++ b/tests/proxy_security_tests/test_master_key_not_in_db.py @@ -1,7 +1,7 @@ import os import pytest from fastapi.testclient import TestClient -from litellm.proxy.proxy_server import app, ProxyLogging +from litellm_proxy.proxy_server import app, ProxyLogging from litellm.caching import DualCache TEST_DB_ENV_VAR_NAME = "MASTER_KEY_CHECK_DB_URL" @@ -35,7 +35,7 @@ async def test_master_key_not_inserted(test_client): response = test_client.get("/health/liveliness") assert response.status_code == 200 - from litellm.proxy.utils import PrismaClient + from litellm_proxy.utils import PrismaClient prisma_client = PrismaClient( database_url=os.environ[TEST_DB_ENV_VAR_NAME], diff --git a/tests/proxy_unit_tests/conftest copy.py b/tests/proxy_unit_tests/conftest copy.py index 1421700c9a..be69ad2d96 100644 --- a/tests/proxy_unit_tests/conftest copy.py +++ b/tests/proxy_unit_tests/conftest copy.py @@ -28,9 +28,9 @@ def setup_and_teardown(): importlib.reload(litellm) try: if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"): - importlib.reload(litellm.proxy.proxy_server) + importlib.reload(litellm_proxy.proxy_server) except Exception as e: - print(f"Error reloading litellm.proxy.proxy_server: {e}") + 
print(f"Error reloading litellm_proxy.proxy_server: {e}") import asyncio diff --git a/tests/proxy_unit_tests/conftest.py b/tests/proxy_unit_tests/conftest.py index 1421700c9a..be69ad2d96 100644 --- a/tests/proxy_unit_tests/conftest.py +++ b/tests/proxy_unit_tests/conftest.py @@ -28,9 +28,9 @@ def setup_and_teardown(): importlib.reload(litellm) try: if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"): - importlib.reload(litellm.proxy.proxy_server) + importlib.reload(litellm_proxy.proxy_server) except Exception as e: - print(f"Error reloading litellm.proxy.proxy_server: {e}") + print(f"Error reloading litellm_proxy.proxy_server: {e}") import asyncio diff --git a/tests/proxy_unit_tests/test_aproxy_startup.py b/tests/proxy_unit_tests/test_aproxy_startup.py index 4dbf5b462a..1f1a40be92 100644 --- a/tests/proxy_unit_tests/test_aproxy_startup.py +++ b/tests/proxy_unit_tests/test_aproxy_startup.py @@ -14,7 +14,7 @@ sys.path.insert( ) # Adds the parent directory to the system path import pytest, logging, asyncio import litellm -from litellm.proxy.proxy_server import ( +from litellm_proxy.proxy_server import ( router, save_worker_config, initialize, @@ -39,7 +39,7 @@ async def test_proxy_gunicorn_startup_direct_config(): # unset set DATABASE_URL in env for this test # set prisma client to None - setattr(litellm.proxy.proxy_server, "prisma_client", None) + setattr(litellm_proxy.proxy_server, "prisma_client", None) database_url = os.environ.pop("DATABASE_URL", None) verbose_proxy_logger.setLevel(level=logging.DEBUG) @@ -71,7 +71,7 @@ async def test_proxy_gunicorn_startup_config_dict(): verbose_router_logger.setLevel(level=logging.DEBUG) # unset set DATABASE_URL in env for this test # set prisma client to None - setattr(litellm.proxy.proxy_server, "prisma_client", None) + setattr(litellm_proxy.proxy_server, "prisma_client", None) database_url = os.environ.pop("DATABASE_URL", None) filepath = os.path.dirname(os.path.abspath(__file__)) diff --git a/tests/proxy_unit_tests/test_audit_logs_proxy.py b/tests/proxy_unit_tests/test_audit_logs_proxy.py index 02303e13db..8dfbf73804 100644 --- a/tests/proxy_unit_tests/test_audit_logs_proxy.py +++ b/tests/proxy_unit_tests/test_audit_logs_proxy.py @@ -28,7 +28,7 @@ import uuid import litellm from litellm._logging import verbose_proxy_logger -from litellm.proxy.proxy_server import ( +from litellm_proxy.proxy_server import ( LitellmUserRoles, audio_transcriptions, chat_completion, @@ -40,14 +40,14 @@ from litellm.proxy.proxy_server import ( user_api_key_auth, ) -from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend +from litellm_proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend verbose_proxy_logger.setLevel(level=logging.DEBUG) from starlette.datastructures import URL -from litellm.proxy.management_helpers.audit_logs import create_audit_log_for_update -from litellm.proxy._types import LiteLLM_AuditLogs, LitellmTableNames +from litellm_proxy.management_helpers.audit_logs import create_audit_log_for_update +from litellm_proxy._types import LiteLLM_AuditLogs, LitellmTableNames from litellm.caching.caching import DualCache from unittest.mock import patch, AsyncMock @@ -62,9 +62,9 @@ async def test_create_audit_log_for_update_premium_user(): Test that the audit log is created when a premium user updates a team """ - with patch("litellm.proxy.proxy_server.premium_user", True), patch( + with patch("litellm_proxy.proxy_server.premium_user", True), patch( "litellm.store_audit_logs", True - ), 
patch("litellm.proxy.proxy_server.prisma_client") as mock_prisma: + ), patch("litellm_proxy.proxy_server.prisma_client") as mock_prisma: mock_prisma.db.litellm_auditlog.create = AsyncMock() @@ -97,7 +97,7 @@ async def test_create_audit_log_for_update_premium_user(): @pytest.fixture def prisma_client(): - from litellm.proxy.proxy_cli import append_query_params + from litellm_proxy.proxy_cli import append_query_params ### add connection pool + pool timeout args params = {"connection_limit": 100, "pool_timeout": 60} @@ -117,12 +117,12 @@ def prisma_client(): async def test_create_audit_log_in_db(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - setattr(litellm.proxy.proxy_server, "premium_user", True) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "premium_user", True) setattr(litellm, "store_audit_logs", True) - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() audit_log_id = f"audit_log_id_{uuid.uuid4()}" # create a audit log for /key/generate diff --git a/tests/proxy_unit_tests/test_auth_checks.py b/tests/proxy_unit_tests/test_auth_checks.py index 7695306c87..be8edbd496 100644 --- a/tests/proxy_unit_tests/test_auth_checks.py +++ b/tests/proxy_unit_tests/test_auth_checks.py @@ -13,22 +13,22 @@ sys.path.insert( ) # Adds the parent directory to the system path import pytest, litellm import httpx -from litellm.proxy._types import UserAPIKeyAuth -from litellm.proxy.auth.auth_checks import get_end_user_object +from litellm_proxy._types import UserAPIKeyAuth +from litellm_proxy.auth.auth_checks import get_end_user_object from litellm.caching.caching import DualCache -from litellm.proxy._types import ( +from litellm_proxy._types import ( LiteLLM_EndUserTable, LiteLLM_BudgetTable, LiteLLM_UserTable, LiteLLM_TeamTable, ) -from litellm.proxy.utils import PrismaClient -from litellm.proxy.auth.auth_checks import ( +from litellm_proxy.utils import PrismaClient +from litellm_proxy.auth.auth_checks import ( can_team_access_model, _virtual_key_soft_budget_check, ) -from litellm.proxy.utils import ProxyLogging -from litellm.proxy.utils import CallInfo +from litellm_proxy.utils import ProxyLogging +from litellm_proxy.utils import CallInfo @pytest.mark.parametrize("customer_spend, customer_budget", [(0, 10), (10, 0)]) @@ -87,7 +87,7 @@ async def test_can_key_call_model(model, expect_to_work): """ If wildcard model + specific model is used, choose the specific model settings """ - from litellm.proxy.auth.auth_checks import can_key_call_model + from litellm_proxy.auth.auth_checks import can_key_call_model from fastapi import HTTPException llm_model_list = [ @@ -140,7 +140,7 @@ async def test_can_key_call_model(model, expect_to_work): ) @pytest.mark.asyncio async def test_can_team_call_model(model, expect_to_work): - from litellm.proxy.auth.auth_checks import model_in_access_group + from litellm_proxy.auth.auth_checks import model_in_access_group from fastapi import HTTPException llm_model_list = [ @@ -195,7 +195,7 @@ async def test_can_team_call_model(model, expect_to_work): ) @pytest.mark.asyncio async def test_can_key_call_model_wildcard_access(key_models, model, expect_to_work): - from litellm.proxy.auth.auth_checks import can_key_call_model + from litellm_proxy.auth.auth_checks 
import can_key_call_model from fastapi import HTTPException llm_model_list = [ @@ -260,7 +260,7 @@ async def test_can_key_call_model_wildcard_access(key_models, model, expect_to_w @pytest.mark.asyncio async def test_is_valid_fallback_model(): - from litellm.proxy.auth.auth_checks import is_valid_fallback_model + from litellm_proxy.auth.auth_checks import is_valid_fallback_model from litellm import Router router = Router( @@ -305,8 +305,8 @@ async def test_virtual_key_max_budget_check( 1. Triggers budget alert for all cases 2. Raises BudgetExceededError when spend >= max_budget """ - from litellm.proxy.auth.auth_checks import _virtual_key_max_budget_check - from litellm.proxy.utils import ProxyLogging + from litellm_proxy.auth.auth_checks import _virtual_key_max_budget_check + from litellm_proxy.utils import ProxyLogging # Setup test data valid_token = UserAPIKeyAuth( @@ -479,8 +479,8 @@ async def test_virtual_key_soft_budget_check(spend, soft_budget, expect_alert): @pytest.mark.asyncio async def test_can_user_call_model(): - from litellm.proxy.auth.auth_checks import can_user_call_model - from litellm.proxy._types import ProxyException + from litellm_proxy.auth.auth_checks import can_user_call_model + from litellm_proxy._types import ProxyException from litellm import Router router = Router( @@ -519,8 +519,8 @@ async def test_can_user_call_model(): @pytest.mark.asyncio async def test_can_user_call_model_with_no_default_models(): - from litellm.proxy.auth.auth_checks import can_user_call_model - from litellm.proxy._types import ProxyException, SpecialModelNames + from litellm_proxy.auth.auth_checks import can_user_call_model + from litellm_proxy._types import ProxyException, SpecialModelNames from unittest.mock import MagicMock args = { @@ -543,8 +543,8 @@ async def test_can_user_call_model_with_no_default_models(): @pytest.mark.asyncio async def test_get_fuzzy_user_object(): - from litellm.proxy.auth.auth_checks import _get_fuzzy_user_object - from litellm.proxy.utils import PrismaClient + from litellm_proxy.auth.auth_checks import _get_fuzzy_user_object + from litellm_proxy.utils import PrismaClient from unittest.mock import AsyncMock, MagicMock # Setup mock Prisma client @@ -638,7 +638,7 @@ async def test_can_key_call_model_with_aliases(model, alias_map, expect_to_work) """ Test if can_key_call_model correctly handles model aliases in the token """ - from litellm.proxy.auth.auth_checks import can_key_call_model + from litellm_proxy.auth.auth_checks import can_key_call_model llm_model_list = [ { diff --git a/tests/proxy_unit_tests/test_banned_keyword_list.py b/tests/proxy_unit_tests/test_banned_keyword_list.py index 90066b74f6..2e9bcb00c9 100644 --- a/tests/proxy_unit_tests/test_banned_keyword_list.py +++ b/tests/proxy_unit_tests/test_banned_keyword_list.py @@ -15,12 +15,12 @@ sys.path.insert( ) # Adds the parent directory to the system path import pytest import litellm -from litellm.proxy.enterprise.enterprise_hooks.banned_keywords import ( +from enterprise.enterprise_hooks.banned_keywords import ( _ENTERPRISE_BannedKeywords, ) from litellm import Router, mock_completion -from litellm.proxy.utils import ProxyLogging, hash_token -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy.utils import ProxyLogging, hash_token +from litellm_proxy._types import UserAPIKeyAuth from litellm.caching.caching import DualCache diff --git a/tests/proxy_unit_tests/test_configs/custom_auth.py b/tests/proxy_unit_tests/test_configs/custom_auth.py index 1b6bec43b5..86c83c8e66 100644 --- 
a/tests/proxy_unit_tests/test_configs/custom_auth.py +++ b/tests/proxy_unit_tests/test_configs/custom_auth.py @@ -1,4 +1,4 @@ -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth from fastapi import Request from dotenv import load_dotenv import os diff --git a/tests/proxy_unit_tests/test_e2e_pod_lock_manager.py b/tests/proxy_unit_tests/test_e2e_pod_lock_manager.py index 061da8c186..5851296178 100644 --- a/tests/proxy_unit_tests/test_e2e_pod_lock_manager.py +++ b/tests/proxy_unit_tests/test_e2e_pod_lock_manager.py @@ -24,16 +24,16 @@ import asyncio import logging import pytest -from litellm.proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager +from litellm_proxy.db.db_transaction_queue.pod_lock_manager import PodLockManager import litellm from litellm._logging import verbose_proxy_logger -from litellm.proxy.management_endpoints.internal_user_endpoints import ( +from litellm_proxy.management_endpoints.internal_user_endpoints import ( new_user, user_info, user_update, ) -from litellm.proxy.auth.auth_checks import get_key_object -from litellm.proxy.management_endpoints.key_management_endpoints import ( +from litellm_proxy.auth.auth_checks import get_key_object +from litellm_proxy.management_endpoints.key_management_endpoints import ( delete_key_fn, generate_key_fn, generate_key_helper_fn, @@ -42,12 +42,12 @@ from litellm.proxy.management_endpoints.key_management_endpoints import ( regenerate_key_fn, update_key_fn, ) -from litellm.proxy.management_endpoints.team_endpoints import ( +from litellm_proxy.management_endpoints.team_endpoints import ( new_team, team_info, update_team, ) -from litellm.proxy.proxy_server import ( +from litellm_proxy.proxy_server import ( LitellmUserRoles, audio_transcriptions, chat_completion, @@ -58,23 +58,23 @@ from litellm.proxy.proxy_server import ( moderations, user_api_key_auth, ) -from litellm.proxy.management_endpoints.customer_endpoints import ( +from litellm_proxy.management_endpoints.customer_endpoints import ( new_end_user, ) -from litellm.proxy.spend_tracking.spend_management_endpoints import ( +from litellm_proxy.spend_tracking.spend_management_endpoints import ( global_spend, spend_key_fn, spend_user_fn, view_spend_logs, ) -from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend +from litellm_proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend verbose_proxy_logger.setLevel(level=logging.DEBUG) from starlette.datastructures import URL from litellm.caching.caching import DualCache, RedisCache -from litellm.proxy._types import ( +from litellm_proxy._types import ( DynamoDBArgs, GenerateKeyRequest, KeyRequest, @@ -109,7 +109,7 @@ global_redis_cache = RedisCache( @pytest.fixture def prisma_client(): - from litellm.proxy.proxy_cli import append_query_params + from litellm_proxy.proxy_cli import append_query_params ### add connection pool + pool timeout args params = {"connection_limit": 100, "pool_timeout": 60} @@ -122,19 +122,19 @@ def prisma_client(): database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj ) - # Reset litellm.proxy.proxy_server.prisma_client to None - litellm.proxy.proxy_server.litellm_proxy_budget_name = ( + # Reset litellm_proxy.proxy_server.prisma_client to None + litellm_proxy.proxy_server.litellm_proxy_budget_name = ( f"litellm-proxy-budget-{time.time()}" ) - litellm.proxy.proxy_server.user_custom_key_generate = None + litellm_proxy.proxy_server.user_custom_key_generate = None return prisma_client async def 
setup_db_connection(prisma_client): - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() @pytest.mark.asyncio diff --git a/tests/proxy_unit_tests/test_jwt.py b/tests/proxy_unit_tests/test_jwt.py index d96fb691f7..a1cfa5841e 100644 --- a/tests/proxy_unit_tests/test_jwt.py +++ b/tests/proxy_unit_tests/test_jwt.py @@ -26,15 +26,15 @@ from fastapi.routing import APIRoute from fastapi.responses import Response import litellm from litellm.caching.caching import DualCache -from litellm.proxy._types import ( +from litellm_proxy._types import ( LiteLLM_JWTAuth, LiteLLM_UserTable, LiteLLMRoutes, JWTAuthBuilderResult, ) -from litellm.proxy.auth.handle_jwt import JWTHandler, JWTAuthManager -from litellm.proxy.management_endpoints.team_endpoints import new_team -from litellm.proxy.proxy_server import chat_completion +from litellm_proxy.auth.handle_jwt import JWTHandler, JWTAuthManager +from litellm_proxy.management_endpoints.team_endpoints import new_team +from litellm_proxy.proxy_server import chat_completion from typing import Literal public_key = { @@ -218,8 +218,8 @@ async def test_valid_invalid_token(audience, monkeypatch): @pytest.fixture def prisma_client(): import litellm - from litellm.proxy.proxy_cli import append_query_params - from litellm.proxy.utils import PrismaClient, ProxyLogging + from litellm_proxy.proxy_cli import append_query_params + from litellm_proxy.utils import PrismaClient, ProxyLogging proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache()) @@ -250,8 +250,8 @@ def team_token_tuple(): from starlette.datastructures import URL import litellm - from litellm.proxy._types import NewTeamRequest, UserAPIKeyAuth - from litellm.proxy.proxy_server import user_api_key_auth + from litellm_proxy._types import NewTeamRequest, UserAPIKeyAuth + from litellm_proxy.proxy_server import user_api_key_auth # Generate a private / public key pair using RSA algorithm key = rsa.generate_private_key( @@ -315,11 +315,11 @@ async def test_team_token_output(prisma_client, audience, monkeypatch): from starlette.datastructures import URL import litellm - from litellm.proxy._types import NewTeamRequest, UserAPIKeyAuth - from litellm.proxy.proxy_server import user_api_key_auth + from litellm_proxy._types import NewTeamRequest, UserAPIKeyAuth + from litellm_proxy.proxy_server import user_api_key_auth - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + await litellm_proxy.proxy_server.prisma_client.connect() os.environ.pop("JWT_AUDIENCE", None) if audience: @@ -419,13 +419,13 @@ async def test_team_token_output(prisma_client, audience, monkeypatch): ## 1. INITIAL TEAM CALL - should fail # use generated key to auth in setattr( - litellm.proxy.proxy_server, + litellm_proxy.proxy_server, "general_settings", { "enable_jwt_auth": True, }, ) - setattr(litellm.proxy.proxy_server, "jwt_handler", jwt_handler) + setattr(litellm_proxy.proxy_server, "jwt_handler", jwt_handler) try: result = await user_api_key_auth(request=request, api_key=bearer_token) pytest.fail("Team doesn't exist. 
This should fail") @@ -501,15 +501,15 @@ async def aaaatest_user_token_output( from starlette.datastructures import URL import litellm - from litellm.proxy._types import NewTeamRequest, NewUserRequest, UserAPIKeyAuth - from litellm.proxy.management_endpoints.internal_user_endpoints import ( + from litellm_proxy._types import NewTeamRequest, NewUserRequest, UserAPIKeyAuth + from litellm_proxy.management_endpoints.internal_user_endpoints import ( new_user, user_info, ) - from litellm.proxy.proxy_server import user_api_key_auth + from litellm_proxy.proxy_server import user_api_key_auth - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + await litellm_proxy.proxy_server.prisma_client.connect() os.environ.pop("JWT_AUDIENCE", None) if audience: @@ -619,8 +619,8 @@ async def aaaatest_user_token_output( ## 1. INITIAL TEAM CALL - should fail # use generated key to auth in - setattr(litellm.proxy.proxy_server, "general_settings", {"enable_jwt_auth": True}) - setattr(litellm.proxy.proxy_server, "jwt_handler", jwt_handler) + setattr(litellm_proxy.proxy_server, "general_settings", {"enable_jwt_auth": True}) + setattr(litellm_proxy.proxy_server, "jwt_handler", jwt_handler) try: result = await user_api_key_auth(request=request, api_key=bearer_token) pytest.fail("Team doesn't exist. This should fail") @@ -736,11 +736,11 @@ async def test_allowed_routes_admin( from starlette.datastructures import URL import litellm - from litellm.proxy._types import NewTeamRequest, UserAPIKeyAuth - from litellm.proxy.proxy_server import user_api_key_auth + from litellm_proxy._types import NewTeamRequest, UserAPIKeyAuth + from litellm_proxy.proxy_server import user_api_key_auth - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + await litellm_proxy.proxy_server.prisma_client.connect() monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key") @@ -841,13 +841,13 @@ async def test_allowed_routes_admin( ## 1. 
INITIAL TEAM CALL - should fail # use generated key to auth in setattr( - litellm.proxy.proxy_server, + litellm_proxy.proxy_server, "general_settings", { "enable_jwt_auth": True, }, ) - setattr(litellm.proxy.proxy_server, "jwt_handler", jwt_handler) + setattr(litellm_proxy.proxy_server, "jwt_handler", jwt_handler) try: result = await user_api_key_auth(request=request, api_key=bearer_token) except Exception as e: @@ -860,13 +860,13 @@ import pytest @pytest.mark.asyncio async def test_team_cache_update_called(): import litellm - from litellm.proxy.proxy_server import user_api_key_cache + from litellm_proxy.proxy_server import user_api_key_cache # Use setattr to replace the method on the user_api_key_cache object cache = DualCache() setattr( - litellm.proxy.proxy_server, + litellm_proxy.proxy_server, "user_api_key_cache", cache, ) @@ -874,7 +874,7 @@ async def test_team_cache_update_called(): with patch.object(cache, "async_get_cache", new=AsyncMock()) as mock_call_cache: cache.async_get_cache = mock_call_cache # Call the function under test - await litellm.proxy.proxy_server.update_cache( + await litellm_proxy.proxy_server.update_cache( token=None, user_id=None, end_user_id=None, @@ -944,8 +944,8 @@ async def test_allow_access_by_email( import jwt from starlette.datastructures import URL - from litellm.proxy._types import NewTeamRequest, UserAPIKeyAuth - from litellm.proxy.proxy_server import user_api_key_auth + from litellm_proxy._types import NewTeamRequest, UserAPIKeyAuth + from litellm_proxy.proxy_server import user_api_key_auth public_jwk = public_jwt_key["public_jwk"] private_key = public_jwt_key["private_key"] @@ -1006,14 +1006,14 @@ async def test_allow_access_by_email( ## 1. INITIAL TEAM CALL - should fail # use generated key to auth in setattr( - litellm.proxy.proxy_server, + litellm_proxy.proxy_server, "general_settings", { "enable_jwt_auth": True, }, ) - setattr(litellm.proxy.proxy_server, "jwt_handler", jwt_handler) - setattr(litellm.proxy.proxy_server, "prisma_client", {}) + setattr(litellm_proxy.proxy_server, "jwt_handler", jwt_handler) + setattr(litellm_proxy.proxy_server, "prisma_client", {}) # AsyncMock( # return_value=LiteLLM_UserTable( @@ -1021,7 +1021,7 @@ async def test_allow_access_by_email( # ) # ), with patch.object( - litellm.proxy.auth.handle_jwt, + litellm_proxy.auth.handle_jwt, "get_user_object", side_effect=mock_user_object, ) as mock_client: @@ -1040,7 +1040,7 @@ async def test_allow_access_by_email( def test_get_public_key_from_jwk_url(): import litellm - from litellm.proxy.auth.handle_jwt import JWTHandler + from litellm_proxy.auth.handle_jwt import JWTHandler jwt_handler = JWTHandler() @@ -1067,10 +1067,10 @@ def test_get_public_key_from_jwk_url(): @pytest.mark.asyncio async def test_end_user_jwt_auth(monkeypatch): import litellm - from litellm.proxy.auth.handle_jwt import JWTHandler + from litellm_proxy.auth.handle_jwt import JWTHandler from litellm.caching import DualCache - from litellm.proxy._types import LiteLLM_JWTAuth - from litellm.proxy.proxy_server import user_api_key_auth + from litellm_proxy._types import LiteLLM_JWTAuth + from litellm_proxy.proxy_server import user_api_key_auth import json monkeypatch.delenv("JWT_AUDIENCE", None) @@ -1156,18 +1156,18 @@ async def test_end_user_jwt_auth(monkeypatch): ## 1. 
INITIAL TEAM CALL - should fail # use generated key to auth in setattr( - litellm.proxy.proxy_server, + litellm_proxy.proxy_server, "general_settings", {"enable_jwt_auth": True, "pass_through_all_models": True}, ) setattr( - litellm.proxy.proxy_server, + litellm_proxy.proxy_server, "llm_router", MagicMock(), ) - setattr(litellm.proxy.proxy_server, "prisma_client", {}) - setattr(litellm.proxy.proxy_server, "jwt_handler", jwt_handler) - from litellm.proxy.proxy_server import cost_tracking + setattr(litellm_proxy.proxy_server, "prisma_client", {}) + setattr(litellm_proxy.proxy_server, "jwt_handler", jwt_handler) + from litellm_proxy.proxy_server import cost_tracking cost_tracking() result = await user_api_key_auth(request=request, api_key=bearer_token) @@ -1176,12 +1176,12 @@ async def test_end_user_jwt_auth(monkeypatch): ) # jwt token decoded sub value temp_response = Response() - from litellm.proxy.hooks.proxy_track_cost_callback import ( + from litellm_proxy.hooks.proxy_track_cost_callback import ( _should_track_cost_callback, ) with patch.object( - litellm.proxy.hooks.proxy_track_cost_callback, "_should_track_cost_callback" + litellm_proxy.hooks.proxy_track_cost_callback, "_should_track_cost_callback" ) as mock_client: resp = await chat_completion( request=request, @@ -1202,9 +1202,9 @@ async def test_end_user_jwt_auth(monkeypatch): def test_can_rbac_role_call_route(): - from litellm.proxy.auth.handle_jwt import JWTAuthManager - from litellm.proxy._types import RoleBasedPermissions - from litellm.proxy._types import LitellmUserRoles + from litellm_proxy.auth.handle_jwt import JWTAuthManager + from litellm_proxy._types import RoleBasedPermissions + from litellm_proxy._types import LitellmUserRoles with pytest.raises(HTTPException): JWTAuthManager.can_rbac_role_call_route( @@ -1228,8 +1228,8 @@ def test_can_rbac_role_call_route(): ], ) def test_check_scope_based_access(requested_model, should_work): - from litellm.proxy.auth.handle_jwt import JWTAuthManager - from litellm.proxy._types import ScopeMapping + from litellm_proxy.auth.handle_jwt import JWTAuthManager + from litellm_proxy._types import ScopeMapping args = { "scope_mappings": [ diff --git a/tests/proxy_unit_tests/test_key_generate_prisma.py b/tests/proxy_unit_tests/test_key_generate_prisma.py index 9b8d3543bc..2af0ce4469 100644 --- a/tests/proxy_unit_tests/test_key_generate_prisma.py +++ b/tests/proxy_unit_tests/test_key_generate_prisma.py @@ -47,13 +47,13 @@ import pytest import litellm from litellm._logging import verbose_proxy_logger -from litellm.proxy.management_endpoints.internal_user_endpoints import ( +from litellm_proxy.management_endpoints.internal_user_endpoints import ( new_user, user_info, user_update, ) -from litellm.proxy.auth.auth_checks import get_key_object -from litellm.proxy.management_endpoints.key_management_endpoints import ( +from litellm_proxy.auth.auth_checks import get_key_object +from litellm_proxy.management_endpoints.key_management_endpoints import ( delete_key_fn, generate_key_fn, generate_key_helper_fn, @@ -62,12 +62,12 @@ from litellm.proxy.management_endpoints.key_management_endpoints import ( regenerate_key_fn, update_key_fn, ) -from litellm.proxy.management_endpoints.team_endpoints import ( +from litellm_proxy.management_endpoints.team_endpoints import ( new_team, team_info, update_team, ) -from litellm.proxy.proxy_server import ( +from litellm_proxy.proxy_server import ( LitellmUserRoles, audio_transcriptions, chat_completion, @@ -78,23 +78,23 @@ from litellm.proxy.proxy_server import ( 
moderations, user_api_key_auth, ) -from litellm.proxy.management_endpoints.customer_endpoints import ( +from litellm_proxy.management_endpoints.customer_endpoints import ( new_end_user, ) -from litellm.proxy.spend_tracking.spend_management_endpoints import ( +from litellm_proxy.spend_tracking.spend_management_endpoints import ( global_spend, spend_key_fn, spend_user_fn, view_spend_logs, ) -from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend +from litellm_proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend verbose_proxy_logger.setLevel(level=logging.DEBUG) from starlette.datastructures import URL from litellm.caching.caching import DualCache -from litellm.proxy._types import ( +from litellm_proxy._types import ( DynamoDBArgs, GenerateKeyRequest, KeyRequest, @@ -123,7 +123,7 @@ request_data = { @pytest.fixture def prisma_client(): - from litellm.proxy.proxy_cli import append_query_params + from litellm_proxy.proxy_cli import append_query_params ### add connection pool + pool timeout args params = {"connection_limit": 100, "pool_timeout": 60} @@ -136,11 +136,11 @@ def prisma_client(): database_url=os.environ["DATABASE_URL"], proxy_logging_obj=proxy_logging_obj ) - # Reset litellm.proxy.proxy_server.prisma_client to None - litellm.proxy.proxy_server.litellm_proxy_budget_name = ( + # Reset litellm_proxy.proxy_server.prisma_client to None + litellm_proxy.proxy_server.litellm_proxy_budget_name = ( f"litellm-proxy-budget-{time.time()}" ) - litellm.proxy.proxy_server.user_custom_key_generate = None + litellm_proxy.proxy_server.user_custom_key_generate = None return prisma_client @@ -152,11 +152,11 @@ async def test_new_user_response(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() - from litellm.proxy.proxy_server import user_api_key_cache + await litellm_proxy.proxy_server.prisma_client.connect() + from litellm_proxy.proxy_server import user_api_key_cache _team_id = "ishaan-special-team_{}".format(uuid.uuid4()) await new_team( @@ -237,13 +237,13 @@ def test_generate_and_call_with_valid_key(prisma_client, api_route): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() - from litellm.proxy.proxy_server import user_api_key_cache + await litellm_proxy.proxy_server.prisma_client.connect() + from litellm_proxy.proxy_server import user_api_key_cache user_api_key_dict = UserAPIKeyAuth( user_role=LitellmUserRoles.PROXY_ADMIN, @@ -294,12 +294,12 @@ def test_generate_and_call_with_valid_key(prisma_client, api_route): def test_call_with_invalid_key(prisma_client): # 2. 
Make a call with invalid key, expect it to fail - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() generated_key = "sk-126666" bearer_token = "Bearer " + generated_key @@ -322,12 +322,12 @@ def test_call_with_invalid_key(prisma_client): def test_call_with_invalid_model(prisma_client): litellm.set_verbose = True # 3. Make a call to a key with an invalid model - expect to fail - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = NewUserRequest(models=["mistral"]) key = await new_user( data=request, @@ -368,12 +368,12 @@ def test_call_with_invalid_model(prisma_client): def test_call_with_valid_model(prisma_client): # 4. Make a call to a key with a valid model - expect to pass - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = NewUserRequest(models=["mistral"]) key = await new_user( request, @@ -415,11 +415,11 @@ async def test_call_with_valid_model_using_all_models(prisma_client): 3. Call /chat/completions with the key -> expect to pass """ # Make a call to a key with model = `all-proxy-models` this is an Alias from LiteLLM Admin UI - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() team_request = NewTeamRequest( team_alias="testing-team", @@ -477,12 +477,12 @@ async def test_call_with_valid_model_using_all_models(prisma_client): def test_call_with_user_over_budget(prisma_client): # 5. 
Make a call with a key over budget, expect to fail - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = NewUserRequest(max_budget=0.00001) key = await new_user( data=request, @@ -507,7 +507,7 @@ def test_call_with_user_over_budget(prisma_client): # update spend using track_cost callback, make 2nd request, it should fail from litellm import Choices, Message, ModelResponse, Usage - from litellm.proxy.proxy_server import _ProxyDBLogger + from litellm_proxy.proxy_server import _ProxyDBLogger proxy_db_logger = _ProxyDBLogger() @@ -569,13 +569,13 @@ def test_call_with_end_user_over_budget(prisma_client): # we only check this when litellm.max_end_user_budget is set import random - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") setattr(litellm, "max_end_user_budget", 0.00001) try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() user = f"ishaan {uuid.uuid4().hex}" request = NewCustomerRequest( user_id=user, max_budget=0.000001 @@ -604,7 +604,7 @@ def test_call_with_end_user_over_budget(prisma_client): # update spend using track_cost callback, make 2nd request, it should fail from litellm import Choices, Message, ModelResponse, Usage - from litellm.proxy.proxy_server import _ProxyDBLogger + from litellm_proxy.proxy_server import _ProxyDBLogger proxy_db_logger = _ProxyDBLogger() @@ -668,25 +668,25 @@ def test_call_with_end_user_over_budget(prisma_client): def test_call_with_proxy_over_budget(prisma_client): # 5.1 Make a call with a proxy over budget, expect to fail - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}" setattr( - litellm.proxy.proxy_server, + litellm_proxy.proxy_server, "litellm_proxy_admin_name", litellm_proxy_budget_name, ) setattr(litellm, "max_budget", 0.00001) - from litellm.proxy.proxy_server import user_api_key_cache + from litellm_proxy.proxy_server import user_api_key_cache user_api_key_cache.set_cache( key="{}:spend".format(litellm_proxy_budget_name), value=0 ) - setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache) + setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache) try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = NewUserRequest() key = await new_user( data=request, @@ -711,7 +711,7 @@ def test_call_with_proxy_over_budget(prisma_client): # update spend using track_cost callback, make 2nd request, it should fail from litellm import Choices, Message, ModelResponse, Usage - from litellm.proxy.proxy_server import _ProxyDBLogger + from litellm_proxy.proxy_server import _ProxyDBLogger proxy_db_logger 
= _ProxyDBLogger() @@ -766,8 +766,8 @@ def test_call_with_proxy_over_budget(prisma_client): def test_call_with_user_over_budget_stream(prisma_client): # 6. Make a call with a key over budget, expect to fail - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") import logging from litellm._logging import verbose_proxy_logger @@ -777,7 +777,7 @@ def test_call_with_user_over_budget_stream(prisma_client): try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = NewUserRequest(max_budget=0.00001) key = await new_user( data=request, @@ -802,7 +802,7 @@ def test_call_with_user_over_budget_stream(prisma_client): # update spend using track_cost callback, make 2nd request, it should fail from litellm import Choices, Message, ModelResponse, Usage - from litellm.proxy.proxy_server import _ProxyDBLogger + from litellm_proxy.proxy_server import _ProxyDBLogger proxy_db_logger = _ProxyDBLogger() @@ -854,21 +854,21 @@ def test_call_with_user_over_budget_stream(prisma_client): def test_call_with_proxy_over_budget_stream(prisma_client): # 6.1 Make a call with a global proxy over budget, expect to fail - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}" setattr( - litellm.proxy.proxy_server, + litellm_proxy.proxy_server, "litellm_proxy_admin_name", litellm_proxy_budget_name, ) setattr(litellm, "max_budget", 0.00001) - from litellm.proxy.proxy_server import user_api_key_cache + from litellm_proxy.proxy_server import user_api_key_cache user_api_key_cache.set_cache( key="{}:spend".format(litellm_proxy_budget_name), value=0 ) - setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache) + setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache) import logging @@ -879,7 +879,7 @@ def test_call_with_proxy_over_budget_stream(prisma_client): try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() ## CREATE PROXY + USER BUDGET ## # request = NewUserRequest( # max_budget=0.00001, user_id=litellm_proxy_budget_name @@ -908,7 +908,7 @@ def test_call_with_proxy_over_budget_stream(prisma_client): # update spend using track_cost callback, make 2nd request, it should fail from litellm import Choices, Message, ModelResponse, Usage - from litellm.proxy.proxy_server import _ProxyDBLogger + from litellm_proxy.proxy_server import _ProxyDBLogger proxy_db_logger = _ProxyDBLogger() @@ -961,12 +961,12 @@ def test_generate_and_call_with_valid_key_never_expires(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = 
NewUserRequest(duration=None) key = await new_user( data=request, @@ -998,12 +998,12 @@ def test_generate_and_call_with_expired_key(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = NewUserRequest(duration="0s") key = await new_user( data=request, @@ -1041,14 +1041,14 @@ def test_delete_key(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - setattr(litellm.proxy.proxy_server, "user_custom_auth", None) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "user_custom_auth", None) try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() - from litellm.proxy.proxy_server import user_api_key_cache + await litellm_proxy.proxy_server.prisma_client.connect() + from litellm_proxy.proxy_server import user_api_key_cache request = NewUserRequest() key = await new_user( @@ -1098,13 +1098,13 @@ def test_delete_key_auth(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() - from litellm.proxy.proxy_server import user_api_key_cache + await litellm_proxy.proxy_server.prisma_client.connect() + from litellm_proxy.proxy_server import user_api_key_cache request = NewUserRequest() key = await new_user( @@ -1168,12 +1168,12 @@ def test_generate_and_call_key_info(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = NewUserRequest( metadata={"team": "litellm-team3", "project": "litellm-project3"} ) @@ -1234,12 +1234,12 @@ def test_generate_and_update_key(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() # create team "litellm-core-infra@gmail.com"" print("creating team litellm-core-infra@gmail.com") @@ -1434,16 +1434,16 @@ def test_key_generate_with_custom_auth(prisma_client): "message": "This violates LiteLLM Proxy Rules. 
No team id provided.", } - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") setattr( - litellm.proxy.proxy_server, "user_custom_key_generate", custom_generate_key_fn + litellm_proxy.proxy_server, "user_custom_key_generate", custom_generate_key_fn ) try: async def test(): try: - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = GenerateKeyRequest() key = await generate_key_fn( request, @@ -1488,12 +1488,12 @@ def test_key_generate_with_custom_auth(prisma_client): def test_call_with_key_over_budget(prisma_client): # 12. Make a call with a key over budget, expect to fail - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = GenerateKeyRequest(max_budget=0.00001) key = await generate_key_fn( request, @@ -1519,7 +1519,7 @@ def test_call_with_key_over_budget(prisma_client): # update spend using track_cost callback, make 2nd request, it should fail from litellm import Choices, Message, ModelResponse, Usage from litellm.caching.caching import Cache - from litellm.proxy.proxy_server import _ProxyDBLogger + from litellm_proxy.proxy_server import _ProxyDBLogger proxy_db_logger = _ProxyDBLogger() @@ -1607,12 +1607,12 @@ def test_call_with_key_over_budget_no_cache(prisma_client): # 12. Make a call with a key over budget, expect to fail # ✅ Tests if spend trackign works when the key does not exist in memory # Related to this: https://github.com/BerriAI/litellm/issues/3920 - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: async def test(): - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = GenerateKeyRequest(max_budget=0.00001) key = await generate_key_fn( request, @@ -1636,11 +1636,11 @@ def test_call_with_key_over_budget_no_cache(prisma_client): print("result from user auth with new key", result) # update spend using track_cost callback, make 2nd request, it should fail - from litellm.proxy.proxy_server import _ProxyDBLogger - from litellm.proxy.proxy_server import user_api_key_cache + from litellm_proxy.proxy_server import _ProxyDBLogger + from litellm_proxy.proxy_server import user_api_key_cache user_api_key_cache.in_memory_cache.cache_dict = {} - setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1) + setattr(litellm_proxy.proxy_server, "proxy_batch_write_at", 1) from litellm import Choices, Message, ModelResponse, Usage from litellm.caching.caching import Cache @@ -1740,13 +1740,13 @@ async def test_call_with_key_over_model_budget( prisma_client, request_model, should_pass ): # 12. 
Make a call with a key over budget, expect to fail - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() verbose_proxy_logger.setLevel(logging.DEBUG) # init model max budget limiter - from litellm.proxy.hooks.model_max_budget_limiter import ( + from litellm_proxy.hooks.model_max_budget_limiter import ( _PROXY_VirtualKeyModelMaxBudgetLimiter, ) @@ -1842,10 +1842,10 @@ async def test_call_with_key_over_model_budget( @pytest.mark.asyncio() async def test_call_with_key_never_over_budget(prisma_client): # Make a call with a key with budget=None, it should never fail - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") try: - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = GenerateKeyRequest(max_budget=None) key = await generate_key_fn( request, @@ -1873,7 +1873,7 @@ async def test_call_with_key_never_over_budget(prisma_client): import uuid from litellm import Choices, Message, ModelResponse, Usage - from litellm.proxy.proxy_server import _ProxyDBLogger + from litellm_proxy.proxy_server import _ProxyDBLogger proxy_db_logger = _ProxyDBLogger() @@ -1927,8 +1927,8 @@ async def test_call_with_key_never_over_budget(prisma_client): @pytest.mark.asyncio() async def test_call_with_key_over_budget_stream(prisma_client): # 14. 
Make a call with a key over budget, expect to fail - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") import logging from litellm._logging import verbose_proxy_logger @@ -1936,7 +1936,7 @@ async def test_call_with_key_over_budget_stream(prisma_client): litellm.set_verbose = True verbose_proxy_logger.setLevel(logging.DEBUG) try: - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = GenerateKeyRequest(max_budget=0.00001) key = await generate_key_fn( request, @@ -1964,7 +1964,7 @@ async def test_call_with_key_over_budget_stream(prisma_client): import uuid from litellm import Choices, Message, ModelResponse, Usage - from litellm.proxy.proxy_server import _ProxyDBLogger + from litellm_proxy.proxy_server import _ProxyDBLogger proxy_db_logger = _ProxyDBLogger() @@ -2022,9 +2022,9 @@ async def test_call_with_key_over_budget_stream(prisma_client): @pytest.mark.asyncio() async def test_aview_spend_per_user(prisma_client): - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() try: user_by_spend = await spend_user_fn(user_id=None) assert type(user_by_spend) == list @@ -2040,9 +2040,9 @@ async def test_aview_spend_per_user(prisma_client): @pytest.mark.asyncio() async def test_view_spend_per_key(prisma_client): - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() try: key_by_spend = await spend_key_fn() assert type(key_by_spend) == list @@ -2063,10 +2063,10 @@ async def test_key_name_null(prisma_client): - get key info - assert key_name is null """ - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") os.environ["DISABLE_KEY_NAME"] = "True" - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() try: request = GenerateKeyRequest() key = await generate_key_fn( @@ -2099,10 +2099,10 @@ async def test_key_name_set(prisma_client): - get key info - assert key_name is not null """ - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": True}) - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "general_settings", {"allow_user_auth": True}) + await 
litellm_proxy.proxy_server.prisma_client.connect() try: request = GenerateKeyRequest() key = await generate_key_fn( @@ -2132,11 +2132,11 @@ async def test_default_key_params(prisma_client): - get key info - assert key_name is not null """ - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": True}) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "general_settings", {"allow_user_auth": True}) litellm.default_key_generate_params = {"max_budget": 0.000122} - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() try: request = GenerateKeyRequest() key = await generate_key_fn( @@ -2166,12 +2166,12 @@ async def test_upperbound_key_param_larger_budget(prisma_client): - get key info - assert key_name is not null """ - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") litellm.upperbound_key_generate_params = LiteLLM_UpperboundKeyGenerateParams( max_budget=0.001, budget_duration="1m" ) - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() try: request = GenerateKeyRequest( max_budget=200000, @@ -2192,12 +2192,12 @@ async def test_upperbound_key_param_larger_budget(prisma_client): @pytest.mark.asyncio() async def test_upperbound_key_param_larger_duration(prisma_client): - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") litellm.upperbound_key_generate_params = LiteLLM_UpperboundKeyGenerateParams( max_budget=100, duration="14d" ) - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() try: request = GenerateKeyRequest( max_budget=10, @@ -2221,12 +2221,12 @@ async def test_upperbound_key_param_larger_duration(prisma_client): async def test_upperbound_key_param_none_duration(prisma_client): from datetime import datetime, timedelta - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") litellm.upperbound_key_generate_params = LiteLLM_UpperboundKeyGenerateParams( max_budget=100, duration="14d" ) - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() try: request = GenerateKeyRequest() key = await generate_key_fn( @@ -2253,7 +2253,7 @@ async def test_upperbound_key_param_none_duration(prisma_client): def test_get_bearer_token(): - from litellm.proxy.auth.user_api_key_auth import _get_bearer_token + from litellm_proxy.auth.user_api_key_auth import _get_bearer_token # Test valid Bearer token api_key = "Bearer valid_token" @@ -2285,7 +2285,7 @@ async def test_update_logs_with_spend_logs_url(prisma_client): """ Unit test for making sure spend logs list is still 
updated when url passed in """ - from litellm.proxy.db.db_spend_update_writer import DBSpendUpdateWriter + from litellm_proxy.db.db_spend_update_writer import DBSpendUpdateWriter db_spend_update_writer = DBSpendUpdateWriter() payload = {"startTime": datetime.now(), "endTime": datetime.now()} @@ -2306,12 +2306,12 @@ async def test_update_logs_with_spend_logs_url(prisma_client): @pytest.mark.asyncio async def test_user_api_key_auth(prisma_client): - from litellm.proxy.proxy_server import ProxyException + from litellm_proxy.proxy_server import ProxyException - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": True}) - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "general_settings", {"allow_user_auth": True}) + await litellm_proxy.proxy_server.prisma_client.connect() request = Request(scope={"type": "http"}) request._url = URL(url="/chat/completions") @@ -2350,14 +2350,14 @@ async def test_user_api_key_auth(prisma_client): async def test_user_api_key_auth_without_master_key(prisma_client): # if master key is not set, expect all calls to go through try: - from litellm.proxy.proxy_server import ProxyException + from litellm_proxy.proxy_server import ProxyException - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", None) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", None) setattr( - litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": True} + litellm_proxy.proxy_server, "general_settings", {"allow_user_auth": True} ) - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = Request(scope={"type": "http"}) request._url = URL(url="/chat/completions") @@ -2379,10 +2379,10 @@ async def test_key_with_no_permissions(prisma_client): - get key info - assert key_name is null """ - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": False}) - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "general_settings", {"allow_user_auth": False}) + await litellm_proxy.proxy_server.prisma_client.connect() try: response = await generate_key_helper_fn( request_type="key", @@ -2409,7 +2409,7 @@ async def track_cost_callback_helper_fn(generated_key: str, user_id: str): import uuid from litellm import Choices, Message, ModelResponse, Usage - from litellm.proxy.proxy_server import _ProxyDBLogger + from litellm_proxy.proxy_server import _ProxyDBLogger request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{uuid.uuid4()}" resp = ModelResponse( @@ -2454,8 +2454,8 @@ async def test_proxy_load_test_db(prisma_client): """ Run 1500 req./s against track_cost_callback function """ - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + 
setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") import logging import time @@ -2465,7 +2465,7 @@ async def test_proxy_load_test_db(prisma_client): verbose_proxy_logger.setLevel(logging.DEBUG) try: start_time = time.time() - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() request = GenerateKeyRequest(max_budget=0.00001) key = await generate_key_fn( request, @@ -2521,11 +2521,11 @@ async def test_master_key_hashing(prisma_client): master_key = "sk-1234" - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", master_key) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", master_key) - await litellm.proxy.proxy_server.prisma_client.connect() - from litellm.proxy.proxy_server import user_api_key_cache + await litellm_proxy.proxy_server.prisma_client.connect() + from litellm_proxy.proxy_server import user_api_key_cache _team_id = "ishaans-special-team_{}".format(uuid.uuid4()) user_api_key_dict = UserAPIKeyAuth( @@ -2585,11 +2585,11 @@ async def test_reset_spend_authentication(prisma_client): master_key = "sk-1234" - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", master_key) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", master_key) - await litellm.proxy.proxy_server.prisma_client.connect() - from litellm.proxy.proxy_server import user_api_key_cache + await litellm_proxy.proxy_server.prisma_client.connect() + from litellm_proxy.proxy_server import user_api_key_cache bearer_token = "Bearer " + master_key @@ -2659,12 +2659,12 @@ async def test_create_update_team(prisma_client): master_key = "sk-1234" - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", master_key) + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", master_key) import datetime - await litellm.proxy.proxy_server.prisma_client.connect() - from litellm.proxy.proxy_server import user_api_key_cache + await litellm_proxy.proxy_server.prisma_client.connect() + from litellm_proxy.proxy_server import user_api_key_cache _team_id = "test-team_{}".format(uuid.uuid4()) response = await new_team( @@ -2771,9 +2771,9 @@ async def test_update_user_role(prisma_client): -> update user role to == PROXY_ADMIN -> access an Admin only route -> expect to succeed """ - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() key = await new_user( data=NewUserRequest( user_role=LitellmUserRoles.INTERNAL_USER, @@ -2824,9 +2824,9 @@ async def test_update_user_unit_test(prisma_client): Ensure that params are updated for UpdateUserRequest """ - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, 
"prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() key = await new_user( data=NewUserRequest( user_email=f"test-{uuid.uuid4()}@test.com", @@ -2870,14 +2870,14 @@ async def test_update_user_unit_test(prisma_client): @pytest.mark.asyncio() async def test_custom_api_key_header_name(prisma_client): """ """ - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") setattr( - litellm.proxy.proxy_server, + litellm_proxy.proxy_server, "general_settings", {"litellm_key_header_name": "x-litellm-key"}, ) - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() api_route = APIRoute(path="/chat/completions", endpoint=chat_completion) request = Request( @@ -2920,9 +2920,9 @@ async def test_custom_api_key_header_name(prisma_client): async def test_generate_key_with_model_tpm_limit(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() request = GenerateKeyRequest( metadata={ "team": "litellm-team3", @@ -2988,9 +2988,9 @@ async def test_generate_key_with_model_tpm_limit(prisma_client): async def test_generate_key_with_guardrails(prisma_client): print("prisma client=", prisma_client) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() request = GenerateKeyRequest( guardrails=["aporia-pre-call"], metadata={ @@ -3057,9 +3057,9 @@ async def test_team_guardrails(prisma_client): - Assert new guardrails are returned when calling /team/info """ litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() _new_team = NewTeamRequest( team_alias="test-teamA", @@ -3120,9 +3120,9 @@ async def test_team_access_groups(prisma_client): - Test calling a model not in the access group -> fail """ litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() # create router with access groups litellm_router = litellm.Router( model_list=[ @@ -3142,7 +3142,7 @@ async def 
test_team_access_groups(prisma_client): }, ] ) - setattr(litellm.proxy.proxy_server, "llm_router", litellm_router) + setattr(litellm_proxy.proxy_server, "llm_router", litellm_router) # Create team with models=["beta-models"] team_request = NewTeamRequest( @@ -3229,9 +3229,9 @@ async def test_team_tags(prisma_client): - Assert new tags are returned when calling /team/info """ litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() _new_team = NewTeamRequest( team_alias="test-teamA", @@ -3288,14 +3288,14 @@ async def test_aadmin_only_routes(prisma_client): """ litellm.set_verbose = True print(f"os.getenv('DATABASE_URL')={os.getenv('DATABASE_URL')}") - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() general_settings = { "allowed_routes": ["/embeddings", "/key/generate"], "admin_only_routes": ["/key/generate"], } - from litellm.proxy import proxy_server + from litellm_proxy import proxy_server initial_general_settings = getattr(proxy_server, "general_settings") @@ -3358,12 +3358,12 @@ async def test_list_keys(prisma_client): """ from fastapi import Query - from litellm.proxy.proxy_server import hash_token - from litellm.proxy._types import LitellmUserRoles + from litellm_proxy.proxy_server import hash_token + from litellm_proxy._types import LitellmUserRoles - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() # Test basic listing request = Request(scope={"type": "http", "query_string": b""}) @@ -3442,10 +3442,10 @@ async def test_auth_vertex_ai_route(prisma_client): If user is premium user and vertex-ai route is used. 
Assert Virtual Key checks are run """ litellm.set_verbose = True - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "premium_user", True) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "premium_user", True) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() route = "/vertex-ai/publishers/google/models/gemini-1.5-flash-001:generateContent" request = Request(scope={"type": "http"}) @@ -3494,11 +3494,11 @@ async def test_user_api_key_auth_db_unavailable(): pass # Set up test environment - setattr(litellm.proxy.proxy_server, "prisma_client", MockPrismaClient()) - setattr(litellm.proxy.proxy_server, "user_api_key_cache", MockDualCache()) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", MockPrismaClient()) + setattr(litellm_proxy.proxy_server, "user_api_key_cache", MockDualCache()) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") setattr( - litellm.proxy.proxy_server, + litellm_proxy.proxy_server, "general_settings", {"allow_requests_on_db_unavailable": True}, ) @@ -3516,7 +3516,7 @@ async def test_user_api_key_auth_db_unavailable(): # Verify results assert isinstance(result, UserAPIKeyAuth) assert result.key_name == "failed-to-connect-to-db" - assert result.user_id == litellm.proxy.proxy_server.litellm_proxy_admin_name + assert result.user_id == litellm_proxy.proxy_server.litellm_proxy_admin_name @pytest.mark.asyncio @@ -3550,17 +3550,17 @@ async def test_user_api_key_auth_db_unavailable_not_allowed(): pass # Set up test environment - setattr(litellm.proxy.proxy_server, "prisma_client", MockPrismaClient()) - setattr(litellm.proxy.proxy_server, "user_api_key_cache", MockDualCache()) - setattr(litellm.proxy.proxy_server, "general_settings", {}) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", MockPrismaClient()) + setattr(litellm_proxy.proxy_server, "user_api_key_cache", MockDualCache()) + setattr(litellm_proxy.proxy_server, "general_settings", {}) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") # Create test request request = Request(scope={"type": "http"}) request._url = URL(url="/chat/completions") # Run test with a sample API key - with pytest.raises(litellm.proxy._types.ProxyException): + with pytest.raises(litellm_proxy._types.ProxyException): await user_api_key_auth( request=request, api_key="Bearer sk-123456789", @@ -3580,9 +3580,9 @@ async def test_key_generate_with_secret_manager_call(prisma_client): assert it is deleted from the secret manager """ from litellm.secret_managers.aws_secret_manager_v2 import AWSSecretsManagerV2 - from litellm.proxy._types import KeyManagementSystem, KeyManagementSettings + from litellm_proxy._types import KeyManagementSystem, KeyManagementSettings - from litellm.proxy.hooks.key_management_event_hooks import ( + from litellm_proxy.hooks.key_management_event_hooks import ( LITELLM_PREFIX_STORED_VIRTUAL_KEYS, ) @@ -3602,10 +3602,10 @@ async def test_key_generate_with_secret_manager_call(prisma_client): }, } - setattr(litellm.proxy.proxy_server, "general_settings", general_settings) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - setattr(litellm.proxy.proxy_server, 
"prisma_client", prisma_client) - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "general_settings", general_settings) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + await litellm_proxy.proxy_server.prisma_client.connect() ############################################################################ # generate new key @@ -3658,7 +3658,7 @@ async def test_key_generate_with_secret_manager_call(prisma_client): assert result is None # cleanup - setattr(litellm.proxy.proxy_server, "general_settings", {}) + setattr(litellm_proxy.proxy_server, "general_settings", {}) ################################################################################ @@ -3672,9 +3672,9 @@ async def test_key_alias_uniqueness(prisma_client): 2. We cannot update a key to use an alias that's already taken 3. We can update a key while keeping its existing alias """ - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() try: # Create first key with an alias @@ -3756,12 +3756,12 @@ async def test_enforce_unique_key_alias(prisma_client): 3. Test it allows updating a key with its own existing alias 4. Test it blocks updating a key with another key's alias """ - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( _enforce_unique_key_alias, ) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + await litellm_proxy.proxy_server.prisma_client.connect() try: # Test 1: Allow unique alias @@ -3827,7 +3827,7 @@ def test_should_track_cost_callback(): """ Test that the should_track_cost_callback function works as expected """ - from litellm.proxy.hooks.proxy_track_cost_callback import ( + from litellm_proxy.hooks.proxy_track_cost_callback import ( _should_track_cost_callback, ) @@ -3847,11 +3847,11 @@ async def test_get_paginated_teams(prisma_client): 2. Test total count matches across pages 3. 
Test page size is respected """ - from litellm.proxy.management_endpoints.team_endpoints import get_paginated_teams + from litellm_proxy.management_endpoints.team_endpoints import get_paginated_teams - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() try: # Get first page with page_size=2 @@ -3901,13 +3901,13 @@ async def test_reset_budget_job(prisma_client, entity_type): from datetime import datetime, timedelta import time - from litellm.proxy.common_utils.reset_budget_job import ResetBudgetJob - from litellm.proxy.utils import ProxyLogging + from litellm_proxy.common_utils.reset_budget_job import ResetBudgetJob + from litellm_proxy.utils import ProxyLogging # Setup - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() proxy_logging_obj = ProxyLogging(user_api_key_cache=None) reset_budget_job = ResetBudgetJob( diff --git a/tests/proxy_unit_tests/test_proxy_config_unit_test.py b/tests/proxy_unit_tests/test_proxy_config_unit_test.py index a1586ab6bd..6575d9d78f 100644 --- a/tests/proxy_unit_tests/test_proxy_config_unit_test.py +++ b/tests/proxy_unit_tests/test_proxy_config_unit_test.py @@ -6,8 +6,7 @@ import pytest from dotenv import load_dotenv -import litellm.proxy -import litellm.proxy.proxy_server +import litellm_proxy.proxy_server load_dotenv() import io @@ -21,7 +20,7 @@ sys.path.insert( import asyncio import logging -from litellm.proxy.proxy_server import ProxyConfig +from litellm_proxy.proxy_server import ProxyConfig INVALID_FILES = ["config_with_missing_include.yaml"] diff --git a/tests/proxy_unit_tests/test_proxy_custom_auth.py b/tests/proxy_unit_tests/test_proxy_custom_auth.py index cffcc2e7f2..a689bee4a9 100644 --- a/tests/proxy_unit_tests/test_proxy_custom_auth.py +++ b/tests/proxy_unit_tests/test_proxy_custom_auth.py @@ -23,7 +23,7 @@ from fastapi.testclient import TestClient import litellm from litellm import RateLimitError, Timeout, completion, completion_cost, embedding -from litellm.proxy.proxy_server import ( # Replace with the actual module where your FastAPI router is defined +from litellm_proxy.proxy_server import ( # Replace with the actual module where your FastAPI router is defined ProxyConfig, initialize, router, @@ -35,7 +35,7 @@ from litellm.proxy.proxy_server import ( # Replace with the actual module where # Make sure the fixture returns TestClient(app) @pytest.fixture(scope="function") def client(): - from litellm.proxy.proxy_server import cleanup_router_config_variables + from litellm_proxy.proxy_server import cleanup_router_config_variables cleanup_router_config_variables() filepath = os.path.dirname(os.path.abspath(__file__)) diff --git a/tests/proxy_unit_tests/test_proxy_custom_logger.py b/tests/proxy_unit_tests/test_proxy_custom_logger.py index bdad7c9d7d..c21bd05075 100644 --- a/tests/proxy_unit_tests/test_proxy_custom_logger.py +++ b/tests/proxy_unit_tests/test_proxy_custom_logger.py @@ -19,7 +19,7 @@ 
 import importlib, inspect
 # test /chat/completion request to the proxy
 from fastapi.testclient import TestClient
 from fastapi import FastAPI
-from litellm.proxy.proxy_server import (
+from litellm_proxy.proxy_server import (
     router,
     save_worker_config,
     initialize,
@@ -51,7 +51,7 @@ print("Testing proxy custom logger")
 def test_embedding(client):
     try:
         litellm.set_verbose = False
-        from litellm.proxy.types_utils.utils import get_instance_fn
+        from litellm_proxy.types_utils.utils import get_instance_fn
         my_custom_logger = get_instance_fn(
             value="custom_callbacks.my_custom_logger", config_file_path=python_file_path
@@ -122,7 +122,7 @@ def test_chat_completion(client):
     try:
         # Your test data
         litellm.set_verbose = False
-        from litellm.proxy.types_utils.utils import get_instance_fn
+        from litellm_proxy.types_utils.utils import get_instance_fn
         my_custom_logger = get_instance_fn(
             value="custom_callbacks.my_custom_logger", config_file_path=python_file_path
@@ -217,7 +217,7 @@ def test_chat_completion_stream(client):
     try:
         # Your test data
         litellm.set_verbose = False
-        from litellm.proxy.types_utils.utils import get_instance_fn
+        from litellm_proxy.types_utils.utils import get_instance_fn
         my_custom_logger = get_instance_fn(
             value="custom_callbacks.my_custom_logger", config_file_path=python_file_path
diff --git a/tests/proxy_unit_tests/test_proxy_encrypt_decrypt.py b/tests/proxy_unit_tests/test_proxy_encrypt_decrypt.py
index f9c3ff42d0..3d17d27801 100644
--- a/tests/proxy_unit_tests/test_proxy_encrypt_decrypt.py
+++ b/tests/proxy_unit_tests/test_proxy_encrypt_decrypt.py
@@ -12,8 +12,8 @@ sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-from litellm.proxy import proxy_server
-from litellm.proxy.common_utils.encrypt_decrypt_utils import (
+from litellm_proxy import proxy_server
+from litellm_proxy.common_utils.encrypt_decrypt_utils import (
     decrypt_value_helper,
     encrypt_value_helper,
 )
diff --git a/tests/proxy_unit_tests/test_proxy_exception_mapping.py b/tests/proxy_unit_tests/test_proxy_exception_mapping.py
index 8171a9cb06..11b775d744 100644
--- a/tests/proxy_unit_tests/test_proxy_exception_mapping.py
+++ b/tests/proxy_unit_tests/test_proxy_exception_mapping.py
@@ -21,7 +21,7 @@ from fastapi import Response
 from fastapi.testclient import TestClient
 import litellm
-from litellm.proxy.proxy_server import ( # Replace with the actual module where your FastAPI router is defined
+from litellm_proxy.proxy_server import ( # Replace with the actual module where your FastAPI router is defined
     initialize,
     router,
     save_worker_config,
@@ -50,7 +50,7 @@ def client():
     filepath = os.path.dirname(os.path.abspath(__file__))
     config_fp = f"{filepath}/test_configs/test_bad_config.yaml"
     asyncio.run(initialize(config=config_fp))
-    from litellm.proxy.proxy_server import app
+    from litellm_proxy.proxy_server import app
     return TestClient(app)
@@ -99,7 +99,7 @@ def test_chat_completion_exception(client):
 # raise openai.AuthenticationError
 @mock.patch(
-    "litellm.proxy.proxy_server.llm_router.acompletion",
+    "litellm_proxy.proxy_server.llm_router.acompletion",
     return_value=invalid_authentication_error_response,
 )
 def test_chat_completion_exception_azure(mock_acompletion, client):
@@ -142,7 +142,7 @@ def test_chat_completion_exception_azure(mock_acompletion, client):
 # raise openai.AuthenticationError
 @mock.patch(
-    "litellm.proxy.proxy_server.llm_router.aembedding",
+    "litellm_proxy.proxy_server.llm_router.aembedding",
     return_value=invalid_authentication_error_response,
 )
 def test_embedding_auth_exception_azure(mock_aembedding, client):
@@ -270,7 +270,7 @@ def test_embedding_exception_any_model(client):
 # raise openai.BadRequestError
 @mock.patch(
-    "litellm.proxy.proxy_server.llm_router.acompletion",
+    "litellm_proxy.proxy_server.llm_router.acompletion",
     return_value=context_length_exceeded_error_response,
 )
 def test_chat_completion_exception_azure_context_window(mock_acompletion, client):
diff --git a/tests/proxy_unit_tests/test_proxy_gunicorn.py b/tests/proxy_unit_tests/test_proxy_gunicorn.py
index 73e368d35a..2e11b3b31f 100644
--- a/tests/proxy_unit_tests/test_proxy_gunicorn.py
+++ b/tests/proxy_unit_tests/test_proxy_gunicorn.py
@@ -17,7 +17,7 @@
 # import litellm
 # ### LOCAL Proxy Server INIT ###
-# from litellm.proxy.proxy_server import save_worker_config # Replace with the actual module where your FastAPI router is defined
+# from litellm_proxy.proxy_server import save_worker_config # Replace with the actual module where your FastAPI router is defined
 # filepath = os.path.dirname(os.path.abspath(__file__))
 # config_fp = f"{filepath}/test_configs/test_config_custom_auth.yaml"
 # def get_openai_info():
@@ -47,7 +47,7 @@
 # # In order for the app to behave well with signals, run it with gunicorn
 # # The first argument must be the "name of the command run"
-# cmd = f"gunicorn litellm.proxy.proxy_server:app --workers {num_workers} --worker-class uvicorn.workers.UvicornWorker --bind {host}:{port}"
+# cmd = f"gunicorn litellm_proxy.proxy_server:app --workers {num_workers} --worker-class uvicorn.workers.UvicornWorker --bind {host}:{port}"
 # cmd = cmd.split()
 # print(f"Running command: {cmd}")
 # import sys
diff --git a/tests/proxy_unit_tests/test_proxy_pass_user_config.py b/tests/proxy_unit_tests/test_proxy_pass_user_config.py
index 3ecc252264..c8499021d6 100644
--- a/tests/proxy_unit_tests/test_proxy_pass_user_config.py
+++ b/tests/proxy_unit_tests/test_proxy_pass_user_config.py
@@ -25,7 +25,7 @@ logging.basicConfig(
 from fastapi.testclient import TestClient
 from fastapi import FastAPI
 import os
-from litellm.proxy.proxy_server import (
+from litellm_proxy.proxy_server import (
     router,
     save_worker_config,
     initialize,
@@ -39,8 +39,8 @@ headers = {"Authorization": f"Bearer {token}"}
 @pytest.fixture(scope="function")
 def client_no_auth():
-    # Assuming litellm.proxy.proxy_server is an object
-    from litellm.proxy.proxy_server import cleanup_router_config_variables
+    # Assuming litellm_proxy.proxy_server is an object
+    from litellm_proxy.proxy_server import cleanup_router_config_variables
     cleanup_router_config_variables()
     filepath = os.path.dirname(os.path.abspath(__file__))
diff --git a/tests/proxy_unit_tests/test_proxy_reject_logging.py b/tests/proxy_unit_tests/test_proxy_reject_logging.py
index 756a231154..4efad2d086 100644
--- a/tests/proxy_unit_tests/test_proxy_reject_logging.py
+++ b/tests/proxy_unit_tests/test_proxy_reject_logging.py
@@ -33,18 +33,18 @@ import litellm
 from litellm import Router, mock_completion
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (
+from litellm_proxy._types import UserAPIKeyAuth
+from enterprise.enterprise_hooks.secret_detection import (
     _ENTERPRISE_SecretDetection,
 )
-from litellm.proxy.proxy_server import (
+from litellm_proxy.proxy_server import (
     Depends,
     HTTPException,
     chat_completion,
     completion,
     embeddings,
 )
-from litellm.proxy.utils import ProxyLogging, hash_token
+from litellm_proxy.utils import ProxyLogging, hash_token
 from litellm.router import Router
@@ -129,7 +129,7 @@ async def test_chat_completion_request_with_redaction(route, body):
     Ensures that the secret is redacted EVEN on the callback
     """
-    from litellm.proxy import proxy_server
+    from litellm_proxy import proxy_server
     setattr(proxy_server, "llm_router", router)
     _test_logger = testLogger()
diff --git a/tests/proxy_unit_tests/test_proxy_routes.py b/tests/proxy_unit_tests/test_proxy_routes.py
index c2dc0542f1..b285fef4cf 100644
--- a/tests/proxy_unit_tests/test_proxy_routes.py
+++ b/tests/proxy_unit_tests/test_proxy_routes.py
@@ -20,10 +20,10 @@ from fastapi import Request
 from starlette.datastructures import URL, Headers, QueryParams
 import litellm
-from litellm.proxy._types import LiteLLMRoutes
-from litellm.proxy.auth.auth_utils import get_request_route
-from litellm.proxy.auth.route_checks import RouteChecks
-from litellm.proxy.proxy_server import app
+from litellm_proxy._types import LiteLLMRoutes
+from litellm_proxy.auth.auth_utils import get_request_route
+from litellm_proxy.auth.route_checks import RouteChecks
+from litellm_proxy.proxy_server import app
 # Configure logging
 logging.basicConfig(
diff --git a/tests/proxy_unit_tests/test_proxy_server.py b/tests/proxy_unit_tests/test_proxy_server.py
index dda39d2bd5..b0e4cc79f4 100644
--- a/tests/proxy_unit_tests/test_proxy_server.py
+++ b/tests/proxy_unit_tests/test_proxy_server.py
@@ -4,9 +4,7 @@ import traceback
 from unittest import mock
 from dotenv import load_dotenv
-
-import litellm.proxy
-import litellm.proxy.proxy_server
+import litellm_proxy.proxy_server
 load_dotenv()
 import io
@@ -40,12 +38,12 @@ from fastapi import FastAPI
 from fastapi.testclient import TestClient
 from litellm.integrations.custom_logger import CustomLogger
-from litellm.proxy.proxy_server import ( # Replace with the actual module where your FastAPI router is defined
+from litellm_proxy.proxy_server import ( # Replace with the actual module where your FastAPI router is defined
     app,
     initialize,
     save_worker_config,
 )
-from litellm.proxy.utils import ProxyLogging
+from litellm_proxy.utils import ProxyLogging
 # Your bearer token
 token = "sk-1234"
@@ -95,21 +93,21 @@ example_image_generation_result = {
 def mock_patch_acompletion():
     return mock.patch(
-        "litellm.proxy.proxy_server.llm_router.acompletion",
+        "litellm_proxy.proxy_server.llm_router.acompletion",
         return_value=example_completion_result,
     )
 def mock_patch_aembedding():
     return mock.patch(
-        "litellm.proxy.proxy_server.llm_router.aembedding",
+        "litellm_proxy.proxy_server.llm_router.aembedding",
         return_value=example_embedding_result,
     )
 def mock_patch_aimage_generation():
     return mock.patch(
-        "litellm.proxy.proxy_server.llm_router.aimage_generation",
+        "litellm_proxy.proxy_server.llm_router.aimage_generation",
         return_value=example_image_generation_result,
     )
@@ -127,8 +125,8 @@ def fake_env_vars(monkeypatch):
 @pytest.fixture(scope="function")
 def client_no_auth(fake_env_vars):
-    # Assuming litellm.proxy.proxy_server is an object
-    from litellm.proxy.proxy_server import cleanup_router_config_variables
+    # Assuming litellm_proxy.proxy_server is an object
+    from litellm_proxy.proxy_server import cleanup_router_config_variables
     cleanup_router_config_variables()
     filepath = os.path.dirname(os.path.abspath(__file__))
@@ -202,7 +200,7 @@ def test_add_headers_to_request(litellm_key_header_name):
     from fastapi import Request
     from starlette.datastructures import URL
     import json
-    from litellm.proxy.litellm_pre_call_utils import (
+    from litellm_proxy.litellm_pre_call_utils import (
         clean_headers,
         LiteLLMProxyRequestSetup,
     )
@@ -237,13 +235,13 @@ def test_chat_completion_forward_headers(
     global headers
     try:
         if forward_headers:
-            gs = getattr(litellm.proxy.proxy_server, "general_settings")
+            gs = getattr(litellm_proxy.proxy_server, "general_settings")
             gs["forward_client_headers_to_llm_api"] = True
-            setattr(litellm.proxy.proxy_server, "general_settings", gs)
+            setattr(litellm_proxy.proxy_server, "general_settings", gs)
         if litellm_key_header_name is not None:
-            gs = getattr(litellm.proxy.proxy_server, "general_settings")
+            gs = getattr(litellm_proxy.proxy_server, "general_settings")
             gs["litellm_key_header_name"] = litellm_key_header_name
-            setattr(litellm.proxy.proxy_server, "general_settings", gs)
+            setattr(litellm_proxy.proxy_server, "general_settings", gs)
         # Your test data
         test_data = {
             "model": "gpt-3.5-turbo",
@@ -300,14 +298,14 @@ async def test_team_disable_guardrails(mock_acompletion, client_no_auth):
     from fastapi import HTTPException, Request
     from starlette.datastructures import URL
-    from litellm.proxy._types import (
+    from litellm_proxy._types import (
         LiteLLM_TeamTable,
         LiteLLM_TeamTableCachedObj,
         ProxyException,
         UserAPIKeyAuth,
     )
-    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-    from litellm.proxy.proxy_server import hash_token, user_api_key_cache
+    from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+    from litellm_proxy.proxy_server import hash_token, user_api_key_cache
     _team_id = "1234"
     user_key = "sk-12345678"
@@ -328,9 +326,9 @@ async def test_team_disable_guardrails(mock_acompletion, client_no_auth):
     user_api_key_cache.set_cache(key=hash_token(user_key), value=valid_token)
     user_api_key_cache.set_cache(key="team_id:{}".format(_team_id), value=team_obj)
-    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm.proxy.proxy_server, "prisma_client", "hello-world")
+    setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "prisma_client", "hello-world")
     request = Request(scope={"type": "http"})
     request._url = URL(url="/chat/completions")
@@ -352,8 +350,8 @@ from test_custom_callback_input import CompletionCustomHandler
 @mock_patch_acompletion()
 def test_custom_logger_failure_handler(mock_acompletion, client_no_auth):
-    from litellm.proxy._types import UserAPIKeyAuth
-    from litellm.proxy.proxy_server import hash_token, user_api_key_cache
+    from litellm_proxy._types import UserAPIKeyAuth
+    from litellm_proxy.proxy_server import hash_token, user_api_key_cache
     rpm_limit = 0
@@ -365,16 +363,16 @@ def test_custom_logger_failure_handler(mock_acompletion, client_no_auth):
     mock_logger = CustomLogger()
     mock_logger_unit_tests = CompletionCustomHandler()
     proxy_logging_obj: ProxyLogging = getattr(
-        litellm.proxy.proxy_server, "proxy_logging_obj"
+        litellm_proxy.proxy_server, "proxy_logging_obj"
     )
     litellm.callbacks = [mock_logger, mock_logger_unit_tests]
     proxy_logging_obj._init_litellm_callbacks(llm_router=None)
-    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm.proxy.proxy_server, "prisma_client", "FAKE-VAR")
-    setattr(litellm.proxy.proxy_server, "proxy_logging_obj", proxy_logging_obj)
setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", "FAKE-VAR") + setattr(litellm_proxy.proxy_server, "proxy_logging_obj", proxy_logging_obj) with patch.object( mock_logger, "async_log_failure_event", new=AsyncMock() @@ -529,7 +527,7 @@ def test_openai_deployments_model_chat_completions_azure( @mock_patch_aembedding() def test_embedding(mock_aembedding, client_no_auth): global headers - from litellm.proxy.proxy_server import user_custom_auth + from litellm_proxy.proxy_server import user_custom_auth try: test_data = { @@ -557,7 +555,7 @@ def test_embedding(mock_aembedding, client_no_auth): @mock_patch_aembedding() def test_bedrock_embedding(mock_aembedding, client_no_auth): global headers - from litellm.proxy.proxy_server import user_custom_auth + from litellm_proxy.proxy_server import user_custom_auth try: test_data = { @@ -584,7 +582,7 @@ def test_bedrock_embedding(mock_aembedding, client_no_auth): @pytest.mark.skip(reason="AWS Suspended Account") def test_sagemaker_embedding(client_no_auth): global headers - from litellm.proxy.proxy_server import user_custom_auth + from litellm_proxy.proxy_server import user_custom_auth try: test_data = { @@ -610,7 +608,7 @@ def test_sagemaker_embedding(client_no_auth): @mock_patch_aimage_generation() def test_img_gen(mock_aimage_generation, client_no_auth): global headers - from litellm.proxy.proxy_server import user_custom_auth + from litellm_proxy.proxy_server import user_custom_auth try: test_data = { @@ -744,11 +742,11 @@ def test_chat_completion_optional_params(mock_acompletion, client_no_auth): # Test Reading config.yaml file -from litellm.proxy.proxy_server import ProxyConfig +from litellm_proxy.proxy_server import ProxyConfig @pytest.mark.skip(reason="local variable conflicts. 
 @pytest.mark.skip(reason="local variable conflicts. needs to be refactored.")
-@mock.patch("litellm.proxy.proxy_server.litellm.Cache")
+@mock.patch("litellm_proxy.proxy_server.litellm.Cache")
 def test_load_router_config(mock_cache, fake_env_vars):
     mock_cache.return_value.cache.__dict__ = {"redis_client": None}
     mock_cache.return_value.supported_call_types = [
@@ -858,11 +856,11 @@ async def test_team_update_redis():
     Tests if team update, updates the redis cache if set
     """
     from litellm.caching.caching import DualCache, RedisCache
-    from litellm.proxy._types import LiteLLM_TeamTableCachedObj
-    from litellm.proxy.auth.auth_checks import _cache_team_object
+    from litellm_proxy._types import LiteLLM_TeamTableCachedObj
+    from litellm_proxy.auth.auth_checks import _cache_team_object
     proxy_logging_obj: ProxyLogging = getattr(
-        litellm.proxy.proxy_server, "proxy_logging_obj"
+        litellm_proxy.proxy_server, "proxy_logging_obj"
     )
     redis_cache = RedisCache()
@@ -888,10 +886,10 @@ async def test_get_team_redis(client_no_auth):
     Tests if get_team_object gets value from redis cache, if set
     """
     from litellm.caching.caching import DualCache, RedisCache
-    from litellm.proxy.auth.auth_checks import get_team_object
+    from litellm_proxy.auth.auth_checks import get_team_object
     proxy_logging_obj: ProxyLogging = getattr(
-        litellm.proxy.proxy_server, "proxy_logging_obj"
+        litellm_proxy.proxy_server, "proxy_logging_obj"
     )
     redis_cache = RedisCache()
@@ -919,14 +917,14 @@ import random
 import uuid
 from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
-from litellm.proxy._types import (
+from litellm_proxy._types import (
     LitellmUserRoles,
     NewUserRequest,
     TeamMemberAddRequest,
     UserAPIKeyAuth,
 )
-from litellm.proxy.management_endpoints.internal_user_endpoints import new_user
-from litellm.proxy.management_endpoints.team_endpoints import team_member_add
+from litellm_proxy.management_endpoints.internal_user_endpoints import new_user
+from litellm_proxy.management_endpoints.team_endpoints import team_member_add
 from test_key_generate_prisma import prisma_client
@@ -937,17 +935,17 @@ from test_key_generate_prisma import prisma_client
 @pytest.mark.asyncio
 async def test_create_user_default_budget(prisma_client, user_role):
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
     setattr(litellm, "max_internal_user_budget", 10)
     setattr(litellm, "internal_user_budget_duration", "5m")
-    await litellm.proxy.proxy_server.prisma_client.connect()
+    await litellm_proxy.proxy_server.prisma_client.connect()
     user = f"ishaan {uuid.uuid4().hex}"
     request = NewUserRequest(
         user_id=user, user_role=user_role
     )  # create a key with no budget
     with patch.object(
-        litellm.proxy.proxy_server.prisma_client, "insert_data", new=AsyncMock()
+        litellm_proxy.proxy_server.prisma_client, "insert_data", new=AsyncMock()
     ) as mock_client:
         await new_user(
             request,
@@ -980,14 +978,14 @@ async def test_create_team_member_add(prisma_client, new_member_method):
     from fastapi import Request
-    from litellm.proxy._types import LiteLLM_TeamTableCachedObj, LiteLLM_UserTable
-    from litellm.proxy.proxy_server import hash_token, user_api_key_cache
+    from litellm_proxy._types import LiteLLM_TeamTableCachedObj, LiteLLM_UserTable
+    from litellm_proxy.proxy_server import hash_token, user_api_key_cache
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
"master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") setattr(litellm, "max_internal_user_budget", 10) setattr(litellm, "internal_user_budget_duration", "5m") - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() user = f"ishaan {uuid.uuid4().hex}" _team_id = "litellm-test-client-id-new" team_obj = LiteLLM_TeamTableCachedObj( @@ -999,7 +997,7 @@ async def test_create_team_member_add(prisma_client, new_member_method): # user_api_key_cache.set_cache(key=hash_token(user_key), value=valid_token) user_api_key_cache.set_cache(key="team_id:{}".format(_team_id), value=team_obj) - setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache) + setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache) if new_member_method == "user_id": data = { "team_id": _team_id, @@ -1013,10 +1011,10 @@ async def test_create_team_member_add(prisma_client, new_member_method): team_member_add_request = TeamMemberAddRequest(**data) with patch( - "litellm.proxy.proxy_server.prisma_client.db.litellm_usertable", + "litellm_proxy.proxy_server.prisma_client.db.litellm_usertable", new_callable=AsyncMock, ) as mock_litellm_usertable, patch( - "litellm.proxy.auth.auth_checks._get_team_object_from_user_api_key_cache", + "litellm_proxy.auth.auth_checks._get_team_object_from_user_api_key_cache", new=AsyncMock(return_value=team_obj), ) as mock_team_obj: @@ -1029,9 +1027,9 @@ async def test_create_team_member_add(prisma_client, new_member_method): mock_litellm_usertable.find_many = AsyncMock(return_value=None) team_mock_client = AsyncMock() original_val = getattr( - litellm.proxy.proxy_server.prisma_client.db, "litellm_teamtable" + litellm_proxy.proxy_server.prisma_client.db, "litellm_teamtable" ) - litellm.proxy.proxy_server.prisma_client.db.litellm_teamtable = team_mock_client + litellm_proxy.proxy_server.prisma_client.db.litellm_teamtable = team_mock_client team_mock_client.update = AsyncMock( return_value=LiteLLM_TeamTableCachedObj(team_id="1234") @@ -1059,7 +1057,7 @@ async def test_create_team_member_add(prisma_client, new_member_method): == litellm.internal_user_budget_duration ) - litellm.proxy.proxy_server.prisma_client.db.litellm_teamtable = original_val + litellm_proxy.proxy_server.prisma_client.db.litellm_teamtable = original_val @pytest.mark.parametrize("team_member_role", ["admin", "user"]) @@ -1072,19 +1070,19 @@ async def test_create_team_member_add_team_admin_user_api_key_auth( from fastapi import Request - from litellm.proxy._types import LiteLLM_TeamTableCachedObj, Member - from litellm.proxy.proxy_server import ( + from litellm_proxy._types import LiteLLM_TeamTableCachedObj, Member + from litellm_proxy.proxy_server import ( ProxyException, hash_token, user_api_key_auth, user_api_key_cache, ) - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") setattr(litellm, "max_internal_user_budget", 10) setattr(litellm, "internal_user_budget_duration", "5m") - await litellm.proxy.proxy_server.prisma_client.connect() + await litellm_proxy.proxy_server.prisma_client.connect() user = f"ishaan {uuid.uuid4().hex}" _team_id = "litellm-test-client-id-new" user_key = "sk-12345678" @@ -1106,7 +1104,7 @@ async def 
@@ -1106,7 +1104,7 @@ async def test_create_team_member_add_team_admin_user_api_key_auth(
     user_api_key_cache.set_cache(key="team_id:{}".format(_team_id), value=team_obj)
-    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+    setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
     ## TEST IF TEAM ADMIN ALLOWED TO CALL /MEMBER_ADD ENDPOINT
     import json
@@ -1142,12 +1140,12 @@ async def test_create_team_member_add_team_admin(
     from fastapi import Request
-    from litellm.proxy._types import (
+    from litellm_proxy._types import (
         LiteLLM_TeamTableCachedObj,
         LiteLLM_UserTable,
         Member,
     )
-    from litellm.proxy.proxy_server import (
+    from litellm_proxy.proxy_server import (
         HTTPException,
         ProxyException,
         hash_token,
@@ -1155,11 +1153,11 @@ async def test_create_team_member_add_team_admin(
         user_api_key_cache,
     )
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
     setattr(litellm, "max_internal_user_budget", 10)
     setattr(litellm, "internal_user_budget_duration", "5m")
-    await litellm.proxy.proxy_server.prisma_client.connect()
+    await litellm_proxy.proxy_server.prisma_client.connect()
     user = f"ishaan {uuid.uuid4().hex}"
     _team_id = "litellm-test-client-id-new"
     user_key = "sk-12345678"
@@ -1183,7 +1181,7 @@ async def test_create_team_member_add_team_admin(
     user_api_key_cache.set_cache(key="team_id:{}".format(_team_id), value=team_obj)
-    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+    setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
     if new_member_method == "user_id":
         data = {
             "team_id": _team_id,
@@ -1197,10 +1195,10 @@ async def test_create_team_member_add_team_admin(
     team_member_add_request = TeamMemberAddRequest(**data)
     with patch(
-        "litellm.proxy.proxy_server.prisma_client.db.litellm_usertable",
+        "litellm_proxy.proxy_server.prisma_client.db.litellm_usertable",
         new_callable=AsyncMock,
     ) as mock_litellm_usertable, patch(
-        "litellm.proxy.auth.auth_checks._get_team_object_from_user_api_key_cache",
+        "litellm_proxy.auth.auth_checks._get_team_object_from_user_api_key_cache",
         new=AsyncMock(return_value=team_obj),
     ) as mock_team_obj:
         mock_client = AsyncMock(
@@ -1213,9 +1211,9 @@ async def test_create_team_member_add_team_admin(
         team_mock_client = AsyncMock()
         original_val = getattr(
-            litellm.proxy.proxy_server.prisma_client.db, "litellm_teamtable"
+            litellm_proxy.proxy_server.prisma_client.db, "litellm_teamtable"
         )
-        litellm.proxy.proxy_server.prisma_client.db.litellm_teamtable = team_mock_client
+        litellm_proxy.proxy_server.prisma_client.db.litellm_teamtable = team_mock_client
         team_mock_client.update = AsyncMock(
             return_value=LiteLLM_TeamTableCachedObj(team_id="1234")
@@ -1250,22 +1248,22 @@ async def test_create_team_member_add_team_admin(
             == litellm.internal_user_budget_duration
         )
-    litellm.proxy.proxy_server.prisma_client.db.litellm_teamtable = original_val
+    litellm_proxy.proxy_server.prisma_client.db.litellm_teamtable = original_val
 @pytest.mark.asyncio
 async def test_user_info_team_list(prisma_client):
     """Assert user_info for admin calls team_list function"""
-    from litellm.proxy._types import LiteLLM_UserTable
+    from litellm_proxy._types import LiteLLM_UserTable
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    await litellm.proxy.proxy_server.prisma_client.connect()
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    await litellm_proxy.proxy_server.prisma_client.connect()
-    from litellm.proxy.management_endpoints.internal_user_endpoints import user_info
+    from litellm_proxy.management_endpoints.internal_user_endpoints import user_info
     with patch(
-        "litellm.proxy.management_endpoints.team_endpoints.list_team",
+        "litellm_proxy.management_endpoints.team_endpoints.list_team",
         new_callable=AsyncMock,
     ) as mock_client:
@@ -1303,11 +1301,11 @@ async def test_add_callback_via_key(prisma_client):
     from fastapi import HTTPException, Request, Response
     from starlette.datastructures import URL
-    from litellm.proxy.proxy_server import chat_completion
+    from litellm_proxy.proxy_server import chat_completion
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    await litellm.proxy.proxy_server.prisma_client.connect()
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    await litellm_proxy.proxy_server.prisma_client.connect()
     litellm.set_verbose = True
@@ -1396,13 +1394,13 @@ async def test_add_callback_via_key_litellm_pre_call_utils(
     from fastapi import HTTPException, Request, Response
     from starlette.datastructures import URL
-    from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
+    from litellm_proxy.litellm_pre_call_utils import add_litellm_data_to_request
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    await litellm.proxy.proxy_server.prisma_client.connect()
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    await litellm_proxy.proxy_server.prisma_client.connect()
-    proxy_config = getattr(litellm.proxy.proxy_server, "proxy_config")
+    proxy_config = getattr(litellm_proxy.proxy_server, "proxy_config")
     request = Request(scope={"type": "http", "method": "POST", "headers": {}})
     request._url = URL(url="/chat/completions")
@@ -1519,7 +1517,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils(
     ],
 )
 async def test_disable_fallbacks_by_key(disable_fallbacks_set):
-    from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
+    from litellm_proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
     key_metadata = {"disable_fallbacks": disable_fallbacks_set}
     existing_data = {
@@ -1552,13 +1550,13 @@ async def test_add_callback_via_key_litellm_pre_call_utils_gcs_bucket(
     from fastapi import HTTPException, Request, Response
     from starlette.datastructures import URL
-    from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
+    from litellm_proxy.litellm_pre_call_utils import add_litellm_data_to_request
-    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    await litellm.proxy.proxy_server.prisma_client.connect()
+    setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    await litellm_proxy.proxy_server.prisma_client.connect()
-    proxy_config = getattr(litellm.proxy.proxy_server, "proxy_config")
+    proxy_config = getattr(litellm_proxy.proxy_server, "proxy_config")
"http", "method": "POST", "headers": {}}) request._url = URL(url="/chat/completions") @@ -1685,13 +1683,13 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith( from fastapi import HTTPException, Request, Response from starlette.datastructures import URL - from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request + from litellm_proxy.litellm_pre_call_utils import add_litellm_data_to_request - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() - proxy_config = getattr(litellm.proxy.proxy_server, "proxy_config") + proxy_config = getattr(litellm_proxy.proxy_server, "proxy_config") request = Request(scope={"type": "http", "method": "POST", "headers": {}}) request._url = URL(url="/chat/completions") @@ -1805,7 +1803,7 @@ async def test_add_callback_via_key_litellm_pre_call_utils_langsmith( async def test_gemini_pass_through_endpoint(): from starlette.datastructures import URL - from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import ( + from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import ( Request, Response, gemini_proxy_route, @@ -1865,13 +1863,13 @@ async def test_proxy_model_group_alias_checks(prisma_client, hidden): from fastapi import HTTPException, Request, Response from starlette.datastructures import URL - from litellm.proxy.proxy_server import model_group_info, model_info_v1, model_list + from litellm_proxy.proxy_server import model_group_info, model_info_v1, model_list - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() - proxy_config = getattr(litellm.proxy.proxy_server, "proxy_config") + proxy_config = getattr(litellm_proxy.proxy_server, "proxy_config") _model_list = [ { @@ -1884,8 +1882,8 @@ async def test_proxy_model_group_alias_checks(prisma_client, hidden): model_list=_model_list, model_group_alias={model_alias: {"model": "gpt-3.5-turbo", "hidden": hidden}}, ) - setattr(litellm.proxy.proxy_server, "llm_router", router) - setattr(litellm.proxy.proxy_server, "llm_model_list", _model_list) + setattr(litellm_proxy.proxy_server, "llm_router", router) + setattr(litellm_proxy.proxy_server, "llm_model_list", _model_list) request = Request(scope={"type": "http", "method": "POST", "headers": {}}) request._url = URL(url="/v1/models") @@ -1945,13 +1943,13 @@ async def test_proxy_model_group_info_rerank(prisma_client): from fastapi import HTTPException, Request, Response from starlette.datastructures import URL - from litellm.proxy.proxy_server import model_group_info, model_info_v1, model_list + from litellm_proxy.proxy_server import model_group_info, model_info_v1, model_list - setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) - setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") - await litellm.proxy.proxy_server.prisma_client.connect() + setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) + 
setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") + await litellm_proxy.proxy_server.prisma_client.connect() - proxy_config = getattr(litellm.proxy.proxy_server, "proxy_config") + proxy_config = getattr(litellm_proxy.proxy_server, "proxy_config") _model_list = [ { @@ -1963,8 +1961,8 @@ async def test_proxy_model_group_info_rerank(prisma_client): } ] router = litellm.Router(model_list=_model_list) - setattr(litellm.proxy.proxy_server, "llm_router", router) - setattr(litellm.proxy.proxy_server, "llm_model_list", _model_list) + setattr(litellm_proxy.proxy_server, "llm_router", router) + setattr(litellm_proxy.proxy_server, "llm_model_list", _model_list) request = Request(scope={"type": "http", "method": "POST", "headers": {}}) request._url = URL(url="/v1/models") @@ -1995,19 +1993,19 @@ async def test_proxy_model_group_info_rerank(prisma_client): # """ # Add 10 people to a team. Confirm all 10 are added. # """ -# from litellm.proxy.management_endpoints.team_endpoints import ( +# from litellm_proxy.management_endpoints.team_endpoints import ( # team_member_add, # new_team, # ) -# from litellm.proxy._types import TeamMemberAddRequest, Member, NewTeamRequest +# from litellm_proxy._types import TeamMemberAddRequest, Member, NewTeamRequest -# setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) -# setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") +# setattr(litellm_proxy.proxy_server, "prisma_client", prisma_client) +# setattr(litellm_proxy.proxy_server, "master_key", "sk-1234") # try: # async def test(): -# await litellm.proxy.proxy_server.prisma_client.connect() -# from litellm.proxy.proxy_server import user_api_key_cache +# await litellm_proxy.proxy_server.prisma_client.connect() +# from litellm_proxy.proxy_server import user_api_key_cache # user_api_key_dict = UserAPIKeyAuth( # user_role=LitellmUserRoles.PROXY_ADMIN, @@ -2070,14 +2068,14 @@ async def test_proxy_model_group_info_rerank(prisma_client): @pytest.mark.asyncio async def test_proxy_server_prisma_setup(): - from litellm.proxy.proxy_server import ProxyStartupEvent, proxy_state - from litellm.proxy.utils import ProxyLogging + from litellm_proxy.proxy_server import ProxyStartupEvent, proxy_state + from litellm_proxy.utils import ProxyLogging from litellm.caching import DualCache user_api_key_cache = DualCache() with patch.object( - litellm.proxy.proxy_server, "PrismaClient", new=MagicMock() + litellm_proxy.proxy_server, "PrismaClient", new=MagicMock() ) as mock_prisma_client: mock_client = mock_prisma_client.return_value # This is the mocked instance mock_client.connect = AsyncMock() # Mock the connect method @@ -2113,8 +2111,8 @@ async def test_proxy_server_prisma_setup_invalid_db(): Think 2-3 times before editing / deleting this test, it's important for PROD """ - from litellm.proxy.proxy_server import ProxyStartupEvent - from litellm.proxy.utils import ProxyLogging + from litellm_proxy.proxy_server import ProxyStartupEvent + from litellm_proxy.utils import ProxyLogging from litellm.caching import DualCache user_api_key_cache = DualCache() @@ -2145,8 +2143,8 @@ async def test_get_ui_settings_spend_logs_threshold(): """ Test that get_ui_settings correctly sets DISABLE_EXPENSIVE_DB_QUERIES based on spend_logs_row_count threshold """ - from litellm.proxy.management_endpoints.ui_sso import get_ui_settings - from litellm.proxy.proxy_server import proxy_state + from litellm_proxy.management_endpoints.ui_sso import get_ui_settings + from litellm_proxy.proxy_server import proxy_state from fastapi import 
     from fastapi import Request
     from litellm.constants import MAX_SPENDLOG_ROWS_TO_QUERY
@@ -2195,7 +2193,7 @@ def test_get_timeout_from_request():
-    from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
+    from litellm_proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
     headers = {
         "x-litellm-timeout": "90",
diff --git a/tests/proxy_unit_tests/test_proxy_server_caching.py b/tests/proxy_unit_tests/test_proxy_server_caching.py
index d6f98d27b4..b480c66596 100644
--- a/tests/proxy_unit_tests/test_proxy_server_caching.py
+++ b/tests/proxy_unit_tests/test_proxy_server_caching.py
@@ -26,7 +26,7 @@ logging.basicConfig(
 # test /chat/completion request to the proxy
 from fastapi.testclient import TestClient
 from fastapi import FastAPI
-from litellm.proxy.proxy_server import (
+from litellm_proxy.proxy_server import (
     router,
     save_worker_config,
     initialize,
@@ -40,8 +40,8 @@ headers = {"Authorization": f"Bearer {token}"}
 @pytest.fixture(scope="function")
 def client_no_auth():
-    # Assuming litellm.proxy.proxy_server is an object
-    from litellm.proxy.proxy_server import cleanup_router_config_variables
+    # Assuming litellm_proxy.proxy_server is an object
+    from litellm_proxy.proxy_server import cleanup_router_config_variables
     cleanup_router_config_variables()
     filepath = os.path.dirname(os.path.abspath(__file__))
diff --git a/tests/proxy_unit_tests/test_proxy_server_langfuse.py b/tests/proxy_unit_tests/test_proxy_server_langfuse.py
index 171b40ef15..b6e87e4f88 100644
--- a/tests/proxy_unit_tests/test_proxy_server_langfuse.py
+++ b/tests/proxy_unit_tests/test_proxy_server_langfuse.py
@@ -31,7 +31,7 @@ from fastapi import FastAPI
 # test /chat/completion request to the proxy
 from fastapi.testclient import TestClient
-from litellm.proxy.proxy_server import ( # Replace with the actual module where your FastAPI router is defined
+from litellm_proxy.proxy_server import ( # Replace with the actual module where your FastAPI router is defined
     router,
     save_worker_config,
 )
diff --git a/tests/proxy_unit_tests/test_proxy_setting_guardrails.py b/tests/proxy_unit_tests/test_proxy_setting_guardrails.py
index b845f86b6e..5e96cdd1a0 100644
--- a/tests/proxy_unit_tests/test_proxy_setting_guardrails.py
+++ b/tests/proxy_unit_tests/test_proxy_setting_guardrails.py
@@ -19,7 +19,7 @@ from fastapi import Response
 from fastapi.testclient import TestClient
 import litellm
-from litellm.proxy.proxy_server import ( # Replace with the actual module where your FastAPI router is defined
+from litellm_proxy.proxy_server import ( # Replace with the actual module where your FastAPI router is defined
     initialize,
     router,
     save_worker_config,
@@ -31,7 +31,7 @@ def client():
     filepath = os.path.dirname(os.path.abspath(__file__))
     config_fp = f"{filepath}/test_configs/test_guardrails_config.yaml"
     asyncio.run(initialize(config=config_fp))
-    from litellm.proxy.proxy_server import app
+    from litellm_proxy.proxy_server import app
     return TestClient(app)
diff --git a/tests/proxy_unit_tests/test_proxy_token_counter.py b/tests/proxy_unit_tests/test_proxy_token_counter.py
index 11dededd6c..6841ccdf17 100644
--- a/tests/proxy_unit_tests/test_proxy_token_counter.py
+++ b/tests/proxy_unit_tests/test_proxy_token_counter.py
@@ -18,13 +18,13 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import pytest, logging, asyncio
 import litellm, asyncio
-from litellm.proxy.proxy_server import token_counter
-from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
+from litellm_proxy.proxy_server import token_counter
+from litellm_proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
 from litellm._logging import verbose_proxy_logger
 verbose_proxy_logger.setLevel(level=logging.DEBUG)
-from litellm.proxy._types import TokenCountRequest, TokenCountResponse
+from litellm_proxy._types import TokenCountRequest, TokenCountResponse
 from litellm import Router
@@ -51,7 +51,7 @@ async def test_vLLM_token_counting():
         ]
     )
-    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
+    setattr(litellm_proxy.proxy_server, "llm_router", llm_router)
     response = await token_counter(
         request=TokenCountRequest(
@@ -86,7 +86,7 @@ async def test_token_counting_model_not_in_model_list():
         ]
     )
-    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
+    setattr(litellm_proxy.proxy_server, "llm_router", llm_router)
     response = await token_counter(
         request=TokenCountRequest(
@@ -121,7 +121,7 @@ async def test_gpt_token_counting():
         ]
     )
-    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
+    setattr(litellm_proxy.proxy_server, "llm_router", llm_router)
     response = await token_counter(
         request=TokenCountRequest(
diff --git a/tests/proxy_unit_tests/test_proxy_utils.py b/tests/proxy_unit_tests/test_proxy_utils.py
index 1281d50863..a3245277ed 100644
--- a/tests/proxy_unit_tests/test_proxy_utils.py
+++ b/tests/proxy_unit_tests/test_proxy_utils.py
@@ -3,7 +3,7 @@ import os
 import sys
 from typing import Any, Dict, Optional, List
 from unittest.mock import Mock
-from litellm.proxy.utils import _get_redoc_url, _get_docs_url
+from litellm_proxy.utils import _get_redoc_url, _get_docs_url
 import json
 import pytest
 from fastapi import Request
@@ -14,9 +14,9 @@ sys.path.insert(
 import litellm
 from unittest.mock import MagicMock, patch, AsyncMock
-from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth
-from litellm.proxy.auth.auth_utils import is_request_body_safe
-from litellm.proxy.litellm_pre_call_utils import (
+from litellm_proxy._types import LitellmUserRoles, UserAPIKeyAuth
+from litellm_proxy.auth.auth_utils import is_request_body_safe
+from litellm_proxy.litellm_pre_call_utils import (
     _get_dynamic_logging_metadata,
     add_litellm_data_to_request,
 )
@@ -29,7 +29,7 @@ def mock_request(monkeypatch):
     mock_request.query_params = {}  # Set mock query_params to an empty dictionary
     mock_request.headers = {"traceparent": "test_traceparent"}
     monkeypatch.setattr(
-        "litellm.proxy.litellm_pre_call_utils.add_litellm_data_to_request", mock_request
+        "litellm_proxy.litellm_pre_call_utils.add_litellm_data_to_request", mock_request
     )
     return mock_request
@@ -92,7 +92,7 @@ async def test_traceparent_not_added_by_default(endpoint, mock_request):
     from litellm.integrations.opentelemetry import OpenTelemetry
     otel_logger = OpenTelemetry()
-    setattr(litellm.proxy.proxy_server, "open_telemetry_logger", otel_logger)
+    setattr(litellm_proxy.proxy_server, "open_telemetry_logger", otel_logger)
     mock_request.url.path = endpoint
     user_api_key_dict = UserAPIKeyAuth(
@@ -110,7 +110,7 @@ async def test_traceparent_not_added_by_default(endpoint, mock_request):
     _extra_headers = data.get("extra_headers") or {}
     assert "traceparent" not in _extra_headers
-    setattr(litellm.proxy.proxy_server, "open_telemetry_logger", None)
+    setattr(litellm_proxy.proxy_server, "open_telemetry_logger", None)
@@ -232,7 +232,7 @@ def test_dynamic_logging_metadata_key_and_team_metadata(callback_vars):
     os.environ["LANGFUSE_PUBLIC_KEY_TEMP"] = "pk-lf-9636b7a6-c066"
os.environ["LANGFUSE_SECRET_KEY_TEMP"] = "sk-lf-7cc8b620" os.environ["LANGFUSE_HOST_TEMP"] = "https://us.cloud.langfuse.com" - from litellm.proxy.proxy_server import ProxyConfig + from litellm_proxy.proxy_server import ProxyConfig proxy_config = ProxyConfig() user_api_key_dict = UserAPIKeyAuth( @@ -314,7 +314,7 @@ def test_dynamic_logging_metadata_key_and_team_metadata(callback_vars): ], ) def test_dynamic_turn_off_message_logging(callback_vars): - from litellm.proxy.proxy_server import ProxyConfig + from litellm_proxy.proxy_server import ProxyConfig proxy_config = ProxyConfig() user_api_key_dict = UserAPIKeyAuth( @@ -460,7 +460,7 @@ def test_is_request_body_safe_model_enabled( def test_reading_openai_org_id_from_headers(): - from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup + from litellm_proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup headers = { "OpenAI-Organization": "test_org_id", @@ -488,8 +488,8 @@ def test_reading_openai_org_id_from_headers(): ) def test_add_litellm_data_for_backend_llm_call(headers, expected_data): import json - from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup - from litellm.proxy._types import UserAPIKeyAuth + from litellm_proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup + from litellm_proxy._types import UserAPIKeyAuth user_api_key_dict = UserAPIKeyAuth( api_key="test_api_key", user_id="test_user_id", org_id="test_org_id" @@ -509,8 +509,8 @@ def test_foward_litellm_user_info_to_backend_llm_call(): litellm.add_user_information_to_llm_headers = True - from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup - from litellm.proxy._types import UserAPIKeyAuth + from litellm_proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup + from litellm_proxy._types import UserAPIKeyAuth user_api_key_dict = UserAPIKeyAuth( api_key="test_api_key", user_id="test_user_id", org_id="test_org_id" @@ -531,10 +531,10 @@ def test_foward_litellm_user_info_to_backend_llm_call(): def test_update_internal_user_params(): - from litellm.proxy.management_endpoints.internal_user_endpoints import ( + from litellm_proxy.management_endpoints.internal_user_endpoints import ( _update_internal_new_user_params, ) - from litellm.proxy._types import NewUserRequest + from litellm_proxy._types import NewUserRequest litellm.default_internal_user_params = { "max_budget": 100, @@ -558,7 +558,7 @@ def test_update_internal_user_params(): @pytest.mark.asyncio async def test_proxy_config_update_from_db(): - from litellm.proxy.proxy_server import ProxyConfig + from litellm_proxy.proxy_server import ProxyConfig from pydantic import BaseModel proxy_config = ProxyConfig() @@ -602,10 +602,10 @@ async def test_proxy_config_update_from_db(): def test_prepare_key_update_data(): - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( prepare_key_update_data, ) - from litellm.proxy._types import UpdateKeyRequest + from litellm_proxy._types import UpdateKeyRequest existing_key_row = MagicMock() data = UpdateKeyRequest(key="test_key", models=["gpt-4"], duration="120s") @@ -691,7 +691,7 @@ def test_get_docs_url(env_vars, expected_url): ], ) def test_merge_tags(request_tags, tags_to_add, expected_tags): - from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup + from litellm_proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup result = LiteLLMProxyRequestSetup._merge_tags( request_tags=request_tags, 
         tags_to_add=tags_to_add
@@ -855,7 +855,7 @@ async def test_add_litellm_data_to_request_duplicate_tags(
 def test_enforced_params_check(
     general_settings, user_api_key_dict, request_body, expected_error
 ):
-    from litellm.proxy.litellm_pre_call_utils import _enforced_params_check
+    from litellm_proxy.litellm_pre_call_utils import _enforced_params_check
     if expected_error:
         with pytest.raises(ValueError):
@@ -875,7 +875,7 @@ def test_enforced_params_check(
 def test_get_key_models():
-    from litellm.proxy.auth.model_checks import get_key_models
+    from litellm_proxy.auth.model_checks import get_key_models
     from collections import defaultdict
     user_api_key_dict = UserAPIKeyAuth(
@@ -899,7 +899,7 @@ def test_get_key_models():
 def test_get_team_models():
-    from litellm.proxy.auth.model_checks import get_team_models
+    from litellm_proxy.auth.model_checks import get_team_models
     from collections import defaultdict
     user_api_key_dict = UserAPIKeyAuth(
@@ -925,7 +925,7 @@ def test_get_team_models():
 def test_update_config_fields():
-    from litellm.proxy.proxy_server import ProxyConfig
+    from litellm_proxy.proxy_server import ProxyConfig
     proxy_config = ProxyConfig()
@@ -979,7 +979,7 @@ def test_get_complete_model_list(proxy_model_list, provider):
     """
     Test that get_complete_model_list correctly expands model groups like 'openai/*' into individual models with provider prefixes
     """
-    from litellm.proxy.auth.model_checks import get_complete_model_list
+    from litellm_proxy.auth.model_checks import get_complete_model_list
     complete_list = get_complete_model_list(
         proxy_model_list=proxy_model_list,
@@ -999,7 +999,7 @@ def test_get_complete_model_list(proxy_model_list, provider):
 def test_team_callback_metadata_all_none_values():
-    from litellm.proxy._types import TeamCallbackMetadata
+    from litellm_proxy._types import TeamCallbackMetadata
     resp = TeamCallbackMetadata(
         success_callback=None,
@@ -1021,7 +1021,7 @@ def test_team_callback_metadata_all_none_values():
     ],
 )
 def test_team_callback_metadata_none_values(none_key):
-    from litellm.proxy._types import TeamCallbackMetadata
+    from litellm_proxy._types import TeamCallbackMetadata
     if none_key == "success_callback":
         args = {
@@ -1055,8 +1055,8 @@ def test_proxy_config_state_post_init_callback_call():
     Where team_id was being popped from config, after callback was called
     """
-    from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
-    from litellm.proxy.proxy_server import ProxyConfig
+    from litellm_proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
+    from litellm_proxy.proxy_server import ProxyConfig
     pc = ProxyConfig()
@@ -1088,7 +1088,7 @@ def test_proxy_config_state_get_config_state_error():
     """
     Ensures that get_config_state does not raise an error when the config is not a valid dictionary
     """
-    from litellm.proxy.proxy_server import ProxyConfig
+    from litellm_proxy.proxy_server import ProxyConfig
     import threading
     test_config = {
@@ -1142,7 +1142,7 @@ def test_litellm_verification_token_view_response_with_budget_table(
     expected_user_api_key_auth_key,
     expected_user_api_key_auth_value,
 ):
-    from litellm.proxy._types import LiteLLM_VerificationTokenView
+    from litellm_proxy._types import LiteLLM_VerificationTokenView
     args: Dict[str, Any] = {
         "token": "78b627d4d14bc3acf5571ae9cb6834e661bc8794d1209318677387add7621ce1",
@@ -1194,8 +1194,8 @@ def test_litellm_verification_token_view_response_with_budget_table(
 def test_is_allowed_to_make_key_request():
-    from litellm.proxy._types import LitellmUserRoles
-    from litellm.proxy.management_endpoints.key_management_endpoints import (
+    from litellm_proxy._types import LitellmUserRoles
+    from litellm_proxy.management_endpoints.key_management_endpoints import (
        _is_allowed_to_make_key_request,
     )
@@ -1225,7 +1225,7 @@ def test_is_allowed_to_make_key_request():
 def test_get_model_group_info():
-    from litellm.proxy.proxy_server import _get_model_group_info
+    from litellm_proxy.proxy_server import _get_model_group_info
     from litellm import Router
     router = Router(
@@ -1310,14 +1310,14 @@ class MockPrismaClientDB:
 @pytest.mark.asyncio
 async def test_get_user_info_for_proxy_admin(mock_team_data, mock_key_data):
     # Patch the prisma_client import
-    from litellm.proxy._types import UserInfoResponse
+    from litellm_proxy._types import UserInfoResponse
     with patch(
-        "litellm.proxy.proxy_server.prisma_client",
+        "litellm_proxy.proxy_server.prisma_client",
         MockPrismaClientDB(mock_team_data, mock_key_data),
     ):
-        from litellm.proxy.management_endpoints.internal_user_endpoints import (
+        from litellm_proxy.management_endpoints.internal_user_endpoints import (
             _get_user_info_for_proxy_admin,
         )
@@ -1330,9 +1330,9 @@ async def test_get_user_info_for_proxy_admin(mock_team_data, mock_key_data):
 def test_custom_openid_response():
-    from litellm.proxy.management_endpoints.ui_sso import generic_response_convertor
-    from litellm.proxy.management_endpoints.ui_sso import JWTHandler
-    from litellm.proxy._types import LiteLLM_JWTAuth
+    from litellm_proxy.management_endpoints.ui_sso import generic_response_convertor
+    from litellm_proxy.management_endpoints.ui_sso import JWTHandler
+    from litellm_proxy._types import LiteLLM_JWTAuth
     from litellm.caching import DualCache
     jwt_handler = JWTHandler()
@@ -1365,7 +1365,7 @@ def test_update_key_request_validation():
     """
     Ensures that the UpdateKeyRequest model validates the temp_budget_increase and temp_budget_expiry fields together
     """
-    from litellm.proxy._types import UpdateKeyRequest
+    from litellm_proxy._types import UpdateKeyRequest
     with pytest.raises(Exception):
         UpdateKeyRequest(
@@ -1387,8 +1387,8 @@ def test_update_key_request_validation():
 def test_get_temp_budget_increase():
-    from litellm.proxy.auth.user_api_key_auth import _get_temp_budget_increase
-    from litellm.proxy._types import UserAPIKeyAuth
+    from litellm_proxy.auth.user_api_key_auth import _get_temp_budget_increase
+    from litellm_proxy._types import UserAPIKeyAuth
     from datetime import datetime, timedelta
     expiry = datetime.now() + timedelta(days=1)
@@ -1406,10 +1406,10 @@ def test_get_temp_budget_increase():
 def test_update_key_budget_with_temp_budget_increase():
-    from litellm.proxy.auth.user_api_key_auth import (
+    from litellm_proxy.auth.user_api_key_auth import (
         _update_key_budget_with_temp_budget_increase,
     )
-    from litellm.proxy._types import UserAPIKeyAuth
+    from litellm_proxy._types import UserAPIKeyAuth
     from datetime import datetime, timedelta
     expiry = datetime.now() + timedelta(days=1)
@@ -1431,7 +1431,7 @@ from unittest.mock import MagicMock, AsyncMock
 @pytest.mark.asyncio
 async def test_health_check_not_called_when_disabled(monkeypatch):
-    from litellm.proxy.proxy_server import ProxyStartupEvent
+    from litellm_proxy.proxy_server import ProxyStartupEvent
     # Mock environment variable
     monkeypatch.setenv("DISABLE_PRISMA_HEALTH_CHECK_ON_STARTUP", "true")
@@ -1444,7 +1444,7 @@ async def test_health_check_not_called_when_disabled(monkeypatch):
     mock_prisma._set_spend_logs_row_count_in_proxy_state = AsyncMock()
     # Mock PrismaClient constructor
     monkeypatch.setattr(
"litellm.proxy.proxy_server.PrismaClient", lambda **kwargs: mock_prisma + "litellm_proxy.proxy_server.PrismaClient", lambda **kwargs: mock_prisma ) # Call the setup function @@ -1459,7 +1459,7 @@ async def test_health_check_not_called_when_disabled(monkeypatch): @patch( - "litellm.proxy.proxy_server.get_openapi_schema", + "litellm_proxy.proxy_server.get_openapi_schema", return_value={ "paths": { "/new/route": {"get": {"summary": "New"}}, @@ -1467,8 +1467,8 @@ async def test_health_check_not_called_when_disabled(monkeypatch): }, ) def test_custom_openapi(mock_get_openapi_schema): - from litellm.proxy.proxy_server import custom_openapi - from litellm.proxy.proxy_server import app + from litellm_proxy.proxy_server import custom_openapi + from litellm_proxy.proxy_server import app openapi_schema = custom_openapi() assert openapi_schema is not None @@ -1478,7 +1478,7 @@ import pytest from unittest.mock import MagicMock, AsyncMock import asyncio from datetime import timedelta -from litellm.proxy.utils import ProxyUpdateSpend +from litellm_proxy.utils import ProxyUpdateSpend @pytest.mark.asyncio @@ -1529,7 +1529,7 @@ async def test_spend_logs_cleanup_after_error(): def test_provider_specific_header(): - from litellm.proxy.litellm_pre_call_utils import ( + from litellm_proxy.litellm_pre_call_utils import ( add_provider_specific_headers_to_request, ) @@ -1593,7 +1593,7 @@ def test_provider_specific_header(): } -from litellm.proxy._types import LiteLLM_UserTable +from litellm_proxy._types import LiteLLM_UserTable @pytest.mark.parametrize( @@ -1610,7 +1610,7 @@ from litellm.proxy._types import LiteLLM_UserTable ], ) def test_get_known_models_from_wildcard(wildcard_model, expected_models): - from litellm.proxy.auth.model_checks import get_known_models_from_wildcard + from litellm_proxy.auth.model_checks import get_known_models_from_wildcard wildcard_models = get_known_models_from_wildcard(wildcard_model=wildcard_model) # Check if all expected models are in the returned list @@ -1658,7 +1658,7 @@ def test_get_known_models_from_wildcard(wildcard_model, expected_models): ], ) def test_update_model_if_team_alias_exists(data, user_api_key_dict, expected_model): - from litellm.proxy.litellm_pre_call_utils import _update_model_if_team_alias_exists + from litellm_proxy.litellm_pre_call_utils import _update_model_if_team_alias_exists # Make a copy of the input data to avoid modifying the test parameters test_data = data.copy() @@ -1767,7 +1767,7 @@ async def test_get_admin_team_ids( should_query_db: bool, mock_prisma_client, ): - from litellm.proxy.management_endpoints.key_management_endpoints import ( + from litellm_proxy.management_endpoints.key_management_endpoints import ( get_admin_team_ids, ) diff --git a/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py b/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py index fc8373a174..d627a0c23e 100644 --- a/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py +++ b/tests/proxy_unit_tests/test_unit_test_max_model_budget_limiter.py @@ -20,10 +20,10 @@ from datetime import datetime from unittest.mock import AsyncMock, patch import pytest from litellm.caching.caching import DualCache -from litellm.proxy.hooks.model_max_budget_limiter import ( +from litellm_proxy.hooks.model_max_budget_limiter import ( _PROXY_VirtualKeyModelMaxBudgetLimiter, ) -from litellm.proxy._types import UserAPIKeyAuth +from litellm_proxy._types import UserAPIKeyAuth import litellm diff --git a/tests/proxy_unit_tests/test_unit_test_proxy_hooks.py 
b/tests/proxy_unit_tests/test_unit_test_proxy_hooks.py index 46863889d2..85dd7da9ac 100644 --- a/tests/proxy_unit_tests/test_unit_test_proxy_hooks.py +++ b/tests/proxy_unit_tests/test_unit_test_proxy_hooks.py @@ -4,7 +4,7 @@ import sys from unittest.mock import Mock, patch, AsyncMock import pytest from fastapi import Request -from litellm.proxy.utils import _get_redoc_url, _get_docs_url +from litellm_proxy.utils import _get_redoc_url, _get_docs_url from datetime import datetime sys.path.insert(0, os.path.abspath("../..")) @@ -20,10 +20,10 @@ async def test_disable_spend_logs(): mock_prisma_client = Mock() mock_prisma_client.spend_log_transactions = [] - with patch("litellm.proxy.proxy_server.disable_spend_logs", True), patch( - "litellm.proxy.proxy_server.prisma_client", mock_prisma_client + with patch("litellm_proxy.proxy_server.disable_spend_logs", True), patch( + "litellm_proxy.proxy_server.prisma_client", mock_prisma_client ): - from litellm.proxy.db.db_spend_update_writer import DBSpendUpdateWriter + from litellm_proxy.db.db_spend_update_writer import DBSpendUpdateWriter db_spend_update_writer = DBSpendUpdateWriter() # Call update_database with disable_spend_logs=True diff --git a/tests/proxy_unit_tests/test_update_spend.py b/tests/proxy_unit_tests/test_update_spend.py index 1fb2479792..7cdbc7a720 100644 --- a/tests/proxy_unit_tests/test_update_spend.py +++ b/tests/proxy_unit_tests/test_update_spend.py @@ -2,7 +2,7 @@ import asyncio import os import sys from unittest.mock import Mock -from litellm.proxy.utils import _get_redoc_url, _get_docs_url +from litellm_proxy.utils import _get_redoc_url, _get_docs_url import pytest from fastapi import Request @@ -15,7 +15,7 @@ from unittest.mock import MagicMock, patch, AsyncMock import httpx -from litellm.proxy.utils import update_spend, DB_CONNECTION_ERROR_TYPES +from litellm_proxy.utils import update_spend, DB_CONNECTION_ERROR_TYPES class MockPrismaClient: diff --git a/tests/proxy_unit_tests/test_user_api_key_auth.py b/tests/proxy_unit_tests/test_user_api_key_auth.py index f0ca27c946..bf621451a4 100644 --- a/tests/proxy_unit_tests/test_user_api_key_auth.py +++ b/tests/proxy_unit_tests/test_user_api_key_auth.py @@ -3,9 +3,7 @@ import os import sys - -import litellm.proxy -import litellm.proxy.proxy_server +import litellm_proxy.proxy_server sys.path.insert( 0, os.path.abspath("../..") @@ -18,14 +16,14 @@ from starlette.datastructures import URL from litellm._logging import verbose_proxy_logger import logging import litellm -from litellm.proxy.auth.user_api_key_auth import ( +from litellm_proxy.auth.user_api_key_auth import ( user_api_key_auth, UserAPIKeyAuth, get_api_key_from_custom_header, ) from fastapi import WebSocket, HTTPException, status -from litellm.proxy._types import LiteLLM_UserTable, LitellmUserRoles +from litellm_proxy._types import LiteLLM_UserTable, LitellmUserRoles class Request: @@ -53,7 +51,7 @@ class Request: def test_check_valid_ip( allowed_ips: Optional[List[str]], client_ip: Optional[str], expected_result: bool ): - from litellm.proxy.auth.auth_utils import _check_valid_ip + from litellm_proxy.auth.auth_utils import _check_valid_ip request = Request(client_ip) @@ -81,7 +79,7 @@ def test_check_valid_ip( def test_check_valid_ip_sent_with_x_forwarded_for( allowed_ips: Optional[List[str]], client_ip: Optional[str], expected_result: bool ): - from litellm.proxy.auth.auth_utils import _check_valid_ip + from litellm_proxy.auth.auth_utils import _check_valid_ip request = Request(client_ip, headers={"X-Forwarded-For": client_ip}) 
@@ -103,13 +101,13 @@ async def test_check_blocked_team():
     from fastapi import Request
     from starlette.datastructures import URL

-    from litellm.proxy._types import (
+    from litellm_proxy._types import (
         LiteLLM_TeamTable,
         LiteLLM_TeamTableCachedObj,
         UserAPIKeyAuth,
     )
-    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-    from litellm.proxy.proxy_server import hash_token, user_api_key_cache
+    from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+    from litellm_proxy.proxy_server import hash_token, user_api_key_cache

     _team_id = "1234"
     user_key = "sk-12345678"
@@ -129,9 +127,9 @@ async def test_check_blocked_team():
     user_api_key_cache.set_cache(key=hashed_token, value=valid_token)
     user_api_key_cache.set_cache(key="team_id:{}".format(_team_id), value=team_obj)

-    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm.proxy.proxy_server, "prisma_client", "hello-world")
+    setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "prisma_client", "hello-world")

     request = Request(scope={"type": "http"})
     request._url = URL(url="/chat/completions")
@@ -149,8 +147,8 @@ async def test_check_blocked_team():
 )
 @pytest.mark.asyncio
 async def test_returned_user_api_key_auth(user_role, expected_role):
-    from litellm.proxy._types import LiteLLM_UserTable, LitellmUserRoles
-    from litellm.proxy.auth.user_api_key_auth import _return_user_api_key_auth_obj
+    from litellm_proxy._types import LiteLLM_UserTable, LitellmUserRoles
+    from litellm_proxy.auth.user_api_key_auth import _return_user_api_key_auth_obj
     from datetime import datetime

     new_obj = await _return_user_api_key_auth_obj(
@@ -183,9 +181,9 @@ async def test_aaauser_personal_budgets(key_ownership):
     from starlette.datastructures import URL

     import litellm
-    from litellm.proxy._types import LiteLLM_UserTable, UserAPIKeyAuth
-    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-    from litellm.proxy.proxy_server import hash_token, user_api_key_cache
+    from litellm_proxy._types import LiteLLM_UserTable, UserAPIKeyAuth
+    from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+    from litellm_proxy.proxy_server import hash_token, user_api_key_cache

     _user_id = "1234"
     user_key = "sk-12345678"
@@ -213,14 +211,14 @@ async def test_aaauser_personal_budgets(key_ownership):
     user_api_key_cache.set_cache(key=hash_token(user_key), value=valid_token)
     user_api_key_cache.set_cache(key="{}".format(_user_id), value=user_obj)

-    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm.proxy.proxy_server, "prisma_client", "hello-world")
+    setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "prisma_client", "hello-world")

     request = Request(scope={"type": "http"})
     request._url = URL(url="/chat/completions")

-    test_user_cache = getattr(litellm.proxy.proxy_server, "user_api_key_cache")
+    test_user_cache = getattr(litellm_proxy.proxy_server, "user_api_key_cache")

     assert test_user_cache.get_cache(key=hash_token(user_key)) == valid_token
@@ -247,7 +245,7 @@ async def test_user_api_key_auth_fails_with_prohibited_params(prohibited_param):
     # Setup
     user_key = "sk-1234"

-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")

     # Create request with prohibited parameter in body
     request = Request(scope={"type": "http"})
@@ -287,7 +285,7 @@ async def test_auth_with_allowed_routes(route, should_raise_error):
     general_settings = {"allowed_routes": ["/embeddings"]}
     from fastapi import Request

-    from litellm.proxy import proxy_server
+    from litellm_proxy import proxy_server

     initial_general_settings = getattr(proxy_server, "general_settings")

@@ -332,8 +330,8 @@ async def test_auth_with_allowed_routes(route, should_raise_error):
     ],
 )
 def test_is_ui_route_allowed(route, user_role, expected_result):
-    from litellm.proxy.auth.auth_checks import _is_ui_route
-    from litellm.proxy._types import LiteLLM_UserTable
+    from litellm_proxy.auth.auth_checks import _is_ui_route
+    from litellm_proxy._types import LiteLLM_UserTable

     user_obj = LiteLLM_UserTable(
         user_id="3b803c0e-666e-4e99-bd5c-6e534c07e297",
@@ -370,8 +368,8 @@ def test_is_ui_route_allowed(route, user_role, expected_result):
     ],
 )
 def test_is_api_route_allowed(route, user_role, expected_result):
-    from litellm.proxy.auth.auth_checks import _is_api_route_allowed
-    from litellm.proxy._types import LiteLLM_UserTable
+    from litellm_proxy.auth.auth_checks import _is_api_route_allowed
+    from litellm_proxy._types import LiteLLM_UserTable

     user_obj = LiteLLM_UserTable(
         user_id="3b803c0e-666e-4e99-bd5c-6e534c07e297",
@@ -409,16 +407,16 @@ async def test_auth_not_connected_to_db():
     from fastapi import Request
     from starlette.datastructures import URL

-    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-    from litellm.proxy.proxy_server import hash_token, user_api_key_cache
+    from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+    from litellm_proxy.proxy_server import hash_token, user_api_key_cache

     user_key = "sk-12345678"

-    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm.proxy.proxy_server, "prisma_client", None)
+    setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "prisma_client", None)
     setattr(
-        litellm.proxy.proxy_server,
+        litellm_proxy.proxy_server,
         "general_settings",
         {"allow_requests_on_db_unavailable": True},
     )
@@ -462,7 +460,7 @@ def test_get_api_key_from_custom_header(headers, custom_header_name, expected_ap
     assert api_key == expected_api_key


-from litellm.proxy._types import LitellmUserRoles
+from litellm_proxy._types import LitellmUserRoles


 @pytest.mark.parametrize(
@@ -478,8 +476,8 @@ from litellm.proxy._types import LitellmUserRoles
 def test_allowed_route_inside_route(
     user_role, auth_user_id, requested_user_id, expected_result
 ):
-    from litellm.proxy.auth.auth_checks import allowed_route_check_inside_route
-    from litellm.proxy._types import UserAPIKeyAuth, LitellmUserRoles
+    from litellm_proxy.auth.auth_checks import allowed_route_check_inside_route
+    from litellm_proxy._types import UserAPIKeyAuth, LitellmUserRoles

     assert (
         allowed_route_check_inside_route(
@@ -491,7 +489,7 @@ def test_allowed_route_inside_route(


 def test_read_request_body():
-    from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
+    from litellm_proxy.common_utils.http_parsing_utils import _read_request_body
     from fastapi import Request

     payload = "()" * 1000000
@@ -514,7 +512,7 @@ async def test_auth_with_form_data_and_model():
     """
     from fastapi import Request
     from starlette.datastructures import URL, FormData
-    from litellm.proxy.proxy_server import (
+    from litellm_proxy.proxy_server import (
         hash_token,
         user_api_key_cache,
         user_api_key_auth,
@@ -532,9 +530,9 @@ async def test_auth_with_form_data_and_model():
     # Store the virtual key in cache
     user_api_key_cache.set_cache(key=hash_token(user_key), value=valid_token)

-    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm.proxy.proxy_server, "prisma_client", "hello-world")
+    setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "prisma_client", "hello-world")

     # Create request with form data
     request = Request(
@@ -570,9 +568,9 @@ async def test_soft_budget_alert():
     from fastapi import Request
     from starlette.datastructures import URL

-    from litellm.proxy._types import UserAPIKeyAuth
-    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-    from litellm.proxy.proxy_server import hash_token, user_api_key_cache
+    from litellm_proxy._types import UserAPIKeyAuth
+    from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
+    from litellm_proxy.proxy_server import hash_token, user_api_key_cache

     # Setup
     user_key = "sk-12345"
@@ -591,9 +589,9 @@ async def test_soft_budget_alert():
     user_api_key_cache.set_cache(key=hash_token(user_key), value=valid_token)

     # Mock proxy server settings
-    setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
-    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-    setattr(litellm.proxy.proxy_server, "prisma_client", AsyncMock())
+    setattr(litellm_proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+    setattr(litellm_proxy.proxy_server, "master_key", "sk-1234")
+    setattr(litellm_proxy.proxy_server, "prisma_client", AsyncMock())

     # Create request
     request = Request(scope={"type": "http"})
@@ -601,7 +599,7 @@
     # Track if budget_alerts was called
     alert_called = False
-    original_budget_alerts = litellm.proxy.proxy_server.proxy_logging_obj.budget_alerts
+    original_budget_alerts = litellm_proxy.proxy_server.proxy_logging_obj.budget_alerts

     async def mock_budget_alerts(*args, **kwargs):
         nonlocal alert_called
@@ -611,7 +609,7 @@
     # Patch the budget_alerts method
     setattr(
-        litellm.proxy.proxy_server.proxy_logging_obj,
+        litellm_proxy.proxy_server.proxy_logging_obj,
         "budget_alerts",
         mock_budget_alerts,
     )
@@ -631,15 +629,15 @@ async def test_soft_budget_alert():
     finally:
         # Restore original budget_alerts
         setattr(
-            litellm.proxy.proxy_server.proxy_logging_obj,
+            litellm_proxy.proxy_server.proxy_logging_obj,
             "budget_alerts",
             original_budget_alerts,
         )


 def test_is_allowed_route():
-    from litellm.proxy.auth.auth_checks import _is_allowed_route
-    from litellm.proxy._types import UserAPIKeyAuth
+    from litellm_proxy.auth.auth_checks import _is_allowed_route
+    from litellm_proxy._types import UserAPIKeyAuth
     import datetime

     request = MagicMock()
@@ -736,7 +734,7 @@ def test_is_allowed_route():
     ],
 )
 def test_is_user_proxy_admin(user_obj, expected_result):
-    from litellm.proxy.auth.auth_checks import _is_user_proxy_admin
+    from litellm_proxy.auth.auth_checks import _is_user_proxy_admin

     assert _is_user_proxy_admin(user_obj) == expected_result

@@ -768,14 +766,14 @@ def test_is_user_proxy_admin(user_obj, expected_result):
     ],
 )
 def test_get_user_role(user_obj, expected_role):
-    from litellm.proxy.auth.user_api_key_auth import _get_user_role
+    from litellm_proxy.auth.user_api_key_auth import _get_user_role

     assert _get_user_role(user_obj) == expected_role


 @pytest.mark.asyncio
 async def test_user_api_key_auth_websocket():
-    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth_websocket
+    from litellm_proxy.auth.user_api_key_auth import user_api_key_auth_websocket

     # Prepare a mock WebSocket object
     mock_websocket = MagicMock(spec=WebSocket)
@@ -784,7 +782,7 @@ async def test_user_api_key_auth_websocket():

     # Mock the return value of `user_api_key_auth` when it's called within the `user_api_key_auth_websocket` function
     with patch(
-        "litellm.proxy.auth.user_api_key_auth.user_api_key_auth", autospec=True
+        "litellm_proxy.auth.user_api_key_auth.user_api_key_auth", autospec=True
     ) as mock_user_api_key_auth:
         # Make the call to the WebSocket function
@@ -801,9 +799,9 @@ async def test_user_api_key_auth_websocket():
 @pytest.mark.parametrize("enforce_rbac", [True, False])
 @pytest.mark.asyncio
 async def test_jwt_user_api_key_auth_builder_enforce_rbac(enforce_rbac, monkeypatch):
-    from litellm.proxy.auth.handle_jwt import JWTHandler, JWTAuthManager
+    from litellm_proxy.auth.handle_jwt import JWTHandler, JWTAuthManager
     from unittest.mock import patch, Mock
-    from litellm.proxy._types import LiteLLM_JWTAuth
+    from litellm_proxy._types import LiteLLM_JWTAuth
     from litellm.caching import DualCache

     monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "my-fake-url")
@@ -868,7 +866,7 @@ async def test_jwt_user_api_key_auth_builder_enforce_rbac(enforce_rbac, monkeypa


 def test_user_api_key_auth_end_user_str():
-    from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth
+    from litellm_proxy.auth.user_api_key_auth import UserAPIKeyAuth

     user_api_key_args = {
         "api_key": "sk-1234",
@@ -883,8 +881,8 @@ def test_user_api_key_auth_end_user_str():


 def test_can_rbac_role_call_model():
-    from litellm.proxy.auth.handle_jwt import JWTAuthManager
-    from litellm.proxy._types import RoleBasedPermissions
+    from litellm_proxy.auth.handle_jwt import JWTAuthManager
+    from litellm_proxy._types import RoleBasedPermissions

     roles_based_permissions = [
         RoleBasedPermissions(
@@ -919,7 +917,7 @@ def test_can_rbac_role_call_model():


 def test_can_rbac_role_call_model_no_role_permissions():
-    from litellm.proxy.auth.handle_jwt import JWTAuthManager
+    from litellm_proxy.auth.handle_jwt import JWTAuthManager

     assert JWTAuthManager.can_rbac_role_call_model(
         rbac_role=LitellmUserRoles.INTERNAL_USER,
@@ -946,7 +944,7 @@ def test_can_rbac_role_call_model_no_role_permissions():
     ],
 )
 def test_get_model_from_request(route, request_data, expected_model):
-    from litellm.proxy.auth.user_api_key_auth import get_model_from_request
+    from litellm_proxy.auth.user_api_key_auth import get_model_from_request

     assert get_model_from_request(request_data, route) == expected_model

@@ -959,9 +957,9 @@ async def test_jwt_non_admin_team_route_access(monkeypatch):
     from fastapi import Request, HTTPException
     from starlette.datastructures import URL
     from unittest.mock import patch
-    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+    from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
     import json
-    from litellm.proxy._types import ProxyException
+    from litellm_proxy._types import ProxyException

     mock_jwt_response = {
         "is_proxy_admin": False,
@@ -983,12 +981,12 @@ async def test_jwt_non_admin_team_route_access(monkeypatch):
     request._url = URL(url="/team/new")

     monkeypatch.setattr(
-        litellm.proxy.proxy_server, "general_settings", {"enable_jwt_auth": True}
+        litellm_proxy.proxy_server, "general_settings", {"enable_jwt_auth": True}
     )

     # Mock JWTAuthManager.auth_builder
     with patch(
-        "litellm.proxy.auth.handle_jwt.JWTAuthManager.auth_builder",
+        "litellm_proxy.auth.handle_jwt.JWTAuthManager.auth_builder",
         return_value=mock_jwt_response,
     ):
         try:
diff --git a/tests/router_unit_tests/test_router_adding_deployments.py b/tests/router_unit_tests/test_router_adding_deployments.py
index 55481394bb..a33d911f59 100644
--- a/tests/router_unit_tests/test_router_adding_deployments.py
+++ b/tests/router_unit_tests/test_router_adding_deployments.py
@@ -61,7 +61,7 @@ def test_initialize_deployment_for_pass_through_success(reusable_credentials):
     )

     # Verify the credentials were properly set
-    from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
+    from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import (
         passthrough_endpoint_router,
     )

@@ -148,7 +148,7 @@ def test_add_vertex_pass_through_deployment():
     router.add_deployment(deployment)

     # Get the vertex credentials from the router
-    from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
+    from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import (
         passthrough_endpoint_router,
     )
diff --git a/tests/test_keys.py b/tests/test_keys.py
index 89b54ba92c..1918abb228 100644
--- a/tests/test_keys.py
+++ b/tests/test_keys.py
@@ -12,7 +12,7 @@ sys.path.insert(
     0, os.path.abspath("../")
 )  # Adds the parent directory to the system path
 import litellm
-from litellm.proxy._types import LitellmUserRoles
+from litellm_proxy._types import LitellmUserRoles


 async def generate_team(
@@ -626,7 +626,7 @@ async def test_key_with_budgets():
     - wait 10min (budget reset runs every 10mins.)
     - Check if value updated
     """
-    from litellm.proxy.utils import hash_token
+    from litellm_proxy.utils import hash_token

     async def retry_request(func, *args, _max_attempts=5, **kwargs):
         for attempt in range(_max_attempts):
@@ -673,7 +673,7 @@ async def test_key_crossing_budget():
     - Check if value updated
     """

-    from litellm.proxy.utils import hash_token
+    from litellm_proxy.utils import hash_token

     async with aiohttp.ClientSession() as session:
         key_gen = await generate_key(session=session, i=0, budget=0.0000001)
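Note: every change in this patch follows one mechanical pattern: the proxy moves from the `litellm.proxy` subpackage to the top-level `litellm_proxy` package, with submodule paths (`_types`, `auth`, `management_endpoints`, and so on) left unchanged. Below is a minimal sketch of a compatibility shim downstream code could use while both layouts are in circulation; the try/except fallback is illustrative only and not part of this patch.

```python
# Illustrative compatibility shim (not part of the patch above):
# prefer the new top-level package, fall back to the old subpackage.
try:
    from litellm_proxy._types import UserAPIKeyAuth  # new layout
except ImportError:
    from litellm.proxy._types import UserAPIKeyAuth  # old layout
```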