diff --git a/.circleci/config.yml b/.circleci/config.yml index 5e0431e480..c31a07a22b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -817,6 +817,7 @@ jobs: - run: python ./tests/documentation_tests/test_api_docs.py - run: python ./tests/code_coverage_tests/ensure_async_clients_test.py - run: python ./tests/code_coverage_tests/enforce_llms_folder_style.py + - run: python ./tests/documentation_tests/test_circular_imports.py - run: helm lint ./deploy/charts/litellm-helm db_migration_disable_update_check: diff --git a/enterprise/enterprise_hooks/secret_detection.py b/enterprise/enterprise_hooks/secret_detection.py index 414f3c4ddf..50ccccfde4 100644 --- a/enterprise/enterprise_hooks/secret_detection.py +++ b/enterprise/enterprise_hooks/secret_detection.py @@ -474,12 +474,9 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail): from detect_secrets import SecretsCollection from detect_secrets.settings import default_settings - print("INSIDE SECRET DETECTION PRE-CALL HOOK!") - if await self.should_run_check(user_api_key_dict) is False: return - print("RUNNING CHECK!") if "messages" in data and isinstance(data["messages"], list): for message in data["messages"]: if "content" in message and isinstance(message["content"], str): diff --git a/litellm/__init__.py b/litellm/__init__.py index 2d2b66795d..b8d722ad99 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -32,7 +32,7 @@ from litellm.proxy._types import ( KeyManagementSettings, LiteLLM_UpperboundKeyGenerateParams, ) -from litellm.types.utils import StandardKeyGenerationConfig +from litellm.types.utils import StandardKeyGenerationConfig, LlmProviders import httpx import dotenv from enum import Enum @@ -838,71 +838,6 @@ model_list = ( ) -class LlmProviders(str, Enum): - OPENAI = "openai" - OPENAI_LIKE = "openai_like" # embedding only - JINA_AI = "jina_ai" - XAI = "xai" - CUSTOM_OPENAI = "custom_openai" - TEXT_COMPLETION_OPENAI = "text-completion-openai" - COHERE = "cohere" - COHERE_CHAT = "cohere_chat" - CLARIFAI = "clarifai" - ANTHROPIC = "anthropic" - ANTHROPIC_TEXT = "anthropic_text" - REPLICATE = "replicate" - HUGGINGFACE = "huggingface" - TOGETHER_AI = "together_ai" - OPENROUTER = "openrouter" - VERTEX_AI = "vertex_ai" - VERTEX_AI_BETA = "vertex_ai_beta" - GEMINI = "gemini" - AI21 = "ai21" - BASETEN = "baseten" - AZURE = "azure" - AZURE_TEXT = "azure_text" - AZURE_AI = "azure_ai" - SAGEMAKER = "sagemaker" - SAGEMAKER_CHAT = "sagemaker_chat" - BEDROCK = "bedrock" - VLLM = "vllm" - NLP_CLOUD = "nlp_cloud" - PETALS = "petals" - OOBABOOGA = "oobabooga" - OLLAMA = "ollama" - OLLAMA_CHAT = "ollama_chat" - DEEPINFRA = "deepinfra" - PERPLEXITY = "perplexity" - MISTRAL = "mistral" - GROQ = "groq" - NVIDIA_NIM = "nvidia_nim" - CEREBRAS = "cerebras" - AI21_CHAT = "ai21_chat" - VOLCENGINE = "volcengine" - CODESTRAL = "codestral" - TEXT_COMPLETION_CODESTRAL = "text-completion-codestral" - DEEPSEEK = "deepseek" - SAMBANOVA = "sambanova" - MARITALK = "maritalk" - VOYAGE = "voyage" - CLOUDFLARE = "cloudflare" - XINFERENCE = "xinference" - FIREWORKS_AI = "fireworks_ai" - FRIENDLIAI = "friendliai" - WATSONX = "watsonx" - WATSONX_TEXT = "watsonx_text" - TRITON = "triton" - PREDIBASE = "predibase" - DATABRICKS = "databricks" - EMPOWER = "empower" - GITHUB = "github" - CUSTOM = "custom" - LITELLM_PROXY = "litellm_proxy" - HOSTED_VLLM = "hosted_vllm" - LM_STUDIO = "lm_studio" - GALADRIEL = "galadriel" - - provider_list: List[Union[LlmProviders, str]] = list(LlmProviders) diff --git a/litellm/adapters/anthropic_adapter.py 
b/litellm/adapters/anthropic_adapter.py index 47fba36309..b8ce225ada 100644 --- a/litellm/adapters/anthropic_adapter.py +++ b/litellm/adapters/anthropic_adapter.py @@ -18,7 +18,7 @@ from litellm.types.llms.anthropic import ( AnthropicResponse, ContentBlockDelta, ) -from litellm.types.utils import AdapterCompletionStreamWrapper +from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse class AnthropicAdapter(CustomLogger): @@ -41,7 +41,7 @@ class AnthropicAdapter(CustomLogger): return translated_body def translate_completion_output_params( - self, response: litellm.ModelResponse + self, response: ModelResponse ) -> Optional[AnthropicResponse]: return litellm.AnthropicExperimentalPassThroughConfig().translate_openai_response_to_anthropic( diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 4f6bf5c19c..2e6edb7571 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -484,7 +484,7 @@ def completion_cost( # noqa: PLR0915 completion_characters: Optional[int] = None cache_creation_input_tokens: Optional[int] = None cache_read_input_tokens: Optional[int] = None - cost_per_token_usage_object: Optional[litellm.Usage] = _get_usage_object( + cost_per_token_usage_object: Optional[Usage] = _get_usage_object( completion_response=completion_response ) if completion_response is not None and ( @@ -492,7 +492,7 @@ def completion_cost( # noqa: PLR0915 or isinstance(completion_response, dict) ): # tts returns a custom class - usage_obj: Optional[Union[dict, litellm.Usage]] = completion_response.get( # type: ignore + usage_obj: Optional[Union[dict, Usage]] = completion_response.get( # type: ignore "usage", {} ) if isinstance(usage_obj, BaseModel) and not isinstance( diff --git a/litellm/integrations/SlackAlerting/slack_alerting.py b/litellm/integrations/SlackAlerting/slack_alerting.py index d585e235b7..bd3c3b8253 100644 --- a/litellm/integrations/SlackAlerting/slack_alerting.py +++ b/litellm/integrations/SlackAlerting/slack_alerting.py @@ -39,6 +39,7 @@ from litellm.proxy._types import ( VirtualKeyEvent, WebhookEvent, ) +from litellm.router import Router from litellm.types.integrations.slack_alerting import * from litellm.types.router import LiteLLM_Params @@ -93,7 +94,7 @@ class SlackAlerting(CustomBatchLogger): alert_types: Optional[List[AlertType]] = None, alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]] = None, alerting_args: Optional[Dict] = None, - llm_router: Optional[litellm.Router] = None, + llm_router: Optional[Router] = None, ): if alerting is not None: self.alerting = alerting diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index d534d4da3b..23ebb6ccd5 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -18,6 +18,7 @@ from pydantic import BaseModel import litellm from litellm import ( + _custom_logger_compatible_callbacks_literal, json_logs, log_raw_request_response, turn_off_message_logging, @@ -41,6 +42,7 @@ from litellm.types.utils import ( CallTypes, EmbeddingResponse, ImageResponse, + LiteLLMLoggingBaseClass, ModelResponse, StandardCallbackDynamicParams, StandardLoggingAdditionalHeaders, @@ -190,7 +192,7 @@ in_memory_trace_id_cache = ServiceTraceIDCache() in_memory_dynamic_logger_cache = DynamicLoggingCache() -class Logging: +class Logging(LiteLLMLoggingBaseClass): global supabaseClient, promptLayerLogger, weightsBiasesLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, 
logfireLogger, prometheusLogger, slack_app custom_pricing: bool = False stream_options = None @@ -2142,7 +2144,7 @@ def set_callbacks(callback_list, function_id=None): # noqa: PLR0915 def _init_custom_logger_compatible_class( # noqa: PLR0915 - logging_integration: litellm._custom_logger_compatible_callbacks_literal, + logging_integration: _custom_logger_compatible_callbacks_literal, internal_usage_cache: Optional[DualCache], llm_router: Optional[ Any @@ -2362,7 +2364,7 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915 def get_custom_logger_compatible_class( # noqa: PLR0915 - logging_integration: litellm._custom_logger_compatible_callbacks_literal, + logging_integration: _custom_logger_compatible_callbacks_literal, ) -> Optional[CustomLogger]: if logging_integration == "lago": for callback in _in_memory_loggers: diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py index 9877c683c9..79933a462e 100644 --- a/litellm/litellm_core_utils/prompt_templates/factory.py +++ b/litellm/litellm_core_utils/prompt_templates/factory.py @@ -13,7 +13,6 @@ from jinja2.sandbox import ImmutableSandboxedEnvironment import litellm import litellm.types import litellm.types.llms -import litellm.types.llms.vertex_ai from litellm import verbose_logger from litellm.llms.custom_httpx.http_handler import HTTPHandler from litellm.types.completion import ( @@ -40,6 +39,9 @@ from litellm.types.llms.openai import ( ChatCompletionUserMessage, OpenAIMessageContentListBlock, ) +from litellm.types.llms.vertex_ai import FunctionCall as VertexFunctionCall +from litellm.types.llms.vertex_ai import FunctionResponse as VertexFunctionResponse +from litellm.types.llms.vertex_ai import PartType as VertexPartType from litellm.types.utils import GenericImageParsingChunk from .common_utils import convert_content_list_to_str, is_non_content_values_set @@ -965,11 +967,11 @@ def infer_protocol_value( def _gemini_tool_call_invoke_helper( function_call_params: ChatCompletionToolCallFunctionChunk, -) -> Optional[litellm.types.llms.vertex_ai.FunctionCall]: +) -> Optional[VertexFunctionCall]: name = function_call_params.get("name", "") or "" arguments = function_call_params.get("arguments", "") arguments_dict = json.loads(arguments) - function_call = litellm.types.llms.vertex_ai.FunctionCall( + function_call = VertexFunctionCall( name=name, args=arguments_dict, ) @@ -978,7 +980,7 @@ def _gemini_tool_call_invoke_helper( def convert_to_gemini_tool_call_invoke( message: ChatCompletionAssistantMessage, -) -> List[litellm.types.llms.vertex_ai.PartType]: +) -> List[VertexPartType]: """ OpenAI tool invokes: { @@ -1019,22 +1021,20 @@ def convert_to_gemini_tool_call_invoke( - json.load the arguments """ try: - _parts_list: List[litellm.types.llms.vertex_ai.PartType] = [] + _parts_list: List[VertexPartType] = [] tool_calls = message.get("tool_calls", None) function_call = message.get("function_call", None) if tool_calls is not None: for tool in tool_calls: if "function" in tool: - gemini_function_call: Optional[ - litellm.types.llms.vertex_ai.FunctionCall - ] = _gemini_tool_call_invoke_helper( - function_call_params=tool["function"] + gemini_function_call: Optional[VertexFunctionCall] = ( + _gemini_tool_call_invoke_helper( + function_call_params=tool["function"] + ) ) if gemini_function_call is not None: _parts_list.append( - litellm.types.llms.vertex_ai.PartType( - function_call=gemini_function_call - ) + VertexPartType(function_call=gemini_function_call) ) else: # don't 
silently drop params. Make it clear to user what's happening. raise Exception( @@ -1047,11 +1047,7 @@ def convert_to_gemini_tool_call_invoke( function_call_params=function_call ) if gemini_function_call is not None: - _parts_list.append( - litellm.types.llms.vertex_ai.PartType( - function_call=gemini_function_call - ) - ) + _parts_list.append(VertexPartType(function_call=gemini_function_call)) else: # don't silently drop params. Make it clear to user what's happening. raise Exception( "function_call missing. Received tool call with 'type': 'function'. No function call in argument - {}".format( @@ -1070,7 +1066,7 @@ def convert_to_gemini_tool_call_invoke( def convert_to_gemini_tool_call_result( message: Union[ChatCompletionToolMessage, ChatCompletionFunctionMessage], last_message_with_tool_calls: Optional[dict], -) -> litellm.types.llms.vertex_ai.PartType: +) -> VertexPartType: """ OpenAI message with a tool result looks like: { @@ -1119,11 +1115,11 @@ def convert_to_gemini_tool_call_result( # We can't determine from openai message format whether it's a successful or # error call result so default to the successful result template - _function_response = litellm.types.llms.vertex_ai.FunctionResponse( + _function_response = VertexFunctionResponse( name=name, response={"content": content_str} # type: ignore ) - _part = litellm.types.llms.vertex_ai.PartType(function_response=_function_response) + _part = VertexPartType(function_response=_function_response) return _part diff --git a/litellm/llms/anthropic/experimental_pass_through/transformation.py b/litellm/llms/anthropic/experimental_pass_through/transformation.py index 5de232e1eb..7880827cce 100644 --- a/litellm/llms/anthropic/experimental_pass_through/transformation.py +++ b/litellm/llms/anthropic/experimental_pass_through/transformation.py @@ -5,6 +5,12 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice import litellm +from litellm.litellm_core_utils.prompt_templates.factory import ( + anthropic_messages_pt, + custom_prompt, + prompt_factory, +) +from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from litellm.types.llms.anthropic import ( AllAnthropicToolsValues, AnthopicMessagesAssistantMessageParam, @@ -53,15 +59,9 @@ from litellm.types.llms.openai import ( ChatCompletionUserMessage, OpenAIMessageContent, ) -from litellm.types.utils import Choices, GenericStreamingChunk -from litellm.utils import CustomStreamWrapper, ModelResponse, Usage +from litellm.types.utils import Choices, GenericStreamingChunk, ModelResponse, Usage from ...base import BaseLLM -from litellm.litellm_core_utils.prompt_templates.factory import ( - anthropic_messages_pt, - custom_prompt, - prompt_factory, -) class AnthropicExperimentalPassThroughConfig: @@ -338,7 +338,7 @@ class AnthropicExperimentalPassThroughConfig: return "end_turn" def translate_openai_response_to_anthropic( - self, response: litellm.ModelResponse + self, response: ModelResponse ) -> AnthropicResponse: ## translate content block anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore @@ -347,7 +347,7 @@ class AnthropicExperimentalPassThroughConfig: openai_finish_reason=response.choices[0].finish_reason # type: ignore ) # extract usage - usage: litellm.Usage = getattr(response, "usage") + usage: Usage = getattr(response, "usage") anthropic_usage = AnthropicResponseUsageBlock( input_tokens=usage.prompt_tokens or 0, 
output_tokens=usage.completion_tokens or 0, @@ -393,7 +393,7 @@ class AnthropicExperimentalPassThroughConfig: return "text_delta", ContentTextBlockDelta(type="text_delta", text=text) def translate_streaming_openai_response_to_anthropic( - self, response: litellm.ModelResponse + self, response: ModelResponse ) -> Union[ContentBlockDelta, MessageBlockDelta]: ## base case - final chunk w/ finish reason if response.choices[0].finish_reason is not None: @@ -403,7 +403,7 @@ class AnthropicExperimentalPassThroughConfig: ), ) if getattr(response, "usage", None) is not None: - litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore + litellm_usage_chunk: Optional[Usage] = response.usage # type: ignore elif ( hasattr(response, "_hidden_params") and "usage" in response._hidden_params diff --git a/litellm/llms/azure/azure.py b/litellm/llms/azure/azure.py index 33261a7a7e..ffef8007e6 100644 --- a/litellm/llms/azure/azure.py +++ b/litellm/llms/azure/azure.py @@ -17,10 +17,14 @@ from litellm.llms.custom_httpx.http_handler import ( HTTPHandler, get_async_httpx_client, ) -from litellm.types.utils import EmbeddingResponse +from litellm.types.utils import ( + EmbeddingResponse, + ImageResponse, + LlmProviders, + ModelResponse, +) from litellm.utils import ( CustomStreamWrapper, - ModelResponse, UnsupportedParamsError, convert_to_model_response_object, get_secret, @@ -853,7 +857,7 @@ class AzureChatCompletion(BaseLLM): client=None, aembedding=None, headers: Optional[dict] = None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: if headers: optional_params["extra_headers"] = headers if self._client_session is None: @@ -963,7 +967,7 @@ class AzureChatCompletion(BaseLLM): _params["timeout"] = httpx.Timeout(timeout=600.0, connect=5.0) async_handler = get_async_httpx_client( - llm_provider=litellm.LlmProviders.AZURE, + llm_provider=LlmProviders.AZURE, params=_params, ) else: @@ -1242,11 +1246,11 @@ class AzureChatCompletion(BaseLLM): api_key: Optional[str] = None, api_base: Optional[str] = None, api_version: Optional[str] = None, - model_response: Optional[litellm.utils.ImageResponse] = None, + model_response: Optional[ImageResponse] = None, azure_ad_token: Optional[str] = None, client=None, aimg_generation=None, - ) -> litellm.ImageResponse: + ) -> ImageResponse: try: if model and len(model) > 0: model = model @@ -1510,7 +1514,7 @@ class AzureChatCompletion(BaseLLM): ) -> dict: client_session = ( litellm.aclient_session - or get_async_httpx_client(llm_provider=litellm.LlmProviders.AZURE).client + or get_async_httpx_client(llm_provider=LlmProviders.AZURE).client ) # handle dall-e-2 calls if "gateway.ai.cloudflare.com" in api_base: diff --git a/litellm/llms/azure/chat/gpt_transformation.py b/litellm/llms/azure/chat/gpt_transformation.py index bd737af2fb..5af1a675aa 100644 --- a/litellm/llms/azure/chat/gpt_transformation.py +++ b/litellm/llms/azure/chat/gpt_transformation.py @@ -4,7 +4,11 @@ from typing import TYPE_CHECKING, Any, List, Optional, Type, Union from httpx._models import Headers, Response import litellm +from litellm.litellm_core_utils.prompt_templates.factory import ( + convert_to_azure_openai_messages, +) from litellm.llms.base_llm.transformation import BaseLLMException +from litellm.types.utils import ModelResponse from ....exceptions import UnsupportedParamsError from ....types.llms.openai import ( @@ -14,9 +18,7 @@ from ....types.llms.openai import ( ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk, ) - from ...base_llm.transformation import BaseConfig 
-from litellm.litellm_core_utils.prompt_templates.factory import convert_to_azure_openai_messages from ..common_utils import AzureOpenAIError if TYPE_CHECKING: @@ -26,6 +28,7 @@ if TYPE_CHECKING: else: LoggingClass = Any + class AzureOpenAIConfig(BaseConfig): """ Reference: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions @@ -221,7 +224,7 @@ class AzureOpenAIConfig(BaseConfig): self, model: str, raw_response: Response, - model_response: litellm.ModelResponse, + model_response: ModelResponse, logging_obj: LoggingClass, request_data: dict, messages: List[AllMessageValues], @@ -230,7 +233,7 @@ class AzureOpenAIConfig(BaseConfig): encoding: Any, api_key: Optional[str] = None, json_mode: Optional[bool] = None, - ) -> litellm.ModelResponse: + ) -> ModelResponse: raise NotImplementedError( "Azure OpenAI handler.py has custom logic for transforming response, as it uses the OpenAI SDK." ) diff --git a/litellm/llms/azure_ai/embed/handler.py b/litellm/llms/azure_ai/embed/handler.py index 5b86f0b255..f5c3a1a6b5 100644 --- a/litellm/llms/azure_ai/embed/handler.py +++ b/litellm/llms/azure_ai/embed/handler.py @@ -89,7 +89,7 @@ class AzureAIEmbedding(OpenAIChatCompletion): embedding_response = response.json() embedding_headers = dict(response.headers) - returned_response: litellm.EmbeddingResponse = convert_to_model_response_object( # type: ignore + returned_response: EmbeddingResponse = convert_to_model_response_object( # type: ignore response_object=embedding_response, model_response_object=model_response, response_type="embedding", @@ -104,7 +104,7 @@ class AzureAIEmbedding(OpenAIChatCompletion): data: ImageEmbeddingRequest, timeout: float, logging_obj, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, api_key: Optional[str], api_base: Optional[str], @@ -132,7 +132,7 @@ class AzureAIEmbedding(OpenAIChatCompletion): embedding_response = response.json() embedding_headers = dict(response.headers) - returned_response: litellm.EmbeddingResponse = convert_to_model_response_object( # type: ignore + returned_response: EmbeddingResponse = convert_to_model_response_object( # type: ignore response_object=embedding_response, model_response_object=model_response, response_type="embedding", @@ -213,14 +213,14 @@ class AzureAIEmbedding(OpenAIChatCompletion): input: List, timeout: float, logging_obj, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, api_key: Optional[str] = None, api_base: Optional[str] = None, client=None, aembedding=None, max_retries: Optional[int] = None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: """ - Separate image url from text -> route image url call to `/image/embeddings` diff --git a/litellm/llms/base.py b/litellm/llms/base.py index 943b10182b..611b0aa902 100644 --- a/litellm/llms/base.py +++ b/litellm/llms/base.py @@ -5,6 +5,8 @@ import httpx import requests import litellm +from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper +from litellm.types.utils import ModelResponse, TextCompletionResponse class BaseLLM: @@ -15,7 +17,7 @@ class BaseLLM: self, model: str, response: Union[requests.Response, httpx.Response], - model_response: litellm.utils.ModelResponse, + model_response: ModelResponse, stream: bool, logging_obj: Any, optional_params: dict, @@ -24,7 +26,7 @@ class BaseLLM: messages: list, print_verbose, encoding, - ) -> Union[litellm.utils.ModelResponse, litellm.utils.CustomStreamWrapper]: + ) -> 
Union[ModelResponse, CustomStreamWrapper]: """ Helper function to process the response across sync + async completion calls """ @@ -34,7 +36,7 @@ class BaseLLM: self, model: str, response: Union[requests.Response, httpx.Response], - model_response: litellm.utils.TextCompletionResponse, + model_response: TextCompletionResponse, stream: bool, logging_obj: Any, optional_params: dict, @@ -43,7 +45,7 @@ class BaseLLM: messages: list, print_verbose, encoding, - ) -> Union[litellm.utils.TextCompletionResponse, litellm.utils.CustomStreamWrapper]: + ) -> Union[TextCompletionResponse, CustomStreamWrapper]: """ Helper function to process the response across sync + async completion calls """ diff --git a/litellm/llms/bedrock/chat/invoke_handler.py b/litellm/llms/bedrock/chat/invoke_handler.py index a2a8228e27..963e3fca59 100644 --- a/litellm/llms/bedrock/chat/invoke_handler.py +++ b/litellm/llms/bedrock/chat/invoke_handler.py @@ -32,6 +32,17 @@ from litellm import verbose_logger from litellm.caching.caching import InMemoryCache from litellm.litellm_core_utils.core_helpers import map_finish_reason from litellm.litellm_core_utils.litellm_logging import Logging +from litellm.litellm_core_utils.prompt_templates.factory import ( + _bedrock_converse_messages_pt, + _bedrock_tools_pt, + cohere_message_pt, + construct_tool_use_system_prompt, + contains_tag, + custom_prompt, + extract_between_tags, + parse_xml_params, + prompt_factory, +) from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, HTTPHandler, @@ -50,20 +61,10 @@ from litellm.types.llms.openai import ( ChatCompletionUsageBlock, ) from litellm.types.utils import GenericStreamingChunk as GChunk -from litellm.utils import CustomStreamWrapper, ModelResponse, Usage, get_secret +from litellm.types.utils import ModelResponse, Usage +from litellm.utils import CustomStreamWrapper, get_secret from ..base_aws_llm import BaseAWSLLM -from litellm.litellm_core_utils.prompt_templates.factory import ( - _bedrock_converse_messages_pt, - _bedrock_tools_pt, - cohere_message_pt, - construct_tool_use_system_prompt, - contains_tag, - custom_prompt, - extract_between_tags, - parse_xml_params, - prompt_factory, -) from ..common_utils import BedrockError, ModelResponseIterator, get_bedrock_tool_name from .converse_transformation import AmazonConverseConfig @@ -1317,7 +1318,7 @@ class MockResponseIterator: # for returning ai21 streaming responses def _chunk_parser(self, chunk_data: ModelResponse) -> GChunk: try: - chunk_usage: litellm.Usage = getattr(chunk_data, "usage") + chunk_usage: Usage = getattr(chunk_data, "usage") text = chunk_data.choices[0].message.content or "" # type: ignore tool_use = None if self.json_mode is True: diff --git a/litellm/llms/clarifai/chat/transformation.py b/litellm/llms/clarifai/chat/transformation.py index 53ddfcdfa6..fac16f7ca6 100644 --- a/litellm/llms/clarifai/chat/transformation.py +++ b/litellm/llms/clarifai/chat/transformation.py @@ -5,9 +5,11 @@ from typing import TYPE_CHECKING, Any, AsyncIterator, Iterator, List, Optional, import httpx import litellm +from litellm.litellm_core_utils.prompt_templates.common_utils import ( + convert_content_list_to_str, +) from litellm.llms.base_llm.base_model_iterator import FakeStreamResponseIterator from litellm.llms.base_llm.transformation import BaseConfig, BaseLLMException -from litellm.litellm_core_utils.prompt_templates.common_utils import convert_content_list_to_str from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import ( 
ChatCompletionToolCallChunk, @@ -152,7 +154,7 @@ class ClarifaiConfig(BaseConfig): encoding: str, api_key: Optional[str] = None, json_mode: Optional[bool] = None, - ) -> litellm.ModelResponse: + ) -> ModelResponse: logging_obj.post_call( input=messages, api_key=api_key, diff --git a/litellm/llms/codestral/completion/handler.py b/litellm/llms/codestral/completion/handler.py index 43bbafaefe..e04da501bf 100644 --- a/litellm/llms/codestral/completion/handler.py +++ b/litellm/llms/codestral/completion/handler.py @@ -29,6 +29,7 @@ from litellm.llms.custom_httpx.http_handler import ( ) from litellm.llms.openai.completion.transformation import OpenAITextCompletionConfig from litellm.types.llms.databricks import GenericStreamingChunk +from litellm.types.utils import TextChoices from litellm.utils import ( Choices, CustomStreamWrapper, @@ -169,7 +170,7 @@ class CodestralTextCompletion(BaseLLM): raise TextCompletionCodestralError(message=response.text, status_code=422) _original_choices = completion_response.get("choices", []) - _choices: List[litellm.utils.TextChoices] = [] + _choices: List[TextChoices] = [] for choice in _original_choices: # This is what 1 choice looks like from codestral API # { diff --git a/litellm/llms/cohere/embed/handler.py b/litellm/llms/cohere/embed/handler.py index afeba10b58..5258df2b7f 100644 --- a/litellm/llms/cohere/embed/handler.py +++ b/litellm/llms/cohere/embed/handler.py @@ -17,6 +17,7 @@ from litellm.llms.custom_httpx.http_handler import ( get_async_httpx_client, ) from litellm.types.llms.bedrock import CohereEmbeddingRequest +from litellm.types.utils import EmbeddingResponse from litellm.utils import Choices, Message, ModelResponse, Usage from .transformation import CohereEmbeddingConfig @@ -118,7 +119,7 @@ async def async_embedding( def embedding( model: str, input: list, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, logging_obj: LiteLLMLoggingObj, optional_params: dict, headers: dict, diff --git a/litellm/llms/databricks/embed/handler.py b/litellm/llms/databricks/embed/handler.py index 4ed5853762..284988ffec 100644 --- a/litellm/llms/databricks/embed/handler.py +++ b/litellm/llms/databricks/embed/handler.py @@ -21,7 +21,7 @@ class DatabricksEmbeddingHandler(OpenAILikeEmbeddingHandler, DatabricksBase): api_key: Optional[str], api_base: Optional[str], optional_params: dict, - model_response: Optional[litellm.utils.EmbeddingResponse] = None, + model_response: Optional[EmbeddingResponse] = None, client=None, aembedding=None, custom_endpoint: Optional[bool] = None, diff --git a/litellm/llms/databricks/streaming_utils.py b/litellm/llms/databricks/streaming_utils.py index 502f4a0912..b9f54c04dd 100644 --- a/litellm/llms/databricks/streaming_utils.py +++ b/litellm/llms/databricks/streaming_utils.py @@ -55,9 +55,7 @@ class ModelResponseIterator: is_finished = True finish_reason = processed_chunk.choices[0].finish_reason - usage_chunk: Optional[litellm.Usage] = getattr( - processed_chunk, "usage", None - ) + usage_chunk: Optional[Usage] = getattr(processed_chunk, "usage", None) if usage_chunk is not None: usage = ChatCompletionUsageBlock( diff --git a/litellm/llms/huggingface/chat/handler.py b/litellm/llms/huggingface/chat/handler.py index 9ed841e379..eadb62fb30 100644 --- a/litellm/llms/huggingface/chat/handler.py +++ b/litellm/llms/huggingface/chat/handler.py @@ -24,6 +24,7 @@ import requests import litellm from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from 
litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, HTTPHandler, @@ -36,8 +37,9 @@ from litellm.llms.huggingface.chat.transformation import ( from litellm.secret_managers.main import get_secret_str from litellm.types.completion import ChatCompletionMessageToolCallParam from litellm.types.llms.openai import AllMessageValues +from litellm.types.utils import EmbeddingResponse from litellm.types.utils import Logprobs as TextCompletionLogprobs -from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage +from litellm.types.utils import ModelResponse, Usage from ...base import BaseLLM from ..common_utils import HuggingfaceError, hf_task_list, hf_tasks @@ -453,11 +455,11 @@ class Huggingface(BaseLLM): def _process_embedding_response( self, embeddings: dict, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, model: str, input: List, encoding: Any, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: output_data = [] if "similarities" in embeddings: for idx, embedding in embeddings["similarities"]: @@ -583,7 +585,7 @@ class Huggingface(BaseLLM): self, model: str, input: list, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, logging_obj: LiteLLMLoggingObj, encoding: Callable, @@ -593,7 +595,7 @@ class Huggingface(BaseLLM): aembedding: Optional[bool] = None, client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None, headers={}, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: super().embedding() headers = hf_chat_config.validate_environment( api_key=api_key, diff --git a/litellm/llms/huggingface/chat/transformation.py b/litellm/llms/huggingface/chat/transformation.py index 155fc8dc81..8238d1be41 100644 --- a/litellm/llms/huggingface/chat/transformation.py +++ b/litellm/llms/huggingface/chat/transformation.py @@ -8,10 +8,15 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union import httpx import litellm +from litellm.litellm_core_utils.prompt_templates.common_utils import ( + convert_content_list_to_str, +) +from litellm.litellm_core_utils.prompt_templates.factory import ( + custom_prompt, + prompt_factory, +) from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from litellm.llms.base_llm.transformation import BaseConfig, BaseLLMException -from litellm.litellm_core_utils.prompt_templates.common_utils import convert_content_list_to_str -from litellm.litellm_core_utils.prompt_templates.factory import custom_prompt, prompt_factory from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import Choices, Message, ModelResponse, Usage @@ -407,7 +412,7 @@ class HuggingfaceChatConfig(BaseConfig): def convert_to_model_response_object( # noqa: PLR0915 self, completion_response: Union[List[Dict[str, Any]], Dict[str, Any]], - model_response: litellm.ModelResponse, + model_response: ModelResponse, task: Optional[hf_tasks], optional_params: dict, encoding: Any, diff --git a/litellm/llms/ollama/completion/handler.py b/litellm/llms/ollama/completion/handler.py index 5d6e2ef2dc..d50e7d5e64 100644 --- a/litellm/llms/ollama/completion/handler.py +++ b/litellm/llms/ollama/completion/handler.py @@ -14,11 +14,20 @@ import requests # type: ignore import litellm from litellm import verbose_logger +from litellm.litellm_core_utils.prompt_templates.factory import ( + custom_prompt, 
+ prompt_factory, +) from litellm.llms.custom_httpx.http_handler import get_async_httpx_client from litellm.secret_managers.main import get_secret_str -from litellm.types.utils import ModelInfo, ProviderField, StreamingChoices +from litellm.types.utils import ( + EmbeddingResponse, + ModelInfo, + ModelResponse, + ProviderField, + StreamingChoices, +) -from litellm.litellm_core_utils.prompt_templates.factory import custom_prompt, prompt_factory from ..common_utils import OllamaError from .transformation import OllamaConfig @@ -53,7 +62,7 @@ def _convert_image(image): # ollama implementation def get_ollama_response( - model_response: litellm.ModelResponse, + model_response: ModelResponse, model: str, prompt: str, optional_params: dict, @@ -391,7 +400,7 @@ async def ollama_aembeddings( api_base: str, model: str, prompts: List[str], - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, logging_obj: Any, encoding: Any, @@ -479,7 +488,7 @@ def ollama_embeddings( model: str, prompts: list, optional_params: dict, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, logging_obj: Any, encoding=None, ): diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py index 47555a3a48..a0ccb81730 100644 --- a/litellm/llms/ollama_chat.py +++ b/litellm/llms/ollama_chat.py @@ -17,7 +17,7 @@ from litellm.llms.custom_httpx.http_handler import get_async_httpx_client from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig from litellm.types.llms.ollama import OllamaToolCall, OllamaToolCallFunction from litellm.types.llms.openai import ChatCompletionAssistantToolCall -from litellm.types.utils import StreamingChoices +from litellm.types.utils import ModelResponse, StreamingChoices class OllamaError(Exception): @@ -198,7 +198,7 @@ class OllamaChatConfig(OpenAIGPTConfig): # ollama implementation def get_ollama_response( # noqa: PLR0915 - model_response: litellm.ModelResponse, + model_response: ModelResponse, messages: list, optional_params: dict, model: str, diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index cc1552ba0b..b2d14a3187 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -28,24 +28,31 @@ import litellm from litellm import LlmProviders from litellm._logging import verbose_logger from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from litellm.litellm_core_utils.prompt_templates.factory import ( + custom_prompt, + prompt_factory, +) from litellm.llms.base_llm.transformation import BaseConfig, BaseLLMException from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS from litellm.secret_managers.main import get_secret_str -from litellm.types.utils import ProviderField +from litellm.types.utils import ( + EmbeddingResponse, + ImageResponse, + ModelResponse, + ProviderField, + TextCompletionResponse, + Usage, +) from litellm.utils import ( Choices, CustomStreamWrapper, Message, - ModelResponse, ProviderConfigManager, - TextCompletionResponse, - Usage, convert_to_model_response_object, ) from ...types.llms.openai import * from ..base import BaseLLM -from litellm.litellm_core_utils.prompt_templates.factory import custom_prompt, prompt_factory from .chat.gpt_transformation import OpenAIGPTConfig from .common_utils import OpenAIError, drop_params_from_unprocessable_entity_error @@ -882,7 +889,7 @@ class OpenAIChatCompletion(BaseLLM): self, input: list, data: dict, - model_response: 
litellm.utils.EmbeddingResponse, + model_response: EmbeddingResponse, timeout: float, logging_obj: LiteLLMLoggingObj, api_key: Optional[str] = None, @@ -911,9 +918,7 @@ class OpenAIChatCompletion(BaseLLM): additional_args={"complete_input_dict": data}, original_response=stringified_response, ) - returned_response: ( - litellm.EmbeddingResponse - ) = convert_to_model_response_object( + returned_response: EmbeddingResponse = convert_to_model_response_object( response_object=stringified_response, model_response_object=model_response, response_type="embedding", @@ -953,14 +958,14 @@ class OpenAIChatCompletion(BaseLLM): input: list, timeout: float, logging_obj, - model_response: litellm.utils.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, api_key: Optional[str] = None, api_base: Optional[str] = None, client=None, aembedding=None, max_retries: Optional[int] = None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: super().embedding() try: model = model @@ -1011,7 +1016,7 @@ class OpenAIChatCompletion(BaseLLM): additional_args={"complete_input_dict": data}, original_response=sync_embedding_response, ) - response: litellm.EmbeddingResponse = convert_to_model_response_object( + response: EmbeddingResponse = convert_to_model_response_object( response_object=sync_embedding_response.model_dump(), model_response_object=model_response, _response_headers=headers, @@ -1068,7 +1073,7 @@ class OpenAIChatCompletion(BaseLLM): except Exception as e: ## LOGGING logging_obj.post_call( - input=input, + input=prompt, api_key=api_key, original_response=str(e), ) @@ -1083,10 +1088,10 @@ class OpenAIChatCompletion(BaseLLM): logging_obj: Any, api_key: Optional[str] = None, api_base: Optional[str] = None, - model_response: Optional[litellm.utils.ImageResponse] = None, + model_response: Optional[ImageResponse] = None, client=None, aimg_generation=None, - ) -> litellm.ImageResponse: + ) -> ImageResponse: data = {} try: model = model diff --git a/litellm/llms/openai_like/chat/transformation.py b/litellm/llms/openai_like/chat/transformation.py index c8511cb630..9d89e5d09f 100644 --- a/litellm/llms/openai_like/chat/transformation.py +++ b/litellm/llms/openai_like/chat/transformation.py @@ -3,7 +3,7 @@ OpenAI-like chat completion transformation """ import types -from typing import List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union import httpx from pydantic import BaseModel @@ -16,6 +16,13 @@ from litellm.types.utils import ModelResponse from ....utils import _remove_additional_properties, _remove_strict_from_schema from ...openai.chat.gpt_transformation import OpenAIGPTConfig +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj + + LiteLLMLoggingObj = _LiteLLMLoggingObj +else: + LiteLLMLoggingObj = Any + class OpenAILikeChatConfig(OpenAIGPTConfig): def _get_openai_compatible_provider_info( @@ -64,7 +71,7 @@ class OpenAILikeChatConfig(OpenAIGPTConfig): response: httpx.Response, model_response: ModelResponse, stream: bool, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore + logging_obj: LiteLLMLoggingObj, optional_params: dict, api_key: Optional[str], data: Union[dict, str], diff --git a/litellm/llms/openai_like/embedding/handler.py b/litellm/llms/openai_like/embedding/handler.py index e786b5db89..03e7c0fb2e 100644 --- a/litellm/llms/openai_like/embedding/handler.py +++ b/litellm/llms/openai_like/embedding/handler.py @@ -11,8 +11,7 @@ from enum import 
Enum from functools import partial from typing import Any, Callable, List, Literal, Optional, Tuple, Union -import httpx # type: ignore -import requests # type: ignore +import httpx import litellm from litellm.litellm_core_utils.core_helpers import map_finish_reason @@ -21,7 +20,7 @@ from litellm.llms.custom_httpx.http_handler import ( HTTPHandler, get_async_httpx_client, ) -from litellm.utils import EmbeddingResponse +from litellm.types.utils import EmbeddingResponse from ..common_utils import OpenAILikeBase, OpenAILikeError @@ -100,7 +99,7 @@ class OpenAILikeEmbeddingHandler(OpenAILikeBase): api_key: Optional[str], api_base: Optional[str], optional_params: dict, - model_response: Optional[litellm.utils.EmbeddingResponse] = None, + model_response: Optional[EmbeddingResponse] = None, client=None, aembedding=None, custom_endpoint: Optional[bool] = None, diff --git a/litellm/llms/petals/completion/transformation.py b/litellm/llms/petals/completion/transformation.py index 17386a7df5..52b8cd178d 100644 --- a/litellm/llms/petals/completion/transformation.py +++ b/litellm/llms/petals/completion/transformation.py @@ -10,6 +10,7 @@ from litellm.llms.base_llm.transformation import ( LiteLLMLoggingObj, ) from litellm.types.llms.openai import AllMessageValues +from litellm.types.utils import ModelResponse from ..common_utils import PetalsError @@ -111,7 +112,7 @@ class PetalsConfig(BaseConfig): self, model: str, raw_response: Response, - model_response: litellm.ModelResponse, + model_response: ModelResponse, logging_obj: LiteLLMLoggingObj, request_data: dict, messages: List[AllMessageValues], @@ -120,7 +121,7 @@ class PetalsConfig(BaseConfig): encoding: Any, api_key: Optional[str] = None, json_mode: Optional[bool] = None, - ) -> litellm.ModelResponse: + ) -> ModelResponse: raise NotImplementedError( "Petals transformation currently done in handler.py. 
[TODO] Move to the transformation.py" ) diff --git a/litellm/llms/predibase/chat/handler.py b/litellm/llms/predibase/chat/handler.py index 4ed2e83f63..7352c2204c 100644 --- a/litellm/llms/predibase/chat/handler.py +++ b/litellm/llms/predibase/chat/handler.py @@ -27,6 +27,7 @@ from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, get_async_httpx_client, ) +from litellm.types.utils import LiteLLMLoggingBaseClass from litellm.utils import Choices, CustomStreamWrapper, Message, ModelResponse, Usage from ...base import BaseLLM @@ -92,7 +93,7 @@ class PredibaseChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, + logging_obj: LiteLLMLoggingBaseClass, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/llms/sagemaker/completion/transformation.py b/litellm/llms/sagemaker/completion/transformation.py index f7bac46b6f..0a91819b7b 100644 --- a/litellm/llms/sagemaker/completion/transformation.py +++ b/litellm/llms/sagemaker/completion/transformation.py @@ -13,10 +13,13 @@ from httpx._models import Headers, Response import litellm from litellm.litellm_core_utils.asyncify import asyncify +from litellm.litellm_core_utils.prompt_templates.factory import ( + custom_prompt, + prompt_factory, +) from litellm.llms.base_llm.transformation import BaseConfig, BaseLLMException -from litellm.litellm_core_utils.prompt_templates.factory import custom_prompt, prompt_factory from litellm.types.llms.openai import AllMessageValues -from litellm.types.utils import Usage +from litellm.types.utils import ModelResponse, Usage from ..common_utils import SagemakerError @@ -197,7 +200,7 @@ class SagemakerConfig(BaseConfig): self, model: str, raw_response: Response, - model_response: litellm.ModelResponse, + model_response: ModelResponse, logging_obj: LiteLLMLoggingObj, request_data: dict, messages: List[AllMessageValues], @@ -206,7 +209,7 @@ class SagemakerConfig(BaseConfig): encoding: str, api_key: Optional[str] = None, json_mode: Optional[bool] = None, - ) -> litellm.ModelResponse: + ) -> ModelResponse: completion_response = raw_response.json() ## LOGGING logging_obj.post_call( diff --git a/litellm/llms/vertex_ai/gemini/transformation.py b/litellm/llms/vertex_ai/gemini/transformation.py index d67419767a..e6ac6928bd 100644 --- a/litellm/llms/vertex_ai/gemini/transformation.py +++ b/litellm/llms/vertex_ai/gemini/transformation.py @@ -5,20 +5,20 @@ Why separate file? 
Make it easy to see how transformation works """ import os -from typing import List, Literal, Optional, Tuple, Union, cast +from typing import TYPE_CHECKING, Any, List, Literal, Optional, Tuple, Union, cast import httpx from pydantic import BaseModel import litellm from litellm._logging import verbose_logger -from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.litellm_core_utils.prompt_templates.factory import ( convert_to_anthropic_image_obj, convert_to_gemini_tool_call_invoke, convert_to_gemini_tool_call_result, response_schema_prompt, ) +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.types.files import ( get_file_mime_type_for_file_type, get_file_type_from_extension, @@ -49,6 +49,13 @@ from ..common_utils import ( get_supports_system_message, ) +if TYPE_CHECKING: + from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj + + LiteLLMLoggingObj = _LiteLLMLoggingObj +else: + LiteLLMLoggingObj = Any + def _process_gemini_image(image_url: str) -> PartType: """ @@ -348,7 +355,7 @@ def sync_transform_request_body( timeout: Optional[Union[float, httpx.Timeout]], extra_headers: Optional[dict], optional_params: dict, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore + logging_obj: LiteLLMLoggingObj, custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"], litellm_params: dict, ) -> RequestBody: diff --git a/litellm/llms/vertex_ai/image_generation/image_generation_handler.py b/litellm/llms/vertex_ai/image_generation/image_generation_handler.py index 7c288da70b..bb39fcb1ad 100644 --- a/litellm/llms/vertex_ai/image_generation/image_generation_handler.py +++ b/litellm/llms/vertex_ai/image_generation/image_generation_handler.py @@ -10,18 +10,17 @@ from litellm.llms.custom_httpx.http_handler import ( HTTPHandler, get_async_httpx_client, ) -from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( - VertexLLM, -) +from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM +from litellm.types.utils import ImageResponse class VertexImageGeneration(VertexLLM): def process_image_generation_response( self, json_response: Dict[str, Any], - model_response: litellm.ImageResponse, + model_response: ImageResponse, model: Optional[str] = None, - ) -> litellm.ImageResponse: + ) -> ImageResponse: if "predictions" not in json_response: raise litellm.InternalServerError( message=f"image generation response does not contain 'predictions', got {json_response}", @@ -46,7 +45,7 @@ class VertexImageGeneration(VertexLLM): vertex_project: Optional[str], vertex_location: Optional[str], vertex_credentials: Optional[str], - model_response: litellm.ImageResponse, + model_response: ImageResponse, logging_obj: Any, model: Optional[ str @@ -55,7 +54,7 @@ class VertexImageGeneration(VertexLLM): optional_params: Optional[dict] = None, timeout: Optional[int] = None, aimg_generation=False, - ) -> litellm.ImageResponse: + ) -> ImageResponse: if aimg_generation is True: return self.aimage_generation( # type: ignore prompt=prompt, diff --git a/litellm/llms/vertex_ai/multimodal_embeddings/embedding_handler.py b/litellm/llms/vertex_ai/multimodal_embeddings/embedding_handler.py index 81708530a2..eaffbd38ee 100644 --- a/litellm/llms/vertex_ai/multimodal_embeddings/embedding_handler.py +++ b/litellm/llms/vertex_ai/multimodal_embeddings/embedding_handler.py @@ -22,7 +22,7 @@ from litellm.types.llms.vertex_ai import ( MultimodalPredictions,
VertexMultimodalEmbeddingRequest, ) -from litellm.types.utils import Embedding +from litellm.types.utils import Embedding, EmbeddingResponse from litellm.utils import is_base64_encoded @@ -39,7 +39,7 @@ class VertexMultimodalEmbedding(VertexLLM): model: str, input: Union[list, str], print_verbose, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, custom_llm_provider: Literal["gemini", "vertex_ai"], optional_params: dict, logging_obj: LiteLLMLoggingObj, @@ -52,7 +52,7 @@ class VertexMultimodalEmbedding(VertexLLM): aembedding=False, timeout=300, client=None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: _auth_header, vertex_project = self._ensure_access_token( credentials=vertex_credentials, diff --git a/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py b/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py index d3621f411b..d1634f1108 100644 --- a/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py +++ b/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py @@ -15,12 +15,10 @@ from litellm.llms.custom_httpx.http_handler import ( _get_httpx_client, get_async_httpx_client, ) -from litellm.llms.vertex_ai.vertex_ai_non_gemini import ( - VertexAIError, -) +from litellm.llms.vertex_ai.vertex_ai_non_gemini import VertexAIError from litellm.llms.vertex_ai.vertex_llm_base import VertexBase from litellm.types.llms.vertex_ai import * -from litellm.utils import Usage +from litellm.types.utils import EmbeddingResponse, Usage from .transformation import VertexAITextEmbeddingConfig from .types import * @@ -35,7 +33,7 @@ class VertexEmbedding(VertexBase): model: str, input: Union[list, str], print_verbose, - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, optional_params: dict, logging_obj: LiteLLMLoggingObject, custom_llm_provider: Literal[ @@ -52,7 +50,7 @@ class VertexEmbedding(VertexBase): vertex_credentials: Optional[str] = None, gemini_api_key: Optional[str] = None, extra_headers: Optional[dict] = None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: if aembedding is True: return self.async_embedding( # type: ignore model=model, diff --git a/litellm/llms/vertex_ai/vertex_embeddings/transformation.py b/litellm/llms/vertex_ai/vertex_embeddings/transformation.py index 6f4b25ceff..00f384c32c 100644 --- a/litellm/llms/vertex_ai/vertex_embeddings/transformation.py +++ b/litellm/llms/vertex_ai/vertex_embeddings/transformation.py @@ -4,7 +4,7 @@ from typing import List, Literal, Optional, Union from pydantic import BaseModel import litellm -from litellm.utils import Usage +from litellm.types.utils import EmbeddingResponse, Usage from .types import * @@ -198,8 +198,8 @@ class VertexAITextEmbeddingConfig(BaseModel): return text_embedding_input def transform_vertex_response_to_openai( - self, response: dict, model: str, model_response: litellm.EmbeddingResponse - ) -> litellm.EmbeddingResponse: + self, response: dict, model: str, model_response: EmbeddingResponse + ) -> EmbeddingResponse: """ Transforms a vertex embedding response to an openai response. 
""" @@ -234,8 +234,8 @@ class VertexAITextEmbeddingConfig(BaseModel): return model_response def _transform_vertex_response_to_openai_for_fine_tuned_models( - self, response: dict, model: str, model_response: litellm.EmbeddingResponse - ) -> litellm.EmbeddingResponse: + self, response: dict, model: str, model_response: EmbeddingResponse + ) -> EmbeddingResponse: """ Transforms a vertex fine-tuned model embedding response to an openai response format. """ diff --git a/litellm/llms/watsonx/completion/handler.py b/litellm/llms/watsonx/completion/handler.py index 599338c308..df8600e99d 100644 --- a/litellm/llms/watsonx/completion/handler.py +++ b/litellm/llms/watsonx/completion/handler.py @@ -24,6 +24,8 @@ import httpx # type: ignore import requests # type: ignore import litellm +from litellm.litellm_core_utils.prompt_templates import factory as ptf +from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper from litellm.llms.custom_httpx.http_handler import ( AsyncHTTPHandler, get_async_httpx_client, @@ -34,7 +36,6 @@ from litellm.types.llms.watsonx import WatsonXAIEndpoint from litellm.utils import EmbeddingResponse, ModelResponse, Usage, map_finish_reason from ...base import BaseLLM -from litellm.litellm_core_utils.prompt_templates import factory as ptf from ..common_utils import WatsonXAIError, _get_api_params, generate_iam_token from .transformation import IBMWatsonXAIConfig @@ -204,7 +205,7 @@ class IBMWatsonXAI(BaseLLM): def process_stream_response( stream_resp: Union[Iterator[str], AsyncIterator], - ) -> litellm.CustomStreamWrapper: + ) -> CustomStreamWrapper: streamwrapper = litellm.CustomStreamWrapper( stream_resp, model=model, @@ -235,7 +236,7 @@ class IBMWatsonXAI(BaseLLM): json_resp = resp.json() return self._process_text_gen_response(json_resp, model_response) - def handle_stream_request(request_params: dict) -> litellm.CustomStreamWrapper: + def handle_stream_request(request_params: dict) -> CustomStreamWrapper: # stream the response - generated chunks will be handled # by litellm.utils.CustomStreamWrapper.handle_watsonx_stream with self.request_manager.request( @@ -249,7 +250,7 @@ class IBMWatsonXAI(BaseLLM): async def handle_stream_request_async( request_params: dict, - ) -> litellm.CustomStreamWrapper: + ) -> CustomStreamWrapper: # stream the response - generated chunks will be handled # by litellm.utils.CustomStreamWrapper.handle_watsonx_stream async with self.request_manager.async_request( @@ -321,14 +322,14 @@ class IBMWatsonXAI(BaseLLM): self, model: str, input: Union[list, str], - model_response: litellm.EmbeddingResponse, + model_response: EmbeddingResponse, api_key: Optional[str], logging_obj: Any, optional_params: dict, encoding=None, print_verbose=None, aembedding=None, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: """ Send a text embedding request to the IBM Watsonx.ai API. 
""" diff --git a/litellm/proxy/auth/auth_checks.py b/litellm/proxy/auth/auth_checks.py index b74e5199e8..45e8c844c4 100644 --- a/litellm/proxy/auth/auth_checks.py +++ b/litellm/proxy/auth/auth_checks.py @@ -35,6 +35,7 @@ from litellm.proxy._types import ( ) from litellm.proxy.auth.route_checks import RouteChecks from litellm.proxy.utils import PrismaClient, ProxyLogging, log_db_metrics +from litellm.router import Router from litellm.types.services import ServiceLoggerPayload, ServiceTypes from .auth_checks_organization import organization_role_based_access_check @@ -61,7 +62,7 @@ def common_checks( # noqa: PLR0915 global_proxy_spend: Optional[float], general_settings: dict, route: str, - llm_router: Optional[litellm.Router], + llm_router: Optional[Router], ) -> bool: """ Common checks across jwt + key-based auth. @@ -347,7 +348,7 @@ async def get_end_user_object( def model_in_access_group( - model: str, team_models: Optional[List[str]], llm_router: Optional[litellm.Router] + model: str, team_models: Optional[List[str]], llm_router: Optional[Router] ) -> bool: from collections import defaultdict diff --git a/litellm/proxy/example_config_yaml/custom_handler.py b/litellm/proxy/example_config_yaml/custom_handler.py index fdde975d6f..52e6686e6a 100644 --- a/litellm/proxy/example_config_yaml/custom_handler.py +++ b/litellm/proxy/example_config_yaml/custom_handler.py @@ -4,10 +4,11 @@ from typing import Any, Optional import litellm from litellm import CustomLLM, ImageObject, ImageResponse, completion, get_llm_provider from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler +from litellm.types.utils import ModelResponse class MyCustomLLM(CustomLLM): - def completion(self, *args, **kwargs) -> litellm.ModelResponse: + def completion(self, *args, **kwargs) -> ModelResponse: return litellm.completion( model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}], diff --git a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py index db5972a7cf..7a23817655 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py +++ b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py @@ -45,6 +45,7 @@ from litellm.types.guardrails import ( BedrockTextContent, GuardrailEventHooks, ) +from litellm.types.utils import ModelResponse GUARDRAIL_NAME = "bedrock" @@ -70,7 +71,7 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM): def convert_to_bedrock_format( self, messages: Optional[List[Dict[str, str]]] = None, - response: Optional[Union[Any, litellm.ModelResponse]] = None, + response: Optional[Union[Any, ModelResponse]] = None, ) -> BedrockRequest: bedrock_request: BedrockRequest = BedrockRequest(source="INPUT") bedrock_request_content: List[BedrockContentItem] = [] diff --git a/litellm/proxy/hooks/prompt_injection_detection.py b/litellm/proxy/hooks/prompt_injection_detection.py index be4eb79885..19e152c2c5 100644 --- a/litellm/proxy/hooks/prompt_injection_detection.py +++ b/litellm/proxy/hooks/prompt_injection_detection.py @@ -20,8 +20,11 @@ import litellm from litellm._logging import verbose_proxy_logger from litellm.caching.caching import DualCache from litellm.integrations.custom_logger import CustomLogger -from litellm.litellm_core_utils.prompt_templates.factory import prompt_injection_detection_default_pt +from litellm.litellm_core_utils.prompt_templates.factory import ( + prompt_injection_detection_default_pt, +) from litellm.proxy._types import LiteLLMPromptInjectionParams, 
UserAPIKeyAuth +from litellm.router import Router from litellm.utils import get_formatted_prompt @@ -32,7 +35,7 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger): prompt_injection_params: Optional[LiteLLMPromptInjectionParams] = None, ): self.prompt_injection_params = prompt_injection_params - self.llm_router: Optional[litellm.Router] = None + self.llm_router: Optional[Router] = None self.verbs = [ "Ignore", @@ -74,7 +77,7 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger): if litellm.set_verbose is True: print(print_statement) # noqa - def update_environment(self, router: Optional[litellm.Router] = None): + def update_environment(self, router: Optional[Router] = None): self.llm_router = router if ( diff --git a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py index 9161eb8493..4799de9eba 100644 --- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py +++ b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py @@ -16,6 +16,7 @@ from litellm.llms.anthropic.chat.handler import ( from litellm.llms.anthropic.chat.transformation import AnthropicConfig from litellm.proxy._types import PassThroughEndpointLoggingTypedDict from litellm.proxy.pass_through_endpoints.types import PassthroughStandardLoggingPayload +from litellm.types.utils import ModelResponse, TextCompletionResponse if TYPE_CHECKING: from ..success_handler import PassThroughEndpointLogging @@ -43,9 +44,7 @@ class AnthropicPassthroughLoggingHandler: Transforms Anthropic response to OpenAI response, generates a standard logging object so downstream logging can be handled """ model = response_body.get("model", "") - litellm_model_response: ( - litellm.ModelResponse - ) = AnthropicConfig().transform_response( + litellm_model_response: ModelResponse = AnthropicConfig().transform_response( raw_response=httpx_response, model_response=litellm.ModelResponse(), model=model, @@ -89,9 +88,7 @@ class AnthropicPassthroughLoggingHandler: @staticmethod def _create_anthropic_response_logging_payload( - litellm_model_response: Union[ - litellm.ModelResponse, litellm.TextCompletionResponse - ], + litellm_model_response: Union[ModelResponse, TextCompletionResponse], model: str, kwargs: dict, start_time: datetime, @@ -204,7 +201,7 @@ class AnthropicPassthroughLoggingHandler: all_chunks: List[str], litellm_logging_obj: LiteLLMLoggingObj, model: str, - ) -> Optional[Union[litellm.ModelResponse, litellm.TextCompletionResponse]]: + ) -> Optional[Union[ModelResponse, TextCompletionResponse]]: """ Builds complete response from raw Anthropic chunks diff --git a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py index a6c9046b32..ea287b57d5 100644 --- a/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py +++ b/litellm/proxy/pass_through_endpoints/llm_provider_handlers/vertex_passthrough_logging_handler.py @@ -15,6 +15,12 @@ from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import ( ModelResponseIterator as VertexModelResponseIterator, ) from litellm.proxy._types import PassThroughEndpointLoggingTypedDict +from litellm.types.utils import ( + EmbeddingResponse, + ImageResponse, + ModelResponse, + 
TextCompletionResponse, +) if TYPE_CHECKING: from ..success_handler import PassThroughEndpointLogging @@ -40,7 +46,7 @@ class VertexPassthroughLoggingHandler: model = VertexPassthroughLoggingHandler.extract_model_from_url(url_route) instance_of_vertex_llm = litellm.VertexGeminiConfig() - litellm_model_response: litellm.ModelResponse = ( + litellm_model_response: ModelResponse = ( instance_of_vertex_llm.transform_response( model=model, messages=[ @@ -82,8 +88,8 @@ class VertexPassthroughLoggingHandler: _json_response = httpx_response.json() litellm_prediction_response: Union[ - litellm.ModelResponse, litellm.EmbeddingResponse, litellm.ImageResponse - ] = litellm.ModelResponse() + ModelResponse, EmbeddingResponse, ImageResponse + ] = ModelResponse() if vertex_image_generation_class.is_image_generation_response( _json_response ): @@ -176,7 +182,7 @@ class VertexPassthroughLoggingHandler: all_chunks: List[str], litellm_logging_obj: LiteLLMLoggingObj, model: str, - ) -> Optional[Union[litellm.ModelResponse, litellm.TextCompletionResponse]]: + ) -> Optional[Union[ModelResponse, TextCompletionResponse]]: vertex_iterator = VertexModelResponseIterator( streaming_response=None, sync_stream=False, @@ -212,9 +218,7 @@ class VertexPassthroughLoggingHandler: @staticmethod def _create_vertex_response_logging_payload_for_generate_content( - litellm_model_response: Union[ - litellm.ModelResponse, litellm.TextCompletionResponse - ], + litellm_model_response: Union[ModelResponse, TextCompletionResponse], model: str, kwargs: dict, start_time: datetime, diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index bee43e4d43..4210d6035c 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -109,6 +109,7 @@ from litellm import ( CreateBatchRequest, ListBatchRequest, RetrieveBatchRequest, + Router, ) from litellm._logging import verbose_proxy_logger, verbose_router_logger from litellm.caching.caching import DualCache, RedisCache @@ -482,7 +483,7 @@ user_config_file_path: Optional[str] = None local_logging = True # writes logs to a local api_log.json file for debugging experimental = False #### GLOBAL VARIABLES #### -llm_router: Optional[litellm.Router] = None +llm_router: Optional[Router] = None llm_model_list: Optional[list] = None general_settings: dict = {} callback_settings: dict = {} @@ -2833,7 +2834,7 @@ class ProxyStartupEvent: @classmethod def _initialize_startup_logging( cls, - llm_router: Optional[litellm.Router], + llm_router: Optional[Router], proxy_logging_obj: ProxyLogging, redis_usage_cache: Optional[RedisCache], ): diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index d7b275cf0c..9ae26aec09 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -289,7 +289,7 @@ class ProxyLogging: def startup_event( self, - llm_router: Optional[litellm.Router], + llm_router: Optional[Router], redis_usage_cache: Optional[RedisCache], ): """Initialize logging and alerting on proxy startup""" @@ -359,7 +359,7 @@ class ProxyLogging: if redis_cache is not None: self.internal_usage_cache.dual_cache.redis_cache = redis_cache - def _init_litellm_callbacks(self, llm_router: Optional[litellm.Router] = None): + def _init_litellm_callbacks(self, llm_router: Optional[Router] = None): litellm.callbacks.append(self.max_parallel_request_limiter) # type: ignore litellm.callbacks.append(self.max_budget_limiter) # type: ignore litellm.callbacks.append(self.cache_control_check) # type: ignore diff --git a/litellm/router.py b/litellm/router.py index 
f9860a6799..0fd39caa05 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -145,6 +145,7 @@ from litellm.types.utils import ModelInfo as ModelMapInfo from litellm.types.utils import StandardLoggingPayload from litellm.utils import ( CustomStreamWrapper, + EmbeddingResponse, ModelResponse, _is_region_eu, calculate_max_parallel_requests, @@ -2071,7 +2072,7 @@ class Router: input: Union[str, List], is_async: Optional[bool] = False, **kwargs, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: try: kwargs["model"] = model kwargs["input"] = input @@ -2146,7 +2147,7 @@ class Router: input: Union[str, List], is_async: Optional[bool] = True, **kwargs, - ) -> litellm.EmbeddingResponse: + ) -> EmbeddingResponse: try: kwargs["model"] = model kwargs["input"] = input diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 0f925816d5..46017a7963 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -1660,3 +1660,84 @@ class PersonalUIKeyGenerationConfig(KeyGenerationConfig): class StandardKeyGenerationConfig(TypedDict, total=False): team_key_generation: TeamUIKeyGenerationConfig personal_key_generation: PersonalUIKeyGenerationConfig + + +class LlmProviders(str, Enum): + OPENAI = "openai" + OPENAI_LIKE = "openai_like" # embedding only + JINA_AI = "jina_ai" + XAI = "xai" + CUSTOM_OPENAI = "custom_openai" + TEXT_COMPLETION_OPENAI = "text-completion-openai" + COHERE = "cohere" + COHERE_CHAT = "cohere_chat" + CLARIFAI = "clarifai" + ANTHROPIC = "anthropic" + ANTHROPIC_TEXT = "anthropic_text" + REPLICATE = "replicate" + HUGGINGFACE = "huggingface" + TOGETHER_AI = "together_ai" + OPENROUTER = "openrouter" + VERTEX_AI = "vertex_ai" + VERTEX_AI_BETA = "vertex_ai_beta" + GEMINI = "gemini" + AI21 = "ai21" + BASETEN = "baseten" + AZURE = "azure" + AZURE_TEXT = "azure_text" + AZURE_AI = "azure_ai" + SAGEMAKER = "sagemaker" + SAGEMAKER_CHAT = "sagemaker_chat" + BEDROCK = "bedrock" + VLLM = "vllm" + NLP_CLOUD = "nlp_cloud" + PETALS = "petals" + OOBABOOGA = "oobabooga" + OLLAMA = "ollama" + OLLAMA_CHAT = "ollama_chat" + DEEPINFRA = "deepinfra" + PERPLEXITY = "perplexity" + MISTRAL = "mistral" + GROQ = "groq" + NVIDIA_NIM = "nvidia_nim" + CEREBRAS = "cerebras" + AI21_CHAT = "ai21_chat" + VOLCENGINE = "volcengine" + CODESTRAL = "codestral" + TEXT_COMPLETION_CODESTRAL = "text-completion-codestral" + DEEPSEEK = "deepseek" + SAMBANOVA = "sambanova" + MARITALK = "maritalk" + VOYAGE = "voyage" + CLOUDFLARE = "cloudflare" + XINFERENCE = "xinference" + FIREWORKS_AI = "fireworks_ai" + FRIENDLIAI = "friendliai" + WATSONX = "watsonx" + WATSONX_TEXT = "watsonx_text" + TRITON = "triton" + PREDIBASE = "predibase" + DATABRICKS = "databricks" + EMPOWER = "empower" + GITHUB = "github" + CUSTOM = "custom" + LITELLM_PROXY = "litellm_proxy" + HOSTED_VLLM = "hosted_vllm" + LM_STUDIO = "lm_studio" + GALADRIEL = "galadriel" + + +class LiteLLMLoggingBaseClass: + """ + Base class for logging pre and post call + + Meant to simplify type checking for logging obj. 
+ """ + + def pre_call(self, input, api_key, model=None, additional_args={}): + pass + + def post_call( + self, original_response, input=None, api_key=None, additional_args={} + ): + pass diff --git a/litellm/utils.py b/litellm/utils.py index ddaf60d5a1..6f662f6595 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -126,6 +126,7 @@ from litellm.types.utils import ( EmbeddingResponse, Function, ImageResponse, + LlmProviders, Message, ModelInfo, ModelResponse, @@ -147,6 +148,7 @@ claude_json_str = json.dumps(json_data) import importlib.metadata from concurrent.futures import ThreadPoolExecutor from typing import ( + TYPE_CHECKING, Any, Callable, Dict, @@ -163,6 +165,8 @@ from typing import ( from openai import OpenAIError as OriginalError +from litellm.llms.base_llm.transformation import BaseConfig + from ._logging import verbose_logger from .caching.caching import ( Cache, @@ -235,7 +239,6 @@ last_fetched_at = None last_fetched_at_keys = None ######## Model Response ######################### - # All liteLLM Model responses will be in this format, Follows the OpenAI Format # https://docs.litellm.ai/docs/completion/output # { @@ -6205,13 +6208,10 @@ def validate_chat_completion_user_messages(messages: List[AllMessageValues]): return messages -from litellm.llms.base_llm.transformation import BaseConfig - - class ProviderConfigManager: @staticmethod def get_provider_chat_config( # noqa: PLR0915 - model: str, provider: litellm.LlmProviders + model: str, provider: LlmProviders ) -> BaseConfig: """ Returns the provider config for a given provider. diff --git a/tests/documentation_tests/test_api_docs.py b/tests/documentation_tests/test_api_docs.py index 407010dcc6..edab7c1d34 100644 --- a/tests/documentation_tests/test_api_docs.py +++ b/tests/documentation_tests/test_api_docs.py @@ -173,8 +173,8 @@ def main(): "list_organization", "user_update", ] - directory = "../../litellm/proxy/management_endpoints" # LOCAL - # directory = "./litellm/proxy/management_endpoints" + # directory = "../../litellm/proxy/management_endpoints" # LOCAL + directory = "./litellm/proxy/management_endpoints" # Convert function names to set for faster lookup target_functions = set(function_names) diff --git a/tests/documentation_tests/test_circular_imports.py b/tests/documentation_tests/test_circular_imports.py new file mode 100644 index 0000000000..b7ea13cd12 --- /dev/null +++ b/tests/documentation_tests/test_circular_imports.py @@ -0,0 +1,162 @@ +import os +import ast +import sys +from typing import List, Tuple, Optional + + +def find_litellm_type_hints(directory: str) -> List[Tuple[str, int, str]]: + """ + Recursively search for Python files in the given directory + and find type hints containing 'litellm.'. + + Args: + directory (str): The root directory to search for Python files + + Returns: + List of tuples containing (file_path, line_number, type_hint) + """ + litellm_type_hints = [] + + def is_litellm_type_hint(node): + """ + Recursively check if a type annotation contains 'litellm.' + + Handles more complex type hints like: + - Optional[litellm.Type] + - Union[litellm.Type1, litellm.Type2] + - Nested type hints + """ + try: + # Convert node to string representation + type_str = ast.unparse(node) + + # Direct check for litellm in type string + if "litellm." 
in type_str: + return True + + # Handle more complex type hints + if isinstance(node, ast.Subscript): + # Check Union or Optional types + if isinstance(node.value, ast.Name) and node.value.id in [ + "Union", + "Optional", + ]: + # Check each element in the Union/Optional type + if isinstance(node.slice, ast.Tuple): + return any(is_litellm_type_hint(elt) for elt in node.slice.elts) + else: + return is_litellm_type_hint(node.slice) + + # Recursive check for subscripted types + return is_litellm_type_hint(node.value) or is_litellm_type_hint( + node.slice + ) + + # Recursive check for attribute types + if isinstance(node, ast.Attribute): + return "litellm." in ast.unparse(node) + + # Recursive check for name types + if isinstance(node, ast.Name): + return "litellm" in node.id + + return False + except Exception: + # Fallback to string checking if parsing fails + try: + return "litellm." in ast.unparse(node) + except: + return False + + def scan_file(file_path: str): + """ + Scan a single Python file for LiteLLM type hints + """ + try: + # Use utf-8-sig to handle files with BOM, ignore errors + with open(file_path, "r", encoding="utf-8-sig", errors="ignore") as file: + tree = ast.parse(file.read()) + + for node in ast.walk(tree): + # Check type annotations in variable annotations + if isinstance(node, ast.AnnAssign) and node.annotation: + if is_litellm_type_hint(node.annotation): + litellm_type_hints.append( + (file_path, node.lineno, ast.unparse(node.annotation)) + ) + + # Check type hints in function arguments + elif isinstance(node, ast.FunctionDef): + for arg in node.args.args: + if arg.annotation and is_litellm_type_hint(arg.annotation): + litellm_type_hints.append( + (file_path, arg.lineno, ast.unparse(arg.annotation)) + ) + + # Check return type annotation + if node.returns and is_litellm_type_hint(node.returns): + litellm_type_hints.append( + (file_path, node.lineno, ast.unparse(node.returns)) + ) + except SyntaxError as e: + print(f"Syntax error in {file_path}: {e}", file=sys.stderr) + except Exception as e: + print(f"Error processing {file_path}: {e}", file=sys.stderr) + + # Recursively walk through directory + for root, dirs, files in os.walk(directory): + # Remove virtual environment and cache directories from search + dirs[:] = [ + d + for d in dirs + if not any( + venv in d + for venv in [ + "venv", + "env", + "myenv", + ".venv", + "__pycache__", + ".pytest_cache", + ] + ) + ] + + for file in files: + if file.endswith(".py"): + full_path = os.path.join(root, file) + # Skip files in virtual environment or cache directories + if not any( + venv in full_path + for venv in [ + "venv", + "env", + "myenv", + ".venv", + "__pycache__", + ".pytest_cache", + ] + ): + scan_file(full_path) + + return litellm_type_hints + + +def main(): + # Get directory from command line argument or use current directory + directory = "./litellm/" + + # Find LiteLLM type hints + results = find_litellm_type_hints(directory) + + # Print results + if results: + print("LiteLLM Type Hints Found:") + for file_path, line_num, type_hint in results: + print(f"{file_path}:{line_num} - {type_hint}") + else: + print("No LiteLLM type hints found.") + + +if __name__ == "__main__": + main()
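
Taken together, the hunks above replace `litellm.`-qualified annotations (`litellm.ModelResponse`, `litellm.EmbeddingResponse`, `litellm.Router`, `litellm.LlmProviders`) with names imported directly from `litellm.types.utils` and `litellm.router`, which is the pattern the new circular-import check enforces. A minimal sketch of the target style, assuming the litellm package is importable; the function names `log_response` and `attach_router` are illustrative only, not part of this patch:

from typing import TYPE_CHECKING, Optional, Union

from litellm.types.utils import EmbeddingResponse, ModelResponse

if TYPE_CHECKING:
    # Import heavyweight classes only for type checkers when a plain
    # runtime import would reintroduce a circular dependency.
    from litellm.router import Router


def log_response(
    response: Union[ModelResponse, EmbeddingResponse],
) -> Optional[ModelResponse]:
    # Annotations reference the imported classes, never litellm.ModelResponse.
    return response if isinstance(response, ModelResponse) else None


def attach_router(router: Optional["Router"] = None) -> None:
    # The string forward reference keeps this valid when Router is only
    # imported under TYPE_CHECKING.
    ...

The proxy modules touched above mostly take the simpler route of a plain `from litellm.router import Router`; the TYPE_CHECKING guard is shown here only as the fallback for cases where even that direct import would be circular.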
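
For a sense of how `tests/documentation_tests/test_circular_imports.py` spots offenders, here is a self-contained sketch of the same `ast`-based idea; the embedded `completion_handler` source is hypothetical and exists only to trigger the check:

import ast

# Hypothetical module body with a package-qualified return annotation,
# the style the new test is meant to flag.
SOURCE = """
import litellm

def completion_handler(prompt: str) -> litellm.ModelResponse:
    ...
"""

tree = ast.parse(SOURCE)
for node in ast.walk(tree):
    if isinstance(node, ast.FunctionDef) and node.returns is not None:
        annotation = ast.unparse(node.returns)  # "litellm.ModelResponse"
        if "litellm." in annotation:
            print(f"{node.name}:{node.lineno} -> {annotation}")

The real test additionally inspects `ast.AnnAssign` nodes and function-argument annotations, and skips virtual-env and cache directories, but its detection core is the same `"litellm." in ast.unparse(...)` substring check shown here.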