commit 24aed29ad7
Author: Ishaan Jaff, 2025-04-23 23:48:00 +00:00 (committed by GitHub)
414 changed files with 2224 additions and 2205 deletions

@@ -402,7 +402,7 @@ If you have suggestions on how to improve the code quality feel free to open an
 1. (In root) create virtual environment `python -m venv .venv`
 2. Activate virtual environment `source .venv/bin/activate`
 3. Install dependencies `pip install -e ".[all]"`
-4. Start proxy backend `uvicorn litellm.proxy.proxy_server:app --host localhost --port 4000 --reload`
+4. Start proxy backend `uvicorn litellm_proxy.proxy_server:app --host localhost --port 4000 --reload`
 ### Frontend
 1. Navigate to `ui/litellm-dashboard`
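
For reference, the same backend can be started from Python rather than the shell; a minimal sketch, assuming `uvicorn` is installed and the post-rename `litellm_proxy` package is importable:

```python
# Programmatic equivalent of the CLI command above (illustrative sketch).
import uvicorn

if __name__ == "__main__":
    uvicorn.run(
        "litellm_proxy.proxy_server:app",  # import string, required for reload to work
        host="localhost",
        port=4000,
        reload=True,  # auto-restart on code changes
    )
```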

@@ -17,7 +17,7 @@ This function is called just before a litellm completion call is made, and allow
 ```python
 from litellm.integrations.custom_logger import CustomLogger
 import litellm
-from litellm.proxy.proxy_server import UserAPIKeyAuth, DualCache
+from litellm_proxy.proxy_server import UserAPIKeyAuth, DualCache
 from typing import Optional, Literal
 # This file includes the custom callbacks for LiteLLM Proxy

@@ -9,7 +9,7 @@ Here's how:
 Make sure the response type follows the `UserAPIKeyAuth` pydantic object. This is used for logging usage specific to that user key.
 ```python
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
     try:

@@ -20,12 +20,12 @@ Make sure the response type follows the `SSOUserDefinedValues` pydantic object.
 from fastapi import Request
 from fastapi_sso.sso.base import OpenID
-from litellm.proxy._types import LitellmUserRoles, SSOUserDefinedValues
-from litellm.proxy.management_endpoints.internal_user_endpoints import (
+from litellm_proxy._types import LitellmUserRoles, SSOUserDefinedValues
+from litellm_proxy.management_endpoints.internal_user_endpoints import (
     new_user,
     user_info,
 )
-from litellm.proxy.management_endpoints.team_endpoints import add_new_member
+from litellm_proxy.management_endpoints.team_endpoints import add_new_member
 async def custom_sso_handler(userIDPInfo: OpenID) -> SSOUserDefinedValues:

@@ -29,8 +29,8 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
 from litellm.types.guardrails import GuardrailEventHooks
@@ -449,7 +449,7 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 class myCustomGuardrail(CustomGuardrail):
     def __init__(self, **kwargs):

@@ -14,11 +14,11 @@ sys.path.insert(
 from typing import Optional, Literal, Any
 import litellm
 import sys
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from fastapi import HTTPException
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
+from litellm_proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
 from litellm.litellm_core_utils.logging_utils import (
     convert_litellm_response_object_to_str,
 )
@@ -140,7 +140,7 @@ class AporiaGuardrail(CustomGuardrail):
         user_api_key_dict: UserAPIKeyAuth,
         response,
     ):
-        from litellm.proxy.common_utils.callback_utils import (
+        from litellm_proxy.common_utils.callback_utils import (
             add_guardrail_to_applied_guardrails_header,
         )
@@ -176,7 +176,7 @@ class AporiaGuardrail(CustomGuardrail):
             "responses",
         ],
     ):
-        from litellm.proxy.common_utils.callback_utils import (
+        from litellm_proxy.common_utils.callback_utils import (
             add_guardrail_to_applied_guardrails_header,
         )

@@ -10,7 +10,7 @@
 from typing import Literal
 import litellm
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_proxy_logger
 from fastapi import HTTPException

@@ -9,9 +9,9 @@
 from typing import Optional, Literal
 import litellm
-from litellm.proxy.utils import PrismaClient
+from litellm_proxy.utils import PrismaClient
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
+from litellm_proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
 from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_proxy_logger
 from fastapi import HTTPException

@@ -9,7 +9,7 @@
 from typing import Literal
 import litellm
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
 from litellm._logging import verbose_proxy_logger

@@ -17,7 +17,7 @@ sys.path.insert(
 from typing import Optional, Literal
 import litellm
 import sys
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
 from litellm._logging import verbose_proxy_logger

@@ -9,7 +9,7 @@
 from typing import Optional, Literal
 import litellm
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
 from litellm._logging import verbose_proxy_logger

@@ -14,7 +14,7 @@ sys.path.insert(
 from typing import Literal
 import litellm
 import sys
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
 from litellm._logging import verbose_proxy_logger
@@ -50,7 +50,7 @@ class _ENTERPRISE_OpenAI_Moderation(CustomLogger):
             if "content" in m and isinstance(m["content"], str):
                 text += m["content"]
-        from litellm.proxy.proxy_server import llm_router
+        from litellm_proxy.proxy_server import llm_router
         if llm_router is None:
             return

@@ -11,13 +11,18 @@ import os
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-from typing import Optional
+from typing import Any, Optional
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth
+from typing import TYPE_CHECKING
 from litellm._logging import verbose_proxy_logger
 import tempfile
 from litellm.integrations.custom_guardrail import CustomGuardrail
+if TYPE_CHECKING:
+    from litellm_proxy._types import UserAPIKeyAuth
+else:
+    UserAPIKeyAuth = Any
 GUARDRAIL_NAME = "hide_secrets"
 _custom_plugins_path = "file://" + os.path.join(
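
The `if TYPE_CHECKING:` block introduced above is the standard stdlib pattern for a type-only import: the annotation stays visible to mypy/pyright while the runtime import graph is cut, which is what breaks the dependency from `litellm` onto the proxy package. A minimal self-contained sketch of the pattern (the hook function is hypothetical, for illustration only):

```python
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # Only evaluated by static type checkers; never executed at runtime.
    from litellm_proxy._types import UserAPIKeyAuth
else:
    # Runtime stand-in so the name still resolves if annotations are inspected.
    UserAPIKeyAuth = Any


def pre_call_hook(user_api_key_dict: "UserAPIKeyAuth") -> None:
    # Quoted annotation: not evaluated when the function is defined.
    print(user_api_key_dict)
```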

@@ -61,13 +61,14 @@ from litellm.constants import (
     DEFAULT_ALLOWED_FAILS,
 )
 from litellm.types.guardrails import GuardrailItem
-from litellm.proxy._types import (
+from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams
+from litellm.types.utils import (
+    StandardKeyGenerationConfig,
+    LlmProviders,
     KeyManagementSystem,
     KeyManagementSettings,
     LiteLLM_UpperboundKeyGenerateParams,
 )
-from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams
-from litellm.types.utils import StandardKeyGenerationConfig, LlmProviders
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.logging_callback_manager import LoggingCallbackManager
 import httpx
@@ -1048,7 +1049,6 @@ from .exceptions import (
     MockException,
 )
 from .budget_manager import BudgetManager
-from .proxy.proxy_cli import run_server
 from .router import Router
 from .assistants.main import *
 from .batches.main import *
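
The net effect of these two hunks is that `litellm/__init__.py` no longer imports anything from the proxy package at import time; the key-management types resolve from `litellm.types.utils` instead. A sketch of the post-change import surface (names taken from the hunk above):

```python
from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams
from litellm.types.utils import (
    KeyManagementSettings,
    KeyManagementSystem,
    LiteLLM_UpperboundKeyGenerateParams,
    LlmProviders,
    StandardKeyGenerationConfig,
)
```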

@@ -4,7 +4,6 @@ from typing import TYPE_CHECKING, Any, Optional, Union
 import litellm
 from litellm._logging import verbose_logger
-from litellm.proxy._types import UserAPIKeyAuth
 from .integrations.custom_logger import CustomLogger
 from .integrations.datadog.datadog import DataDogLogger
@@ -15,11 +14,14 @@ from .types.services import ServiceLoggerPayload, ServiceTypes
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
+    from litellm_proxy._types import UserAPIKeyAuth
     Span = Union[_Span, Any]
     OTELClass = OpenTelemetry
 else:
     Span = Any
     OTELClass = Any
+    UserAPIKeyAuth = Any
 class ServiceLogging(CustomLogger):
@@ -143,7 +145,7 @@ class ServiceLogging(CustomLogger):
                 event_metadata=event_metadata,
             )
         elif callback == "otel" or isinstance(callback, OpenTelemetry):
-            from litellm.proxy.proxy_server import open_telemetry_logger
+            from litellm_proxy.proxy_server import open_telemetry_logger
             await self.init_otel_logger_if_none()
@@ -188,7 +190,7 @@ class ServiceLogging(CustomLogger):
         initializes otel_logger if it is None or no attribute exists on ServiceLogging Object
         """
-        from litellm.proxy.proxy_server import open_telemetry_logger
+        from litellm_proxy.proxy_server import open_telemetry_logger
         if not hasattr(self, "otel_logger"):
             if open_telemetry_logger is not None and isinstance(
@@ -251,7 +253,7 @@ class ServiceLogging(CustomLogger):
                 event_metadata=event_metadata,
             )
         elif callback == "otel" or isinstance(callback, OpenTelemetry):
-            from litellm.proxy.proxy_server import open_telemetry_logger
+            from litellm_proxy.proxy_server import open_telemetry_logger
             await self.init_otel_logger_if_none()

@@ -281,7 +281,7 @@ class QdrantSemanticCache(BaseCache):
     async def async_set_cache(self, key, value, **kwargs):
         import uuid
-        from litellm.proxy.proxy_server import llm_model_list, llm_router
+        from litellm_proxy.proxy_server import llm_model_list, llm_router
         print_verbose(f"async qdrant semantic-cache set_cache, kwargs: {kwargs}")
@@ -344,7 +344,7 @@ class QdrantSemanticCache(BaseCache):
     async def async_get_cache(self, key, **kwargs):
         print_verbose(f"async qdrant semantic-cache get_cache, kwargs: {kwargs}")
-        from litellm.proxy.proxy_server import llm_model_list, llm_router
+        from litellm_proxy.proxy_server import llm_model_list, llm_router
         # get the messages
         messages = kwargs["messages"]
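
Both hunks keep the proxy import inside the method body. Deferring the import to call time avoids a circular import between the caching layer and the proxy, and reads module-level globals such as `llm_router` only after the proxy process has populated them. A minimal sketch of the pattern (the helper name is hypothetical):

```python
def _resolve_proxy_router():
    # Imported lazily: this module stays importable even when litellm_proxy
    # is absent, and llm_router is read at call time, post-initialization.
    from litellm_proxy.proxy_server import llm_model_list, llm_router

    return llm_router, llm_model_list
```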

@@ -279,7 +279,7 @@ class RedisSemanticCache(BaseCache):
         Returns:
             List[float]: The embedding vector
         """
-        from litellm.proxy.proxy_server import llm_model_list, llm_router
+        from litellm_proxy.proxy_server import llm_model_list, llm_router
         # Route the embedding request through the proxy if appropriate
         router_model_names = (

@@ -26,8 +26,8 @@ from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
     httpxSpecialProvider,
 )
-from litellm.proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
 from litellm.types.integrations.slack_alerting import *
+from litellm_proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
 from ..email_templates.templates import *
 from .batching_handler import send_to_webhook, squash_payloads
@@ -823,9 +823,9 @@ class SlackAlerting(CustomBatchLogger):
         ### UNIQUE CACHE KEY ###
         cache_key = provider + region_name
-        outage_value: Optional[
-            ProviderRegionOutageModel
-        ] = await self.internal_usage_cache.async_get_cache(key=cache_key)
+        outage_value: Optional[ProviderRegionOutageModel] = (
+            await self.internal_usage_cache.async_get_cache(key=cache_key)
+        )
         if (
             getattr(exception, "status_code", None) is None
@@ -1148,7 +1148,7 @@ Model Info:
         email_logo_url: Optional[str] = None,
         email_support_contact: Optional[str] = None,
     ):
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         if premium_user is not True:
             if email_logo_url is not None or email_support_contact is not None:
@@ -1161,7 +1161,7 @@ Model Info:
         self, webhook_event: WebhookEvent
     ) -> bool:
         try:
-            from litellm.proxy.utils import send_email
+            from litellm_proxy.utils import send_email
             if self.alerting is None or "email" not in self.alerting:
                 # do nothing if user does not want email alerts
@@ -1170,7 +1170,7 @@ Model Info:
                     self.alerting,
                 )
                 return False
-            from litellm.proxy.proxy_server import premium_user, prisma_client
+            from litellm_proxy.proxy_server import premium_user, prisma_client
             email_logo_url = os.getenv(
                 "SMTP_SENDER_LOGO", os.getenv("EMAIL_LOGO_URL", None)
@@ -1271,8 +1271,8 @@ Model Info:
         Returns -> True if sent, False if not.
         """
-        from litellm.proxy.proxy_server import premium_user
-        from litellm.proxy.utils import send_email
+        from litellm_proxy.proxy_server import premium_user
+        from litellm_proxy.utils import send_email
         email_logo_url = os.getenv(
             "SMTP_SENDER_LOGO", os.getenv("EMAIL_LOGO_URL", None)
@@ -1406,9 +1406,9 @@ Model Info:
             self.alert_to_webhook_url is not None
             and alert_type in self.alert_to_webhook_url
         ):
-            slack_webhook_url: Optional[
-                Union[str, List[str]]
-            ] = self.alert_to_webhook_url[alert_type]
+            slack_webhook_url: Optional[Union[str, List[str]]] = (
+                self.alert_to_webhook_url[alert_type]
+            )
         elif self.default_webhook_url is not None:
             slack_webhook_url = self.default_webhook_url
         else:
@@ -1598,7 +1598,7 @@ Model Info:
             return
         try:
-            from litellm.proxy.spend_tracking.spend_management_endpoints import (
+            from litellm_proxy.spend_tracking.spend_management_endpoints import (
                 _get_spend_report_for_time_range,
             )
@@ -1662,7 +1662,7 @@ Model Info:
         try:
             from calendar import monthrange
-            from litellm.proxy.spend_tracking.spend_management_endpoints import (
+            from litellm_proxy.spend_tracking.spend_management_endpoints import (
                 _get_spend_report_for_time_range,
             )

@@ -5,8 +5,8 @@ Utils used for slack alerting
 import asyncio
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
-from litellm.proxy._types import AlertType
 from litellm.secret_managers.main import get_secret
+from litellm_proxy._types import AlertType
 if TYPE_CHECKING:
     from litellm.litellm_core_utils.litellm_logging import Logging as _Logging
@@ -17,7 +17,7 @@ else:
 def process_slack_alerting_variables(
-    alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]]
+    alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]],
 ) -> Optional[Dict[AlertType, Union[List[str], str]]]:
     """
     process alert_to_webhook_url

@@ -321,7 +321,7 @@ class AzureBlobStorageLogger(CustomBatchLogger):
         """
         Checks if the user is a premium user, raises an error if not
         """
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         if premium_user is not True:
             raise ValueError(

@@ -165,7 +165,7 @@ class CustomGuardrail(CustomLogger):
         """
         Returns True if the user is a premium user
         """
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         if premium_user is not True:
             verbose_logger.warning(
@@ -183,7 +183,7 @@ class CustomGuardrail(CustomLogger):
         """
         Builds `StandardLoggingGuardrailInformation` and adds it to the request metadata so it can be used for logging to DataDog, Langfuse, etc.
         """
-        from litellm.proxy.proxy_server import premium_user
+        from litellm_proxy.proxy_server import premium_user
         if premium_user is not True:
             verbose_logger.warning(

@@ -15,7 +15,6 @@ from typing import (
 from pydantic import BaseModel
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.integrations.argilla import ArgillaItem
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest
 from litellm.types.utils import (
@@ -30,9 +29,12 @@ from litellm.types.utils import (
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
+    from litellm_proxy._types import UserAPIKeyAuth
     Span = Union[_Span, Any]
 else:
     Span = Any
+    UserAPIKeyAuth = Any
 class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callback#callback-class

@@ -3,10 +3,14 @@ Functions for sending Email Alerts
 """
 import os
-from typing import List, Optional
+from typing import TYPE_CHECKING, Any, List, Optional
 from litellm._logging import verbose_logger, verbose_proxy_logger
-from litellm.proxy._types import WebhookEvent
+if TYPE_CHECKING:
+    from litellm_proxy._types import WebhookEvent
+else:
+    WebhookEvent = Any
 # we use this for the email header, please send a test email if you change this. verify it looks good on email
 LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
@@ -19,7 +23,7 @@ async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
     )
     if team_id is None:
         return []
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
     if prisma_client is None:
         raise Exception("Not connected to DB!")
@@ -71,7 +75,7 @@ async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
     Send an Email Alert to All Team Members when the Team Budget is crossed
     Returns -> True if sent, False if not.
     """
-    from litellm.proxy.utils import send_email
+    from litellm_proxy.utils import send_email
     _team_id = webhook_event.team_id
     team_alias = webhook_event.team_alias

@@ -9,10 +9,10 @@ from urllib.parse import quote
 from litellm._logging import verbose_logger
 from litellm.integrations.additional_logging_utils import AdditionalLoggingUtils
 from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
-from litellm.proxy._types import CommonProxyErrors
 from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus
 from litellm.types.integrations.gcs_bucket import *
 from litellm.types.utils import StandardLoggingPayload
+from litellm_proxy._types import CommonProxyErrors
 if TYPE_CHECKING:
     from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
@@ -22,7 +22,7 @@ else:
 class GCSBucketLogger(GCSBucketBase, AdditionalLoggingUtils):
     def __init__(self, bucket_name: Optional[str] = None) -> None:
-        from litellm.proxy.proxy_server import premium_user
+        from litellm_proxy.proxy_server import premium_user
         super().__init__(bucket_name=bucket_name)
@@ -48,7 +48,7 @@ class GCSBucketLogger(GCSBucketBase, AdditionalLoggingUtils):
     #### ASYNC ####
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
-        from litellm.proxy.proxy_server import premium_user
+        from litellm_proxy.proxy_server import premium_user
         if premium_user is not True:
             raise ValueError(

@@ -15,7 +15,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 from litellm.types.utils import StandardLoggingPayload
 if TYPE_CHECKING:
-    from litellm.proxy._types import SpendLogsPayload
+    from litellm_proxy._types import SpendLogsPayload
 else:
     SpendLogsPayload = Any
@@ -44,7 +44,7 @@ class GcsPubSubLogger(CustomBatchLogger):
             topic_id (str): Pub/Sub topic ID
             credentials_path (str, optional): Path to Google Cloud credentials JSON file
         """
-        from litellm.proxy.utils import _premium_user_check
+        from litellm_proxy.utils import _premium_user_check
         _premium_user_check()
@@ -108,10 +108,10 @@ class GcsPubSubLogger(CustomBatchLogger):
         Raises:
             Raises a NON Blocking verbose_logger.exception if an error occurs
         """
-        from litellm.proxy.spend_tracking.spend_tracking_utils import (
+        from litellm_proxy.spend_tracking.spend_tracking_utils import (
             get_logging_payload,
         )
-        from litellm.proxy.utils import _premium_user_check
+        from litellm_proxy.utils import _premium_user_check
         _premium_user_check()

@@ -1,7 +1,7 @@
 import json
 from typing import TYPE_CHECKING, Any, Union
-from litellm.proxy._types import SpanAttributes
+from litellm.types.integrations.opentelemetry import SpanAttributes
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span

@@ -18,10 +18,10 @@ if TYPE_CHECKING:
     from opentelemetry.sdk.trace.export import SpanExporter as _SpanExporter
     from opentelemetry.trace import Span as _Span
-    from litellm.proxy._types import (
+    from litellm_proxy._types import (
         ManagementEndpointLoggingPayload as _ManagementEndpointLoggingPayload,
     )
-    from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
+    from litellm_proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
     Span = Union[_Span, Any]
     SpanExporter = Union[_SpanExporter, Any]
@@ -126,7 +126,7 @@ class OpenTelemetry(CustomLogger):
         - Adds Otel as a service callback
         - Sets `proxy_server.open_telemetry_logger` to self
         """
-        from litellm.proxy import proxy_server
+        from litellm_proxy import proxy_server
         # Add Otel as a service callback
         if "otel" not in litellm.service_callback:
@@ -350,9 +350,9 @@ class OpenTelemetry(CustomLogger):
         """
         from opentelemetry import trace
-        standard_callback_dynamic_params: Optional[
-            StandardCallbackDynamicParams
-        ] = kwargs.get("standard_callback_dynamic_params")
+        standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
+            kwargs.get("standard_callback_dynamic_params")
+        )
         if not standard_callback_dynamic_params:
             return
@@ -406,7 +406,7 @@ class OpenTelemetry(CustomLogger):
     def set_tools_attributes(self, span: Span, tools):
         import json
-        from litellm.proxy._types import SpanAttributes
+        from litellm.types.integrations.opentelemetry import SpanAttributes
         if not tools:
             return
@@ -460,7 +460,7 @@ class OpenTelemetry(CustomLogger):
     def _tool_calls_kv_pair(
         tool_calls: List[ChatCompletionMessageToolCall],
     ) -> Dict[str, Any]:
-        from litellm.proxy._types import SpanAttributes
+        from litellm.types.integrations.opentelemetry import SpanAttributes
         kv_pairs: Dict[str, Any] = {}
         for idx, tool_call in enumerate(tool_calls):
@@ -496,7 +496,7 @@ class OpenTelemetry(CustomLogger):
                 span, kwargs, response_obj
             )
             return
-        from litellm.proxy._types import SpanAttributes
+        from litellm.types.integrations.opentelemetry import SpanAttributes
         optional_params = kwargs.get("optional_params", {})
         litellm_params = kwargs.get("litellm_params", {}) or {}

@@ -9,7 +9,7 @@ Handles two types of alerts:
 import asyncio
 import os
 from datetime import datetime, timedelta, timezone
-from typing import List, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
 from litellm._logging import verbose_logger
 from litellm.caching import DualCache
@@ -19,7 +19,6 @@ from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
     httpxSpecialProvider,
 )
-from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.integrations.pagerduty import (
     AlertingConfig,
     PagerDutyInternalEvent,
@@ -31,6 +30,12 @@ from litellm.types.utils import (
     StandardLoggingPayloadErrorInformation,
 )
+if TYPE_CHECKING:
+    from litellm_proxy._types import UserAPIKeyAuth
+else:
+    UserAPIKeyAuth = Any
 PAGERDUTY_DEFAULT_FAILURE_THRESHOLD = 60
 PAGERDUTY_DEFAULT_FAILURE_THRESHOLD_WINDOW_SECONDS = 60
 PAGERDUTY_DEFAULT_HANGING_THRESHOLD_SECONDS = 60
@@ -46,7 +51,7 @@ class PagerDutyAlerting(SlackAlerting):
     def __init__(
         self, alerting_args: Optional[Union[AlertingConfig, dict]] = None, **kwargs
     ):
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         super().__init__()
         _api_key = os.getenv("PAGERDUTY_API_KEY")

@@ -18,10 +18,10 @@ from typing import (
 import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
-from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
 from litellm.types.integrations.prometheus import *
 from litellm.types.utils import StandardLoggingPayload
 from litellm.utils import get_end_user_id_for_cost_tracking
+from litellm_proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
 if TYPE_CHECKING:
     from apscheduler.schedulers.asyncio import AsyncIOScheduler
@@ -38,7 +38,7 @@ class PrometheusLogger(CustomLogger):
         try:
             from prometheus_client import Counter, Gauge, Histogram
-            from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+            from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
             if premium_user is not True:
                 verbose_logger.warning(
@@ -456,7 +456,7 @@ class PrometheusLogger(CustomLogger):
             and isinstance(user_api_key, str)
             and user_api_key.startswith("sk-")
         ):
-            from litellm.proxy.utils import hash_token
+            from litellm_proxy.utils import hash_token
             user_api_key = hash_token(user_api_key)
@@ -661,7 +661,7 @@ class PrometheusLogger(CustomLogger):
         kwargs: dict,
         metadata: dict,
     ):
-        from litellm.proxy.common_utils.callback_utils import (
+        from litellm_proxy.common_utils.callback_utils import (
            get_model_group_from_litellm_kwargs,
         )
@@ -1363,7 +1363,7 @@ class PrometheusLogger(CustomLogger):
            set_metrics_function: Function to set metrics for the fetched data.
            data_type: String representing the type of data ("teams" or "keys") for logging purposes.
         """
-        from litellm.proxy.proxy_server import prisma_client
+        from litellm_proxy.proxy_server import prisma_client
         if prisma_client is None:
             return
@@ -1398,10 +1398,10 @@ class PrometheusLogger(CustomLogger):
         """
         Initialize team budget metrics by reusing the generic pagination logic.
         """
-        from litellm.proxy.management_endpoints.team_endpoints import (
+        from litellm_proxy.management_endpoints.team_endpoints import (
             get_paginated_teams,
         )
-        from litellm.proxy.proxy_server import prisma_client
+        from litellm_proxy.proxy_server import prisma_client
         if prisma_client is None:
             verbose_logger.debug(
@@ -1432,10 +1432,10 @@ class PrometheusLogger(CustomLogger):
         from typing import Union
         from litellm.constants import UI_SESSION_TOKEN_TEAM_ID
-        from litellm.proxy.management_endpoints.key_management_endpoints import (
+        from litellm_proxy.management_endpoints.key_management_endpoints import (
             _list_key_helper,
         )
-        from litellm.proxy.proxy_server import prisma_client
+        from litellm_proxy.proxy_server import prisma_client
         if prisma_client is None:
             verbose_logger.debug(
@@ -1480,7 +1480,7 @@ class PrometheusLogger(CustomLogger):
         - If redis cache is not available, we initialize the metrics directly.
         """
         from litellm.constants import PROMETHEUS_EMIT_BUDGET_METRICS_JOB_NAME
-        from litellm.proxy.proxy_server import proxy_logging_obj
+        from litellm_proxy.proxy_server import proxy_logging_obj
         pod_lock_manager = proxy_logging_obj.db_spend_update_writer.pod_lock_manager
@@ -1561,8 +1561,8 @@ class PrometheusLogger(CustomLogger):
         Fields not available in metadata:
         - `budget_reset_at`
         """
-        from litellm.proxy.auth.auth_checks import get_team_object
-        from litellm.proxy.proxy_server import prisma_client, user_api_key_cache
+        from litellm_proxy.auth.auth_checks import get_team_object
+        from litellm_proxy.proxy_server import prisma_client, user_api_key_cache
         _total_team_spend = (spend or 0) + response_cost
         team_object = LiteLLM_TeamTable(
@@ -1711,8 +1711,8 @@ class PrometheusLogger(CustomLogger):
         """
         Assemble a UserAPIKeyAuth object
         """
-        from litellm.proxy.auth.auth_checks import get_key_object
-        from litellm.proxy.proxy_server import prisma_client, user_api_key_cache
+        from litellm_proxy.auth.auth_checks import get_key_object
+        from litellm_proxy.proxy_server import prisma_client, user_api_key_cache
         _total_key_spend = (key_spend or 0) + response_cost
         user_api_key_dict = UserAPIKeyAuth(
@@ -1803,8 +1803,8 @@ class PrometheusLogger(CustomLogger):
         from prometheus_client import make_asgi_app
         from litellm._logging import verbose_proxy_logger
-        from litellm.proxy._types import CommonProxyErrors
-        from litellm.proxy.proxy_server import app
+        from litellm_proxy._types import CommonProxyErrors
+        from litellm_proxy.proxy_server import app
         if premium_user is not True:
             verbose_proxy_logger.warning(

@@ -28,7 +28,6 @@ from litellm._logging import _is_debugging_on, verbose_logger
 from litellm.batches.batch_utils import _handle_completed_batch
 from litellm.caching.caching import DualCache, InMemoryCache
 from litellm.caching.caching_handler import LLMCachingHandler
 from litellm.constants import (
     DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT,
     DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT,
@@ -2886,7 +2885,7 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
         _in_memory_loggers.append(_otel_logger)
         return _otel_logger  # type: ignore
     elif logging_integration == "dynamic_rate_limiter":
-        from litellm.proxy.hooks.dynamic_rate_limiter import (
+        from litellm_proxy.hooks.dynamic_rate_limiter import (
             _PROXY_DynamicRateLimitHandler,
         )
@@ -3074,7 +3073,7 @@ def get_custom_logger_compatible_class(  # noqa: PLR0915
         return callback  # type: ignore
     elif logging_integration == "dynamic_rate_limiter":
-        from litellm.proxy.hooks.dynamic_rate_limiter import (
+        from litellm_proxy.hooks.dynamic_rate_limiter import (
             _PROXY_DynamicRateLimitHandler,
         )
@@ -3130,7 +3129,7 @@ def _get_custom_logger_settings_from_proxy_server(callback_name: str) -> Dict:
      otel:
        message_logging: False
    """
-    from litellm.proxy.proxy_server import callback_settings
+    from litellm_proxy.proxy_server import callback_settings
    if callback_settings:
        return dict(callback_settings.get(callback_name, {}))

@@ -342,7 +342,7 @@ def get_format_from_file_id(file_id: Optional[str]) -> Optional[str]:
     unified_file_id = litellm_proxy:{};unified_id,{}
     If not a unified file id, returns 'file' as default format
     """
-    from litellm.proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles
+    from litellm_proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles
     if not file_id:
         return None

@@ -37,15 +37,15 @@ class AnthropicMessagesHandler:
         """Helper function to handle Anthropic streaming responses using the existing logging handlers"""
         from datetime import datetime
-        from litellm.proxy.pass_through_endpoints.streaming_handler import (
-            PassThroughStreamingHandler,
-        )
-        from litellm.proxy.pass_through_endpoints.success_handler import (
-            PassThroughEndpointLogging,
-        )
         from litellm.types.passthrough_endpoints.pass_through_endpoints import (
             EndpointType,
         )
+        from litellm_proxy.pass_through_endpoints.streaming_handler import (
+            PassThroughStreamingHandler,
+        )
+        from litellm_proxy.pass_through_endpoints.success_handler import (
+            PassThroughEndpointLogging,
+        )
         # Create success handler object
         passthrough_success_handler_obj = PassThroughEndpointLogging()

@@ -1,2 +0,0 @@
-.env
-secrets.toml

@@ -4550,7 +4550,7 @@ class Router:
         Each provider uses diff .env vars for pass-through endpoints, this helper uses the deployment credentials to set the .env vars for pass-through endpoints
         """
         if deployment.litellm_params.use_in_pass_through is True:
-            from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
+            from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import (
                 passthrough_endpoint_router,
             )

@@ -10,11 +10,11 @@ This means you can use this with weighted-pick, lowest-latency, simple-shuffle,
 Example:
 ```
 openai:
   budget_limit: 0.000000000001
   time_period: 1d
 anthropic:
   budget_limit: 100
   time_period: 7d
 ```
 """
@@ -53,9 +53,9 @@ class RouterBudgetLimiting(CustomLogger):
         self.dual_cache = dual_cache
         self.redis_increment_operation_queue: List[RedisPipelineIncrementOperation] = []
         asyncio.create_task(self.periodic_sync_in_memory_spend_with_redis())
-        self.provider_budget_config: Optional[
-            GenericBudgetConfigType
-        ] = provider_budget_config
+        self.provider_budget_config: Optional[GenericBudgetConfigType] = (
+            provider_budget_config
+        )
         self.deployment_budget_config: Optional[GenericBudgetConfigType] = None
         self.tag_budget_config: Optional[GenericBudgetConfigType] = None
         self._init_provider_budgets()
@@ -797,7 +797,7 @@ class RouterBudgetLimiting(CustomLogger):
     def _init_tag_budgets(self):
         if litellm.tag_budget_config is None:
             return
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         if premium_user is not True:
             raise ValueError(

@@ -187,7 +187,7 @@ class LowestCostLoggingHandler(CustomLogger):
                     self.logged_success += 1
         except Exception as e:
             verbose_logger.exception(
-                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                "litellm_proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
                     str(e)
                 )
             )

@@ -170,7 +170,7 @@ class LowestLatencyLoggingHandler(CustomLogger):
                     self.logged_success += 1
         except Exception as e:
             verbose_logger.exception(
-                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                "litellm_proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
                     str(e)
                 )
             )
@@ -238,7 +238,7 @@ class LowestLatencyLoggingHandler(CustomLogger):
             return
         except Exception as e:
             verbose_logger.exception(
-                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                "litellm_proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
                     str(e)
                 )
             )

@@ -270,7 +270,7 @@ class LowestTPMLoggingHandler_v2(BaseRoutingStrategy, CustomLogger):
                     self.logged_success += 1
         except Exception as e:
             verbose_logger.exception(
-                "litellm.proxy.hooks.lowest_tpm_rpm_v2.py::log_success_event(): Exception occured - {}".format(
+                "litellm_proxy.hooks.lowest_tpm_rpm_v2.py::log_success_event(): Exception occured - {}".format(
                     str(e)
                 )
             )
@@ -321,7 +321,7 @@ class LowestTPMLoggingHandler_v2(BaseRoutingStrategy, CustomLogger):
                     self.logged_success += 1
         except Exception as e:
             verbose_logger.exception(
-                "litellm.proxy.hooks.lowest_tpm_rpm_v2.py::async_log_success_event(): Exception occured - {}".format(
+                "litellm_proxy.hooks.lowest_tpm_rpm_v2.py::async_log_success_event(): Exception occured - {}".format(
                     str(e)
                 )
             )

@@ -4,7 +4,7 @@ This is a file for the AWS Secret Manager Integration
 Relevant issue: https://github.com/BerriAI/litellm/issues/1883
 Requires:
 * `os.environ["AWS_REGION_NAME"],
 * `pip install boto3>=1.28.57`
 """
@@ -15,7 +15,7 @@ import re
 from typing import Any, Dict, Optional
 import litellm
-from litellm.proxy._types import KeyManagementSystem
+from litellm_proxy._types import KeyManagementSystem
 def validate_environment():

@@ -9,7 +9,7 @@ Handles Async Operations for:
 Relevant issue: https://github.com/BerriAI/litellm/issues/1883
 Requires:
 * `os.environ["AWS_REGION_NAME"],
 * `pip install boto3>=1.28.57`
 """
@@ -26,8 +26,8 @@ from litellm.llms.custom_httpx.http_handler import (
     _get_httpx_client,
     get_async_httpx_client,
 )
-from litellm.proxy._types import KeyManagementSystem
 from litellm.types.llms.custom_http import httpxSpecialProvider
+from litellm_proxy._types import KeyManagementSystem
 from .base_secret_manager import BaseSecretManager

@@ -12,7 +12,7 @@ import os
 from typing import Optional
 import litellm
-from litellm.proxy._types import KeyManagementSystem
+from litellm_proxy._types import KeyManagementSystem
 def validate_environment():

@@ -8,7 +8,7 @@ from litellm.caching.caching import InMemoryCache
 from litellm.constants import SECRET_MANAGER_REFRESH_INTERVAL
 from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
 from litellm.llms.custom_httpx.http_handler import _get_httpx_client
-from litellm.proxy._types import CommonProxyErrors, KeyManagementSystem
+from litellm_proxy._types import CommonProxyErrors, KeyManagementSystem
 class GoogleSecretManager(GCSBucketBase):
@@ -22,7 +22,7 @@ class GoogleSecretManager(GCSBucketBase):
             refresh_interval (int, optional): The refresh interval in seconds. Defaults to 86400. (24 hours)
             always_read_secret_manager (bool, optional): Whether to always read from the secret manager. Defaults to False. Since we do want to cache values
         """
-        from litellm.proxy.proxy_server import premium_user
+        from litellm_proxy.proxy_server import premium_user
         if premium_user is not True:
             raise ValueError(

@@ -12,14 +12,14 @@ from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
     httpxSpecialProvider,
 )
-from litellm.proxy._types import KeyManagementSystem
+from litellm_proxy._types import KeyManagementSystem
 from .base_secret_manager import BaseSecretManager
 class HashicorpSecretManager(BaseSecretManager):
     def __init__(self):
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         # Vault-specific config
         self.vault_addr = os.getenv("HCP_VAULT_ADDR", "http://127.0.0.1:8200")

@@ -11,7 +11,7 @@ import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.caching.caching import DualCache
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
-from litellm.proxy._types import KeyManagementSystem
+from litellm.types.utils import KeyManagementSystem
 oidc_cache = DualCache()

@@ -0,0 +1,49 @@
+import enum
+
+
+class SpanAttributes(str, enum.Enum):
+    # Note: We've taken this from opentelemetry-semantic-conventions-ai
+    # I chose to not add a new dependency to litellm for this
+    # Semantic Conventions for LLM requests, this needs to be removed after
+    # OpenTelemetry Semantic Conventions support Gen AI.
+    # Issue at https://github.com/open-telemetry/opentelemetry-python/issues/3868
+    # Refer to https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/llm-spans.md
+    LLM_SYSTEM = "gen_ai.system"
+    LLM_REQUEST_MODEL = "gen_ai.request.model"
+    LLM_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
+    LLM_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
+    LLM_REQUEST_TOP_P = "gen_ai.request.top_p"
+    LLM_PROMPTS = "gen_ai.prompt"
+    LLM_COMPLETIONS = "gen_ai.completion"
+    LLM_RESPONSE_MODEL = "gen_ai.response.model"
+    LLM_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
+    LLM_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"
+    LLM_TOKEN_TYPE = "gen_ai.token.type"
+    # To be added
+    # LLM_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reasons"
+    # LLM_RESPONSE_ID = "gen_ai.response.id"
+    # LLM
+    LLM_REQUEST_TYPE = "llm.request.type"
+    LLM_USAGE_TOTAL_TOKENS = "llm.usage.total_tokens"
+    LLM_USAGE_TOKEN_TYPE = "llm.usage.token_type"
+    LLM_USER = "llm.user"
+    LLM_HEADERS = "llm.headers"
+    LLM_TOP_K = "llm.top_k"
+    LLM_IS_STREAMING = "llm.is_streaming"
+    LLM_FREQUENCY_PENALTY = "llm.frequency_penalty"
+    LLM_PRESENCE_PENALTY = "llm.presence_penalty"
+    LLM_CHAT_STOP_SEQUENCES = "llm.chat.stop_sequences"
+    LLM_REQUEST_FUNCTIONS = "llm.request.functions"
+    LLM_REQUEST_REPETITION_PENALTY = "llm.request.repetition_penalty"
+    LLM_RESPONSE_FINISH_REASON = "llm.response.finish_reason"
+    LLM_RESPONSE_STOP_REASON = "llm.response.stop_reason"
+    LLM_CONTENT_COMPLETION_CHUNK = "llm.content.completion.chunk"
+    # OpenAI
+    LLM_OPENAI_RESPONSE_SYSTEM_FINGERPRINT = "gen_ai.openai.system_fingerprint"
+    LLM_OPENAI_API_BASE = "gen_ai.openai.api_base"
+    LLM_OPENAI_API_VERSION = "gen_ai.openai.api_version"
+    LLM_OPENAI_API_TYPE = "gen_ai.openai.api_type"
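
This new module gives `SpanAttributes` a home under `litellm.types`, so integrations no longer need to import it from the proxy package. Because the enum subclasses `str`, its members can be passed wherever the OpenTelemetry API expects an attribute key; a minimal usage sketch (the tracer setup is illustrative, not part of this diff):

```python
from opentelemetry import trace

from litellm.types.integrations.opentelemetry import SpanAttributes

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("llm.completion") as span:
    # str-valued enum members double as attribute keys.
    span.set_attribute(SpanAttributes.LLM_SYSTEM.value, "openai")
    span.set_attribute(SpanAttributes.LLM_REQUEST_MODEL.value, "gpt-4o")
    span.set_attribute(SpanAttributes.LLM_USAGE_TOTAL_TOKENS.value, 42)
```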

@@ -3,7 +3,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
 from fastapi import HTTPException
 from pydantic import BaseModel, EmailStr
-from litellm.proxy._types import LiteLLM_UserTableWithKeyCount
+from litellm_proxy._types import LiteLLM_UserTableWithKeyCount
 class UserListResponse(BaseModel):

@@ -2,7 +2,7 @@ from typing import List, Literal, Optional, TypedDict
 from pydantic import Field
-from litellm.proxy._types import LiteLLMPydanticObjectBase, LitellmUserRoles
+from litellm.types.utils import LiteLLMPydanticObjectBase
 class MicrosoftGraphAPIUserGroupDirectoryObject(TypedDict, total=False):

@@ -1,3 +1,4 @@
+import enum
 import json
 import time
 import uuid
@@ -2262,3 +2263,61 @@ class SpecialEnums(Enum):
 LLMResponseTypes = Union[
     ModelResponse, EmbeddingResponse, ImageResponse, OpenAIFileObject
 ]
+
+AllowedModelRegion = Literal["eu", "us"]
+
+
+class KeyManagementSystem(enum.Enum):
+    GOOGLE_KMS = "google_kms"
+    AZURE_KEY_VAULT = "azure_key_vault"
+    AWS_SECRET_MANAGER = "aws_secret_manager"
+    GOOGLE_SECRET_MANAGER = "google_secret_manager"
+    HASHICORP_VAULT = "hashicorp_vault"
+    LOCAL = "local"
+    AWS_KMS = "aws_kms"
+
+
+class KeyManagementSettings(LiteLLMPydanticObjectBase):
+    hosted_keys: Optional[List] = None
+    store_virtual_keys: Optional[bool] = False
+    """
+    If True, virtual keys created by litellm will be stored in the secret manager
+    """
+    prefix_for_stored_virtual_keys: str = "litellm/"
+    """
+    If set, this prefix will be used for stored virtual keys in the secret manager
+    """
+    access_mode: Literal["read_only", "write_only", "read_and_write"] = "read_only"
+    """
+    Access mode for the secret manager, when write_only will only use for writing secrets
+    """
+    primary_secret_name: Optional[str] = None
+    """
+    If set, will read secrets from this primary secret in the secret manager
+    eg. on AWS you can store multiple secret values as K/V pairs in a single secret
+    """
+
+
+class LiteLLM_UpperboundKeyGenerateParams(LiteLLMPydanticObjectBase):
+    """
+    Set default upperbound to max budget a key called via `/key/generate` can be.
+
+    Args:
+        max_budget (Optional[float], optional): Max budget a key can be. Defaults to None.
+        budget_duration (Optional[str], optional): Duration of the budget. Defaults to None.
+        duration (Optional[str], optional): Duration of the key. Defaults to None.
+        max_parallel_requests (Optional[int], optional): Max number of requests that can be made in parallel. Defaults to None.
+        tpm_limit (Optional[int], optional): Tpm limit. Defaults to None.
+        rpm_limit (Optional[int], optional): Rpm limit. Defaults to None.
+    """
+
+    max_budget: Optional[float] = None
+    budget_duration: Optional[str] = None
+    duration: Optional[str] = None
+    max_parallel_requests: Optional[int] = None
+    tpm_limit: Optional[int] = None
+    rpm_limit: Optional[int] = None
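
With these classes relocated to `litellm.types.utils`, key-management behavior can be configured without importing the proxy package. A minimal sketch with illustrative values (field names are taken from the definitions above):

```python
from litellm.types.utils import (
    KeyManagementSettings,
    KeyManagementSystem,
    LiteLLM_UpperboundKeyGenerateParams,
)

# Store litellm-generated virtual keys in the secret manager, read/write.
settings = KeyManagementSettings(
    store_virtual_keys=True,
    prefix_for_stored_virtual_keys="litellm/",
    access_mode="read_and_write",
)

# Cap what /key/generate may hand out.
upperbound = LiteLLM_UpperboundKeyGenerateParams(
    max_budget=10.0,
    budget_duration="30d",
    tpm_limit=1000,
)

assert KeyManagementSystem.AWS_SECRET_MANAGER.value == "aws_secret_manager"
```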

@@ -259,13 +259,13 @@ from .exceptions import (
     UnprocessableEntityError,
     UnsupportedParamsError,
 )
-from .proxy._types import AllowedModelRegion, KeyManagementSystem
 from .types.llms.openai import (
     ChatCompletionDeltaToolCallChunk,
     ChatCompletionToolCallChunk,
     ChatCompletionToolCallFunctionChunk,
 )
 from .types.router import LiteLLM_Params
+from .types.utils import AllowedModelRegion, KeyManagementSystem
 ####### ENVIRONMENT VARIABLES ####################
 # Adjust to your specific application needs / system capabilities.

@@ -13,8 +13,8 @@ from pydantic import ConfigDict, ValidationError
 from litellm._logging import verbose_logger
 from litellm.constants import MCP_TOOL_NAME_PREFIX
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
 from litellm.types.mcp_server.mcp_server_manager import MCPInfo
 from litellm.types.utils import StandardLoggingMCPToolCall
 from litellm.utils import client
@@ -288,7 +288,7 @@ if MCP_AVAILABLE:
         """
         REST API to call a specific MCP tool with the provided arguments
         """
-        from litellm.proxy.proxy_server import add_litellm_data_to_request, proxy_config
+        from litellm_proxy.proxy_server import add_litellm_data_to_request, proxy_config
         data = await request.json()
         data = await add_litellm_data_to_request(

@@ -2,7 +2,7 @@ import json
 from typing import Any, Callable, Dict, List, Optional
 from litellm._logging import verbose_logger
-from litellm.proxy.types_utils.utils import get_instance_fn
+from litellm_proxy.types_utils.utils import get_instance_fn
 from litellm.types.mcp_server.tool_registry import MCPTool

(12 binary image files changed; each image's size is unchanged — 381 B, 414 B, 2.5 KiB, 2.2 KiB, 8 KiB, 742 B, 528 B, 2.3 KiB, 592 B, 728 B, 619 B, and 7.2 KiB. Some files are not shown because too many files changed in this diff.)