commit 24aed29ad7
Author: Ishaan Jaff, 2025-04-23 23:48:00 +00:00 (committed by GitHub)
414 changed files with 2224 additions and 2205 deletions

@@ -402,7 +402,7 @@ If you have suggestions on how to improve the code quality feel free to open an
 1. (In root) create virtual environment `python -m venv .venv`
 2. Activate virtual environment `source .venv/bin/activate`
 3. Install dependencies `pip install -e ".[all]"`
-4. Start proxy backend `uvicorn litellm.proxy.proxy_server:app --host localhost --port 4000 --reload`
+4. Start proxy backend `uvicorn litellm_proxy.proxy_server:app --host localhost --port 4000 --reload`
 ### Frontend
 1. Navigate to `ui/litellm-dashboard`
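
For reference, the same backend can be started from Python rather than the shell; a minimal sketch, assuming `uvicorn` is installed and the post-rename `litellm_proxy` package is importable:

```python
# Programmatic equivalent of the CLI command above (illustrative sketch).
import uvicorn

if __name__ == "__main__":
    uvicorn.run(
        "litellm_proxy.proxy_server:app",  # import string, required for reload to work
        host="localhost",
        port=4000,
        reload=True,  # auto-restart on code changes
    )
```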

@@ -17,7 +17,7 @@ This function is called just before a litellm completion call is made, and allow
 ```python
 from litellm.integrations.custom_logger import CustomLogger
 import litellm
-from litellm.proxy.proxy_server import UserAPIKeyAuth, DualCache
+from litellm_proxy.proxy_server import UserAPIKeyAuth, DualCache
 from typing import Optional, Literal
 # This file includes the custom callbacks for LiteLLM Proxy

@@ -9,7 +9,7 @@ Here's how:
 Make sure the response type follows the `UserAPIKeyAuth` pydantic object. This is used for logging usage specific to that user key.
 ```python
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
     try:

@@ -20,12 +20,12 @@ Make sure the response type follows the `SSOUserDefinedValues` pydantic object.
 from fastapi import Request
 from fastapi_sso.sso.base import OpenID
-from litellm.proxy._types import LitellmUserRoles, SSOUserDefinedValues
-from litellm.proxy.management_endpoints.internal_user_endpoints import (
+from litellm_proxy._types import LitellmUserRoles, SSOUserDefinedValues
+from litellm_proxy.management_endpoints.internal_user_endpoints import (
     new_user,
     user_info,
 )
-from litellm.proxy.management_endpoints.team_endpoints import add_new_member
+from litellm_proxy.management_endpoints.team_endpoints import add_new_member
 async def custom_sso_handler(userIDPInfo: OpenID) -> SSOUserDefinedValues:

@@ -29,8 +29,8 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
 from litellm.types.guardrails import GuardrailEventHooks
@@ -449,7 +449,7 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 class myCustomGuardrail(CustomGuardrail):
     def __init__(self, **kwargs):

@@ -14,11 +14,11 @@ sys.path.insert(
 from typing import Optional, Literal, Any
 import litellm
 import sys
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from fastapi import HTTPException
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
+from litellm_proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
 from litellm.litellm_core_utils.logging_utils import (
     convert_litellm_response_object_to_str,
 )
@@ -140,7 +140,7 @@ class AporiaGuardrail(CustomGuardrail):
         user_api_key_dict: UserAPIKeyAuth,
         response,
     ):
-        from litellm.proxy.common_utils.callback_utils import (
+        from litellm_proxy.common_utils.callback_utils import (
             add_guardrail_to_applied_guardrails_header,
         )
@@ -176,7 +176,7 @@ class AporiaGuardrail(CustomGuardrail):
             "responses",
         ],
     ):
-        from litellm.proxy.common_utils.callback_utils import (
+        from litellm_proxy.common_utils.callback_utils import (
             add_guardrail_to_applied_guardrails_header,
         )

@@ -10,7 +10,7 @@
 from typing import Literal
 import litellm
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_proxy_logger
 from fastapi import HTTPException

@@ -9,9 +9,9 @@
 from typing import Optional, Literal
 import litellm
-from litellm.proxy.utils import PrismaClient
+from litellm_proxy.utils import PrismaClient
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
+from litellm_proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
 from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_proxy_logger
 from fastapi import HTTPException

@@ -9,7 +9,7 @@
 from typing import Literal
 import litellm
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
 from litellm._logging import verbose_proxy_logger

@@ -17,7 +17,7 @@ sys.path.insert(
 from typing import Optional, Literal
 import litellm
 import sys
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
 from litellm._logging import verbose_proxy_logger

@@ -9,7 +9,7 @@
 from typing import Optional, Literal
 import litellm
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
 from litellm._logging import verbose_proxy_logger

@@ -14,7 +14,7 @@ sys.path.insert(
 from typing import Literal
 import litellm
 import sys
-from litellm.proxy._types import UserAPIKeyAuth
+from litellm_proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
 from litellm._logging import verbose_proxy_logger
@@ -50,7 +50,7 @@ class _ENTERPRISE_OpenAI_Moderation(CustomLogger):
             if "content" in m and isinstance(m["content"], str):
                 text += m["content"]
-        from litellm.proxy.proxy_server import llm_router
+        from litellm_proxy.proxy_server import llm_router
         if llm_router is None:
             return

@@ -11,13 +11,18 @@ import os
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-from typing import Optional
+from typing import Any, Optional
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth
+from typing import TYPE_CHECKING
 from litellm._logging import verbose_proxy_logger
 import tempfile
 from litellm.integrations.custom_guardrail import CustomGuardrail
+if TYPE_CHECKING:
+    from litellm_proxy._types import UserAPIKeyAuth
+else:
+    UserAPIKeyAuth = Any
 GUARDRAIL_NAME = "hide_secrets"
 _custom_plugins_path = "file://" + os.path.join(
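
The `if TYPE_CHECKING:` block introduced above is the standard stdlib pattern for a type-only import: the annotation stays visible to mypy/pyright while the runtime import graph is cut, which is what breaks the dependency from `litellm` onto the proxy package. A minimal self-contained sketch of the pattern (the hook function is hypothetical, for illustration only):

```python
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # Only evaluated by static type checkers; never executed at runtime.
    from litellm_proxy._types import UserAPIKeyAuth
else:
    # Runtime stand-in so the name still resolves if annotations are inspected.
    UserAPIKeyAuth = Any


def pre_call_hook(user_api_key_dict: "UserAPIKeyAuth") -> None:
    # Quoted annotation: not evaluated when the function is defined.
    print(user_api_key_dict)
```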

@@ -61,13 +61,14 @@ from litellm.constants import (
     DEFAULT_ALLOWED_FAILS,
 )
 from litellm.types.guardrails import GuardrailItem
-from litellm.proxy._types import (
+from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams
+from litellm.types.utils import (
+    StandardKeyGenerationConfig,
+    LlmProviders,
     KeyManagementSystem,
     KeyManagementSettings,
     LiteLLM_UpperboundKeyGenerateParams,
 )
-from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams
-from litellm.types.utils import StandardKeyGenerationConfig, LlmProviders
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.logging_callback_manager import LoggingCallbackManager
 import httpx
@@ -1048,7 +1049,6 @@ from .exceptions import (
     MockException,
 )
 from .budget_manager import BudgetManager
-from .proxy.proxy_cli import run_server
 from .router import Router
 from .assistants.main import *
 from .batches.main import *
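
The net effect of these two hunks is that `litellm/__init__.py` no longer imports anything from the proxy package at import time; the key-management types resolve from `litellm.types.utils` instead. A sketch of the post-change import surface (names taken from the hunk above):

```python
from litellm.types.proxy.management_endpoints.ui_sso import DefaultTeamSSOParams
from litellm.types.utils import (
    KeyManagementSettings,
    KeyManagementSystem,
    LiteLLM_UpperboundKeyGenerateParams,
    LlmProviders,
    StandardKeyGenerationConfig,
)
```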

@@ -4,7 +4,6 @@ from typing import TYPE_CHECKING, Any, Optional, Union
 import litellm
 from litellm._logging import verbose_logger
-from litellm.proxy._types import UserAPIKeyAuth
 from .integrations.custom_logger import CustomLogger
 from .integrations.datadog.datadog import DataDogLogger
@@ -15,11 +14,14 @@ from .types.services import ServiceLoggerPayload, ServiceTypes
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
+    from litellm_proxy._types import UserAPIKeyAuth
     Span = Union[_Span, Any]
     OTELClass = OpenTelemetry
 else:
     Span = Any
     OTELClass = Any
+    UserAPIKeyAuth = Any
 class ServiceLogging(CustomLogger):
@@ -143,7 +145,7 @@ class ServiceLogging(CustomLogger):
                 event_metadata=event_metadata,
             )
         elif callback == "otel" or isinstance(callback, OpenTelemetry):
-            from litellm.proxy.proxy_server import open_telemetry_logger
+            from litellm_proxy.proxy_server import open_telemetry_logger
             await self.init_otel_logger_if_none()
@@ -188,7 +190,7 @@ class ServiceLogging(CustomLogger):
         initializes otel_logger if it is None or no attribute exists on ServiceLogging Object
         """
-        from litellm.proxy.proxy_server import open_telemetry_logger
+        from litellm_proxy.proxy_server import open_telemetry_logger
         if not hasattr(self, "otel_logger"):
             if open_telemetry_logger is not None and isinstance(
@@ -251,7 +253,7 @@ class ServiceLogging(CustomLogger):
                 event_metadata=event_metadata,
             )
         elif callback == "otel" or isinstance(callback, OpenTelemetry):
-            from litellm.proxy.proxy_server import open_telemetry_logger
+            from litellm_proxy.proxy_server import open_telemetry_logger
             await self.init_otel_logger_if_none()

@@ -281,7 +281,7 @@ class QdrantSemanticCache(BaseCache):
     async def async_set_cache(self, key, value, **kwargs):
         import uuid
-        from litellm.proxy.proxy_server import llm_model_list, llm_router
+        from litellm_proxy.proxy_server import llm_model_list, llm_router
         print_verbose(f"async qdrant semantic-cache set_cache, kwargs: {kwargs}")
@@ -344,7 +344,7 @@ class QdrantSemanticCache(BaseCache):
     async def async_get_cache(self, key, **kwargs):
         print_verbose(f"async qdrant semantic-cache get_cache, kwargs: {kwargs}")
-        from litellm.proxy.proxy_server import llm_model_list, llm_router
+        from litellm_proxy.proxy_server import llm_model_list, llm_router
         # get the messages
         messages = kwargs["messages"]
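
Both hunks keep the proxy import inside the method body. Deferring the import to call time avoids a circular import between the caching layer and the proxy, and reads module-level globals such as `llm_router` only after the proxy process has populated them. A minimal sketch of the pattern (the helper name is hypothetical):

```python
def _resolve_proxy_router():
    # Imported lazily: this module stays importable even when litellm_proxy
    # is absent, and llm_router is read at call time, post-initialization.
    from litellm_proxy.proxy_server import llm_model_list, llm_router

    return llm_router, llm_model_list
```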

@@ -279,7 +279,7 @@ class RedisSemanticCache(BaseCache):
         Returns:
             List[float]: The embedding vector
         """
-        from litellm.proxy.proxy_server import llm_model_list, llm_router
+        from litellm_proxy.proxy_server import llm_model_list, llm_router
         # Route the embedding request through the proxy if appropriate
         router_model_names = (

@@ -26,8 +26,8 @@ from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
     httpxSpecialProvider,
 )
-from litellm.proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
 from litellm.types.integrations.slack_alerting import *
+from litellm_proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
 from ..email_templates.templates import *
 from .batching_handler import send_to_webhook, squash_payloads
@@ -823,9 +823,9 @@ class SlackAlerting(CustomBatchLogger):
         ### UNIQUE CACHE KEY ###
         cache_key = provider + region_name
-        outage_value: Optional[
-            ProviderRegionOutageModel
-        ] = await self.internal_usage_cache.async_get_cache(key=cache_key)
+        outage_value: Optional[ProviderRegionOutageModel] = (
+            await self.internal_usage_cache.async_get_cache(key=cache_key)
+        )
         if (
             getattr(exception, "status_code", None) is None
@@ -1148,7 +1148,7 @@ Model Info:
         email_logo_url: Optional[str] = None,
         email_support_contact: Optional[str] = None,
     ):
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         if premium_user is not True:
             if email_logo_url is not None or email_support_contact is not None:
@@ -1161,7 +1161,7 @@ Model Info:
         self, webhook_event: WebhookEvent
     ) -> bool:
         try:
-            from litellm.proxy.utils import send_email
+            from litellm_proxy.utils import send_email
             if self.alerting is None or "email" not in self.alerting:
                 # do nothing if user does not want email alerts
@@ -1170,7 +1170,7 @@ Model Info:
                     self.alerting,
                 )
                 return False
-            from litellm.proxy.proxy_server import premium_user, prisma_client
+            from litellm_proxy.proxy_server import premium_user, prisma_client
             email_logo_url = os.getenv(
                 "SMTP_SENDER_LOGO", os.getenv("EMAIL_LOGO_URL", None)
@@ -1271,8 +1271,8 @@ Model Info:
         Returns -> True if sent, False if not.
         """
-        from litellm.proxy.proxy_server import premium_user
-        from litellm.proxy.utils import send_email
+        from litellm_proxy.proxy_server import premium_user
+        from litellm_proxy.utils import send_email
         email_logo_url = os.getenv(
             "SMTP_SENDER_LOGO", os.getenv("EMAIL_LOGO_URL", None)
@@ -1406,9 +1406,9 @@ Model Info:
             self.alert_to_webhook_url is not None
             and alert_type in self.alert_to_webhook_url
         ):
-            slack_webhook_url: Optional[
-                Union[str, List[str]]
-            ] = self.alert_to_webhook_url[alert_type]
+            slack_webhook_url: Optional[Union[str, List[str]]] = (
+                self.alert_to_webhook_url[alert_type]
+            )
         elif self.default_webhook_url is not None:
             slack_webhook_url = self.default_webhook_url
         else:
@@ -1598,7 +1598,7 @@ Model Info:
             return
         try:
-            from litellm.proxy.spend_tracking.spend_management_endpoints import (
+            from litellm_proxy.spend_tracking.spend_management_endpoints import (
                 _get_spend_report_for_time_range,
             )
@@ -1662,7 +1662,7 @@ Model Info:
         try:
             from calendar import monthrange
-            from litellm.proxy.spend_tracking.spend_management_endpoints import (
+            from litellm_proxy.spend_tracking.spend_management_endpoints import (
                 _get_spend_report_for_time_range,
             )

@@ -5,8 +5,8 @@ Utils used for slack alerting
 import asyncio
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
-from litellm.proxy._types import AlertType
 from litellm.secret_managers.main import get_secret
+from litellm_proxy._types import AlertType
 if TYPE_CHECKING:
     from litellm.litellm_core_utils.litellm_logging import Logging as _Logging
@@ -17,7 +17,7 @@ else:
 def process_slack_alerting_variables(
-    alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]]
+    alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]],
 ) -> Optional[Dict[AlertType, Union[List[str], str]]]:
     """
     process alert_to_webhook_url

@@ -321,7 +321,7 @@ class AzureBlobStorageLogger(CustomBatchLogger):
         """
         Checks if the user is a premium user, raises an error if not
         """
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         if premium_user is not True:
             raise ValueError(

@@ -165,7 +165,7 @@ class CustomGuardrail(CustomLogger):
         """
         Returns True if the user is a premium user
         """
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         if premium_user is not True:
             verbose_logger.warning(
@@ -183,7 +183,7 @@ class CustomGuardrail(CustomLogger):
         """
         Builds `StandardLoggingGuardrailInformation` and adds it to the request metadata so it can be used for logging to DataDog, Langfuse, etc.
         """
-        from litellm.proxy.proxy_server import premium_user
+        from litellm_proxy.proxy_server import premium_user
         if premium_user is not True:
             verbose_logger.warning(

@@ -15,7 +15,6 @@ from typing import (
 from pydantic import BaseModel
 from litellm.caching.caching import DualCache
-from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.integrations.argilla import ArgillaItem
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionRequest
 from litellm.types.utils import (
@@ -30,9 +29,12 @@ from litellm.types.utils import (
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span
+    from litellm_proxy._types import UserAPIKeyAuth
     Span = Union[_Span, Any]
 else:
     Span = Any
+    UserAPIKeyAuth = Any
 class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callback#callback-class

@@ -3,10 +3,14 @@ Functions for sending Email Alerts
 """
 import os
-from typing import List, Optional
+from typing import TYPE_CHECKING, Any, List, Optional
 from litellm._logging import verbose_logger, verbose_proxy_logger
-from litellm.proxy._types import WebhookEvent
+if TYPE_CHECKING:
+    from litellm_proxy._types import WebhookEvent
+else:
+    WebhookEvent = Any
 # we use this for the email header, please send a test email if you change this. verify it looks good on email
 LITELLM_LOGO_URL = "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
@@ -19,7 +23,7 @@ async def get_all_team_member_emails(team_id: Optional[str] = None) -> list:
     )
     if team_id is None:
         return []
-    from litellm.proxy.proxy_server import prisma_client
+    from litellm_proxy.proxy_server import prisma_client
     if prisma_client is None:
         raise Exception("Not connected to DB!")
@@ -71,7 +75,7 @@ async def send_team_budget_alert(webhook_event: WebhookEvent) -> bool:
     Send an Email Alert to All Team Members when the Team Budget is crossed
     Returns -> True if sent, False if not.
     """
-    from litellm.proxy.utils import send_email
+    from litellm_proxy.utils import send_email
     _team_id = webhook_event.team_id
     team_alias = webhook_event.team_alias

@@ -9,10 +9,10 @@ from urllib.parse import quote
 from litellm._logging import verbose_logger
 from litellm.integrations.additional_logging_utils import AdditionalLoggingUtils
 from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
-from litellm.proxy._types import CommonProxyErrors
 from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus
 from litellm.types.integrations.gcs_bucket import *
 from litellm.types.utils import StandardLoggingPayload
+from litellm_proxy._types import CommonProxyErrors
 if TYPE_CHECKING:
     from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
@@ -22,7 +22,7 @@ else:
 class GCSBucketLogger(GCSBucketBase, AdditionalLoggingUtils):
     def __init__(self, bucket_name: Optional[str] = None) -> None:
-        from litellm.proxy.proxy_server import premium_user
+        from litellm_proxy.proxy_server import premium_user
         super().__init__(bucket_name=bucket_name)
@@ -48,7 +48,7 @@ class GCSBucketLogger(GCSBucketBase, AdditionalLoggingUtils):
     #### ASYNC ####
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
-        from litellm.proxy.proxy_server import premium_user
+        from litellm_proxy.proxy_server import premium_user
         if premium_user is not True:
             raise ValueError(

@@ -15,7 +15,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 from litellm.types.utils import StandardLoggingPayload
 if TYPE_CHECKING:
-    from litellm.proxy._types import SpendLogsPayload
+    from litellm_proxy._types import SpendLogsPayload
 else:
     SpendLogsPayload = Any
@@ -44,7 +44,7 @@ class GcsPubSubLogger(CustomBatchLogger):
             topic_id (str): Pub/Sub topic ID
             credentials_path (str, optional): Path to Google Cloud credentials JSON file
         """
-        from litellm.proxy.utils import _premium_user_check
+        from litellm_proxy.utils import _premium_user_check
         _premium_user_check()
@@ -108,10 +108,10 @@ class GcsPubSubLogger(CustomBatchLogger):
         Raises:
             Raises a NON Blocking verbose_logger.exception if an error occurs
         """
-        from litellm.proxy.spend_tracking.spend_tracking_utils import (
+        from litellm_proxy.spend_tracking.spend_tracking_utils import (
             get_logging_payload,
         )
-        from litellm.proxy.utils import _premium_user_check
+        from litellm_proxy.utils import _premium_user_check
         _premium_user_check()

@@ -1,7 +1,7 @@
 import json
 from typing import TYPE_CHECKING, Any, Union
-from litellm.proxy._types import SpanAttributes
+from litellm.types.integrations.opentelemetry import SpanAttributes
 if TYPE_CHECKING:
     from opentelemetry.trace import Span as _Span

@@ -18,10 +18,10 @@ if TYPE_CHECKING:
     from opentelemetry.sdk.trace.export import SpanExporter as _SpanExporter
     from opentelemetry.trace import Span as _Span
-    from litellm.proxy._types import (
+    from litellm_proxy._types import (
         ManagementEndpointLoggingPayload as _ManagementEndpointLoggingPayload,
     )
-    from litellm.proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
+    from litellm_proxy.proxy_server import UserAPIKeyAuth as _UserAPIKeyAuth
     Span = Union[_Span, Any]
     SpanExporter = Union[_SpanExporter, Any]
@@ -126,7 +126,7 @@ class OpenTelemetry(CustomLogger):
         - Adds Otel as a service callback
         - Sets `proxy_server.open_telemetry_logger` to self
         """
-        from litellm.proxy import proxy_server
+        from litellm_proxy import proxy_server
         # Add Otel as a service callback
         if "otel" not in litellm.service_callback:
@@ -350,9 +350,9 @@ class OpenTelemetry(CustomLogger):
         """
         from opentelemetry import trace
-        standard_callback_dynamic_params: Optional[
-            StandardCallbackDynamicParams
-        ] = kwargs.get("standard_callback_dynamic_params")
+        standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
+            kwargs.get("standard_callback_dynamic_params")
+        )
         if not standard_callback_dynamic_params:
             return
@@ -406,7 +406,7 @@ class OpenTelemetry(CustomLogger):
     def set_tools_attributes(self, span: Span, tools):
         import json
-        from litellm.proxy._types import SpanAttributes
+        from litellm.types.integrations.opentelemetry import SpanAttributes
         if not tools:
             return
@@ -460,7 +460,7 @@ class OpenTelemetry(CustomLogger):
     def _tool_calls_kv_pair(
         tool_calls: List[ChatCompletionMessageToolCall],
     ) -> Dict[str, Any]:
-        from litellm.proxy._types import SpanAttributes
+        from litellm.types.integrations.opentelemetry import SpanAttributes
         kv_pairs: Dict[str, Any] = {}
         for idx, tool_call in enumerate(tool_calls):
@@ -496,7 +496,7 @@ class OpenTelemetry(CustomLogger):
                 span, kwargs, response_obj
             )
             return
-        from litellm.proxy._types import SpanAttributes
+        from litellm.types.integrations.opentelemetry import SpanAttributes
         optional_params = kwargs.get("optional_params", {})
         litellm_params = kwargs.get("litellm_params", {}) or {}

@@ -9,7 +9,7 @@ Handles two types of alerts:
 import asyncio
 import os
 from datetime import datetime, timedelta, timezone
-from typing import List, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
 from litellm._logging import verbose_logger
 from litellm.caching import DualCache
@@ -19,7 +19,6 @@ from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
     httpxSpecialProvider,
 )
-from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.integrations.pagerduty import (
     AlertingConfig,
     PagerDutyInternalEvent,
@@ -31,6 +30,12 @@ from litellm.types.utils import (
     StandardLoggingPayloadErrorInformation,
 )
+if TYPE_CHECKING:
+    from litellm_proxy._types import UserAPIKeyAuth
+else:
+    UserAPIKeyAuth = Any
 PAGERDUTY_DEFAULT_FAILURE_THRESHOLD = 60
 PAGERDUTY_DEFAULT_FAILURE_THRESHOLD_WINDOW_SECONDS = 60
 PAGERDUTY_DEFAULT_HANGING_THRESHOLD_SECONDS = 60
@@ -46,7 +51,7 @@ class PagerDutyAlerting(SlackAlerting):
     def __init__(
         self, alerting_args: Optional[Union[AlertingConfig, dict]] = None, **kwargs
     ):
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         super().__init__()
         _api_key = os.getenv("PAGERDUTY_API_KEY")

@@ -18,10 +18,10 @@ from typing import (
 import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
-from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
 from litellm.types.integrations.prometheus import *
 from litellm.types.utils import StandardLoggingPayload
 from litellm.utils import get_end_user_id_for_cost_tracking
+from litellm_proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
 if TYPE_CHECKING:
     from apscheduler.schedulers.asyncio import AsyncIOScheduler
@@ -38,7 +38,7 @@ class PrometheusLogger(CustomLogger):
         try:
             from prometheus_client import Counter, Gauge, Histogram
-            from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+            from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
             if premium_user is not True:
                 verbose_logger.warning(
@@ -456,7 +456,7 @@ class PrometheusLogger(CustomLogger):
             and isinstance(user_api_key, str)
             and user_api_key.startswith("sk-")
         ):
-            from litellm.proxy.utils import hash_token
+            from litellm_proxy.utils import hash_token
             user_api_key = hash_token(user_api_key)
@@ -661,7 +661,7 @@ class PrometheusLogger(CustomLogger):
         kwargs: dict,
         metadata: dict,
     ):
-        from litellm.proxy.common_utils.callback_utils import (
+        from litellm_proxy.common_utils.callback_utils import (
            get_model_group_from_litellm_kwargs,
         )
@@ -1363,7 +1363,7 @@ class PrometheusLogger(CustomLogger):
            set_metrics_function: Function to set metrics for the fetched data.
            data_type: String representing the type of data ("teams" or "keys") for logging purposes.
         """
-        from litellm.proxy.proxy_server import prisma_client
+        from litellm_proxy.proxy_server import prisma_client
         if prisma_client is None:
             return
@@ -1398,10 +1398,10 @@ class PrometheusLogger(CustomLogger):
         """
         Initialize team budget metrics by reusing the generic pagination logic.
         """
-        from litellm.proxy.management_endpoints.team_endpoints import (
+        from litellm_proxy.management_endpoints.team_endpoints import (
             get_paginated_teams,
         )
-        from litellm.proxy.proxy_server import prisma_client
+        from litellm_proxy.proxy_server import prisma_client
         if prisma_client is None:
             verbose_logger.debug(
@@ -1432,10 +1432,10 @@ class PrometheusLogger(CustomLogger):
         from typing import Union
         from litellm.constants import UI_SESSION_TOKEN_TEAM_ID
-        from litellm.proxy.management_endpoints.key_management_endpoints import (
+        from litellm_proxy.management_endpoints.key_management_endpoints import (
             _list_key_helper,
         )
-        from litellm.proxy.proxy_server import prisma_client
+        from litellm_proxy.proxy_server import prisma_client
         if prisma_client is None:
             verbose_logger.debug(
@@ -1480,7 +1480,7 @@ class PrometheusLogger(CustomLogger):
         - If redis cache is not available, we initialize the metrics directly.
         """
         from litellm.constants import PROMETHEUS_EMIT_BUDGET_METRICS_JOB_NAME
-        from litellm.proxy.proxy_server import proxy_logging_obj
+        from litellm_proxy.proxy_server import proxy_logging_obj
         pod_lock_manager = proxy_logging_obj.db_spend_update_writer.pod_lock_manager
@@ -1561,8 +1561,8 @@ class PrometheusLogger(CustomLogger):
         Fields not available in metadata:
         - `budget_reset_at`
         """
-        from litellm.proxy.auth.auth_checks import get_team_object
-        from litellm.proxy.proxy_server import prisma_client, user_api_key_cache
+        from litellm_proxy.auth.auth_checks import get_team_object
+        from litellm_proxy.proxy_server import prisma_client, user_api_key_cache
         _total_team_spend = (spend or 0) + response_cost
         team_object = LiteLLM_TeamTable(
@@ -1711,8 +1711,8 @@ class PrometheusLogger(CustomLogger):
         """
         Assemble a UserAPIKeyAuth object
         """
-        from litellm.proxy.auth.auth_checks import get_key_object
-        from litellm.proxy.proxy_server import prisma_client, user_api_key_cache
+        from litellm_proxy.auth.auth_checks import get_key_object
+        from litellm_proxy.proxy_server import prisma_client, user_api_key_cache
         _total_key_spend = (key_spend or 0) + response_cost
         user_api_key_dict = UserAPIKeyAuth(
@@ -1803,8 +1803,8 @@ class PrometheusLogger(CustomLogger):
         from prometheus_client import make_asgi_app
         from litellm._logging import verbose_proxy_logger
-        from litellm.proxy._types import CommonProxyErrors
-        from litellm.proxy.proxy_server import app
+        from litellm_proxy._types import CommonProxyErrors
+        from litellm_proxy.proxy_server import app
         if premium_user is not True:
             verbose_proxy_logger.warning(

@@ -28,7 +28,6 @@ from litellm._logging import _is_debugging_on, verbose_logger
 from litellm.batches.batch_utils import _handle_completed_batch
 from litellm.caching.caching import DualCache, InMemoryCache
 from litellm.caching.caching_handler import LLMCachingHandler
 from litellm.constants import (
     DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT,
     DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT,
@@ -2886,7 +2885,7 @@ def _init_custom_logger_compatible_class(  # noqa: PLR0915
         _in_memory_loggers.append(_otel_logger)
         return _otel_logger  # type: ignore
     elif logging_integration == "dynamic_rate_limiter":
-        from litellm.proxy.hooks.dynamic_rate_limiter import (
+        from litellm_proxy.hooks.dynamic_rate_limiter import (
             _PROXY_DynamicRateLimitHandler,
         )
@@ -3074,7 +3073,7 @@ def get_custom_logger_compatible_class(  # noqa: PLR0915
         return callback  # type: ignore
     elif logging_integration == "dynamic_rate_limiter":
-        from litellm.proxy.hooks.dynamic_rate_limiter import (
+        from litellm_proxy.hooks.dynamic_rate_limiter import (
             _PROXY_DynamicRateLimitHandler,
         )
@@ -3130,7 +3129,7 @@ def _get_custom_logger_settings_from_proxy_server(callback_name: str) -> Dict:
      otel:
        message_logging: False
    """
-    from litellm.proxy.proxy_server import callback_settings
+    from litellm_proxy.proxy_server import callback_settings
    if callback_settings:
        return dict(callback_settings.get(callback_name, {}))

@@ -342,7 +342,7 @@ def get_format_from_file_id(file_id: Optional[str]) -> Optional[str]:
     unified_file_id = litellm_proxy:{};unified_id,{}
     If not a unified file id, returns 'file' as default format
     """
-    from litellm.proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles
+    from litellm_proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles
     if not file_id:
         return None

@@ -37,15 +37,15 @@ class AnthropicMessagesHandler:
         """Helper function to handle Anthropic streaming responses using the existing logging handlers"""
         from datetime import datetime
-        from litellm.proxy.pass_through_endpoints.streaming_handler import (
-            PassThroughStreamingHandler,
-        )
-        from litellm.proxy.pass_through_endpoints.success_handler import (
-            PassThroughEndpointLogging,
-        )
         from litellm.types.passthrough_endpoints.pass_through_endpoints import (
             EndpointType,
         )
+        from litellm_proxy.pass_through_endpoints.streaming_handler import (
+            PassThroughStreamingHandler,
+        )
+        from litellm_proxy.pass_through_endpoints.success_handler import (
+            PassThroughEndpointLogging,
+        )
         # Create success handler object
         passthrough_success_handler_obj = PassThroughEndpointLogging()

@@ -1,2 +0,0 @@
-.env
-secrets.toml

@@ -4550,7 +4550,7 @@ class Router:
         Each provider uses diff .env vars for pass-through endpoints, this helper uses the deployment credentials to set the .env vars for pass-through endpoints
         """
         if deployment.litellm_params.use_in_pass_through is True:
-            from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import (
+            from litellm_proxy.pass_through_endpoints.llm_passthrough_endpoints import (
                 passthrough_endpoint_router,
             )

@@ -10,11 +10,11 @@ This means you can use this with weighted-pick, lowest-latency, simple-shuffle,
 Example:
 ```
 openai:
   budget_limit: 0.000000000001
   time_period: 1d
 anthropic:
   budget_limit: 100
   time_period: 7d
 ```
 """
@@ -53,9 +53,9 @@ class RouterBudgetLimiting(CustomLogger):
         self.dual_cache = dual_cache
         self.redis_increment_operation_queue: List[RedisPipelineIncrementOperation] = []
         asyncio.create_task(self.periodic_sync_in_memory_spend_with_redis())
-        self.provider_budget_config: Optional[
-            GenericBudgetConfigType
-        ] = provider_budget_config
+        self.provider_budget_config: Optional[GenericBudgetConfigType] = (
+            provider_budget_config
+        )
         self.deployment_budget_config: Optional[GenericBudgetConfigType] = None
         self.tag_budget_config: Optional[GenericBudgetConfigType] = None
         self._init_provider_budgets()
@@ -797,7 +797,7 @@ class RouterBudgetLimiting(CustomLogger):
     def _init_tag_budgets(self):
         if litellm.tag_budget_config is None:
             return
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         if premium_user is not True:
             raise ValueError(

@@ -187,7 +187,7 @@ class LowestCostLoggingHandler(CustomLogger):
                     self.logged_success += 1
         except Exception as e:
             verbose_logger.exception(
-                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                "litellm_proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
                     str(e)
                 )
             )

@@ -170,7 +170,7 @@ class LowestLatencyLoggingHandler(CustomLogger):
                     self.logged_success += 1
         except Exception as e:
             verbose_logger.exception(
-                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                "litellm_proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
                     str(e)
                 )
             )
@@ -238,7 +238,7 @@ class LowestLatencyLoggingHandler(CustomLogger):
             return
         except Exception as e:
             verbose_logger.exception(
-                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                "litellm_proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
                     str(e)
                 )
             )

@@ -270,7 +270,7 @@ class LowestTPMLoggingHandler_v2(BaseRoutingStrategy, CustomLogger):
                     self.logged_success += 1
         except Exception as e:
             verbose_logger.exception(
-                "litellm.proxy.hooks.lowest_tpm_rpm_v2.py::log_success_event(): Exception occured - {}".format(
+                "litellm_proxy.hooks.lowest_tpm_rpm_v2.py::log_success_event(): Exception occured - {}".format(
                     str(e)
                 )
             )
@@ -321,7 +321,7 @@ class LowestTPMLoggingHandler_v2(BaseRoutingStrategy, CustomLogger):
                     self.logged_success += 1
         except Exception as e:
             verbose_logger.exception(
-                "litellm.proxy.hooks.lowest_tpm_rpm_v2.py::async_log_success_event(): Exception occured - {}".format(
+                "litellm_proxy.hooks.lowest_tpm_rpm_v2.py::async_log_success_event(): Exception occured - {}".format(
                     str(e)
                 )
             )

@@ -4,7 +4,7 @@ This is a file for the AWS Secret Manager Integration
 Relevant issue: https://github.com/BerriAI/litellm/issues/1883
 Requires:
 * `os.environ["AWS_REGION_NAME"],
 * `pip install boto3>=1.28.57`
 """
@@ -15,7 +15,7 @@ import re
 from typing import Any, Dict, Optional
 import litellm
-from litellm.proxy._types import KeyManagementSystem
+from litellm_proxy._types import KeyManagementSystem
 def validate_environment():

@@ -9,7 +9,7 @@ Handles Async Operations for:
 Relevant issue: https://github.com/BerriAI/litellm/issues/1883
 Requires:
 * `os.environ["AWS_REGION_NAME"],
 * `pip install boto3>=1.28.57`
 """
@@ -26,8 +26,8 @@ from litellm.llms.custom_httpx.http_handler import (
     _get_httpx_client,
     get_async_httpx_client,
 )
-from litellm.proxy._types import KeyManagementSystem
 from litellm.types.llms.custom_http import httpxSpecialProvider
+from litellm_proxy._types import KeyManagementSystem
 from .base_secret_manager import BaseSecretManager

@@ -12,7 +12,7 @@ import os
 from typing import Optional
 import litellm
-from litellm.proxy._types import KeyManagementSystem
+from litellm_proxy._types import KeyManagementSystem
 def validate_environment():

@@ -8,7 +8,7 @@ from litellm.caching.caching import InMemoryCache
 from litellm.constants import SECRET_MANAGER_REFRESH_INTERVAL
 from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
 from litellm.llms.custom_httpx.http_handler import _get_httpx_client
-from litellm.proxy._types import CommonProxyErrors, KeyManagementSystem
+from litellm_proxy._types import CommonProxyErrors, KeyManagementSystem
 class GoogleSecretManager(GCSBucketBase):
@@ -22,7 +22,7 @@ class GoogleSecretManager(GCSBucketBase):
             refresh_interval (int, optional): The refresh interval in seconds. Defaults to 86400. (24 hours)
             always_read_secret_manager (bool, optional): Whether to always read from the secret manager. Defaults to False. Since we do want to cache values
         """
-        from litellm.proxy.proxy_server import premium_user
+        from litellm_proxy.proxy_server import premium_user
         if premium_user is not True:
             raise ValueError(

@@ -12,14 +12,14 @@ from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
     httpxSpecialProvider,
 )
-from litellm.proxy._types import KeyManagementSystem
+from litellm_proxy._types import KeyManagementSystem
 from .base_secret_manager import BaseSecretManager
 class HashicorpSecretManager(BaseSecretManager):
     def __init__(self):
-        from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+        from litellm_proxy.proxy_server import CommonProxyErrors, premium_user
         # Vault-specific config
         self.vault_addr = os.getenv("HCP_VAULT_ADDR", "http://127.0.0.1:8200")

@@ -11,7 +11,7 @@ import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.caching.caching import DualCache
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
-from litellm.proxy._types import KeyManagementSystem
+from litellm.types.utils import KeyManagementSystem
 oidc_cache = DualCache()

@@ -0,0 +1,49 @@
+import enum
+
+
+class SpanAttributes(str, enum.Enum):
+    # Note: We've taken this from opentelemetry-semantic-conventions-ai
+    # I chose to not add a new dependency to litellm for this
+    # Semantic Conventions for LLM requests, this needs to be removed after
+    # OpenTelemetry Semantic Conventions support Gen AI.
+    # Issue at https://github.com/open-telemetry/opentelemetry-python/issues/3868
+    # Refer to https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/llm-spans.md
+    LLM_SYSTEM = "gen_ai.system"
+    LLM_REQUEST_MODEL = "gen_ai.request.model"
+    LLM_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
+    LLM_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
+    LLM_REQUEST_TOP_P = "gen_ai.request.top_p"
+    LLM_PROMPTS = "gen_ai.prompt"
+    LLM_COMPLETIONS = "gen_ai.completion"
+    LLM_RESPONSE_MODEL = "gen_ai.response.model"
+    LLM_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
+    LLM_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"
+    LLM_TOKEN_TYPE = "gen_ai.token.type"
+    # To be added
+    # LLM_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reasons"
+    # LLM_RESPONSE_ID = "gen_ai.response.id"
+    # LLM
+    LLM_REQUEST_TYPE = "llm.request.type"
+    LLM_USAGE_TOTAL_TOKENS = "llm.usage.total_tokens"
+    LLM_USAGE_TOKEN_TYPE = "llm.usage.token_type"
+    LLM_USER = "llm.user"
+    LLM_HEADERS = "llm.headers"
+    LLM_TOP_K = "llm.top_k"
+    LLM_IS_STREAMING = "llm.is_streaming"
+    LLM_FREQUENCY_PENALTY = "llm.frequency_penalty"
+    LLM_PRESENCE_PENALTY = "llm.presence_penalty"
+    LLM_CHAT_STOP_SEQUENCES = "llm.chat.stop_sequences"
+    LLM_REQUEST_FUNCTIONS = "llm.request.functions"
+    LLM_REQUEST_REPETITION_PENALTY = "llm.request.repetition_penalty"
+    LLM_RESPONSE_FINISH_REASON = "llm.response.finish_reason"
+    LLM_RESPONSE_STOP_REASON = "llm.response.stop_reason"
+    LLM_CONTENT_COMPLETION_CHUNK = "llm.content.completion.chunk"
+    # OpenAI
+    LLM_OPENAI_RESPONSE_SYSTEM_FINGERPRINT = "gen_ai.openai.system_fingerprint"
+    LLM_OPENAI_API_BASE = "gen_ai.openai.api_base"
+    LLM_OPENAI_API_VERSION = "gen_ai.openai.api_version"
+    LLM_OPENAI_API_TYPE = "gen_ai.openai.api_type"
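
This new module gives `SpanAttributes` a home under `litellm.types`, so integrations no longer need to import it from the proxy package. Because the enum subclasses `str`, its members can be passed wherever the OpenTelemetry API expects an attribute key; a minimal usage sketch (the tracer setup is illustrative, not part of this diff):

```python
from opentelemetry import trace

from litellm.types.integrations.opentelemetry import SpanAttributes

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("llm.completion") as span:
    # str-valued enum members double as attribute keys.
    span.set_attribute(SpanAttributes.LLM_SYSTEM.value, "openai")
    span.set_attribute(SpanAttributes.LLM_REQUEST_MODEL.value, "gpt-4o")
    span.set_attribute(SpanAttributes.LLM_USAGE_TOTAL_TOKENS.value, 42)
```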

@@ -3,7 +3,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
 from fastapi import HTTPException
 from pydantic import BaseModel, EmailStr
-from litellm.proxy._types import LiteLLM_UserTableWithKeyCount
+from litellm_proxy._types import LiteLLM_UserTableWithKeyCount
 class UserListResponse(BaseModel):

@@ -2,7 +2,7 @@ from typing import List, Literal, Optional, TypedDict
 from pydantic import Field
-from litellm.proxy._types import LiteLLMPydanticObjectBase, LitellmUserRoles
+from litellm.types.utils import LiteLLMPydanticObjectBase
 class MicrosoftGraphAPIUserGroupDirectoryObject(TypedDict, total=False):

@@ -1,3 +1,4 @@
+import enum
 import json
 import time
 import uuid
@@ -2262,3 +2263,61 @@ class SpecialEnums(Enum):
 LLMResponseTypes = Union[
     ModelResponse, EmbeddingResponse, ImageResponse, OpenAIFileObject
 ]
+
+AllowedModelRegion = Literal["eu", "us"]
+
+
+class KeyManagementSystem(enum.Enum):
+    GOOGLE_KMS = "google_kms"
+    AZURE_KEY_VAULT = "azure_key_vault"
+    AWS_SECRET_MANAGER = "aws_secret_manager"
+    GOOGLE_SECRET_MANAGER = "google_secret_manager"
+    HASHICORP_VAULT = "hashicorp_vault"
+    LOCAL = "local"
+    AWS_KMS = "aws_kms"
+
+
+class KeyManagementSettings(LiteLLMPydanticObjectBase):
+    hosted_keys: Optional[List] = None
+    store_virtual_keys: Optional[bool] = False
+    """
+    If True, virtual keys created by litellm will be stored in the secret manager
+    """
+    prefix_for_stored_virtual_keys: str = "litellm/"
+    """
+    If set, this prefix will be used for stored virtual keys in the secret manager
+    """
+    access_mode: Literal["read_only", "write_only", "read_and_write"] = "read_only"
+    """
+    Access mode for the secret manager, when write_only will only use for writing secrets
+    """
+    primary_secret_name: Optional[str] = None
+    """
+    If set, will read secrets from this primary secret in the secret manager
+    eg. on AWS you can store multiple secret values as K/V pairs in a single secret
+    """
+
+
+class LiteLLM_UpperboundKeyGenerateParams(LiteLLMPydanticObjectBase):
+    """
+    Set default upperbound to max budget a key called via `/key/generate` can be.
+
+    Args:
+        max_budget (Optional[float], optional): Max budget a key can be. Defaults to None.
+        budget_duration (Optional[str], optional): Duration of the budget. Defaults to None.
+        duration (Optional[str], optional): Duration of the key. Defaults to None.
+        max_parallel_requests (Optional[int], optional): Max number of requests that can be made in parallel. Defaults to None.
+        tpm_limit (Optional[int], optional): Tpm limit. Defaults to None.
+        rpm_limit (Optional[int], optional): Rpm limit. Defaults to None.
+    """
+
+    max_budget: Optional[float] = None
+    budget_duration: Optional[str] = None
+    duration: Optional[str] = None
+    max_parallel_requests: Optional[int] = None
+    tpm_limit: Optional[int] = None
+    rpm_limit: Optional[int] = None
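
With these classes relocated to `litellm.types.utils`, key-management behavior can be configured without importing the proxy package. A minimal sketch with illustrative values (field names are taken from the definitions above):

```python
from litellm.types.utils import (
    KeyManagementSettings,
    KeyManagementSystem,
    LiteLLM_UpperboundKeyGenerateParams,
)

# Store litellm-generated virtual keys in the secret manager, read/write.
settings = KeyManagementSettings(
    store_virtual_keys=True,
    prefix_for_stored_virtual_keys="litellm/",
    access_mode="read_and_write",
)

# Cap what /key/generate may hand out.
upperbound = LiteLLM_UpperboundKeyGenerateParams(
    max_budget=10.0,
    budget_duration="30d",
    tpm_limit=1000,
)

assert KeyManagementSystem.AWS_SECRET_MANAGER.value == "aws_secret_manager"
```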

@@ -259,13 +259,13 @@ from .exceptions import (
     UnprocessableEntityError,
     UnsupportedParamsError,
 )
-from .proxy._types import AllowedModelRegion, KeyManagementSystem
 from .types.llms.openai import (
     ChatCompletionDeltaToolCallChunk,
     ChatCompletionToolCallChunk,
     ChatCompletionToolCallFunctionChunk,
 )
 from .types.router import LiteLLM_Params
+from .types.utils import AllowedModelRegion, KeyManagementSystem
 ####### ENVIRONMENT VARIABLES ####################
 # Adjust to your specific application needs / system capabilities.

@@ -13,8 +13,8 @@ from pydantic import ConfigDict, ValidationError
 from litellm._logging import verbose_logger
 from litellm.constants import MCP_TOOL_NAME_PREFIX
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
-from litellm.proxy._types import UserAPIKeyAuth
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm_proxy._types import UserAPIKeyAuth
+from litellm_proxy.auth.user_api_key_auth import user_api_key_auth
 from litellm.types.mcp_server.mcp_server_manager import MCPInfo
 from litellm.types.utils import StandardLoggingMCPToolCall
 from litellm.utils import client
@@ -288,7 +288,7 @@ if MCP_AVAILABLE:
         """
         REST API to call a specific MCP tool with the provided arguments
         """
-        from litellm.proxy.proxy_server import add_litellm_data_to_request, proxy_config
+        from litellm_proxy.proxy_server import add_litellm_data_to_request, proxy_config
         data = await request.json()
         data = await add_litellm_data_to_request(

@@ -2,7 +2,7 @@ import json
 from typing import Any, Callable, Dict, List, Optional
 from litellm._logging import verbose_logger
-from litellm.proxy.types_utils.utils import get_instance_fn
+from litellm_proxy.types_utils.utils import get_instance_fn
 from litellm.types.mcp_server.tool_registry import MCPTool

(12 binary image files changed; each image's size is unchanged — 381 B, 414 B, 2.5 KiB, 2.2 KiB, 8 KiB, 742 B, 528 B, 2.3 KiB, 592 B, 728 B, 619 B, and 7.2 KiB. Some files are not shown because too many files changed in this diff.)