Litellm dev 11 20 2024 (#6831)

* feat(customer_endpoints.py): support passing budget duration via `/customer/new` endpoint Closes https://github.com/BerriAI/litellm/issues/5651 * docs: add missing params to swagger + api documentation test * docs: add documentation for all key endpoints documents all params on swagger * docs(internal_user_endpoints.py): document all /user/new params Ensures all params are documented * docs(team_endpoints.py): add missing documentation for team endpoints Ensures 100% param documentation on swagger * docs(organization_endpoints.py): document all org params Adds documentation for all params in org endpoint * docs(customer_endpoints.py): add coverage for all params on /customer endpoints ensures all /customer/* params are documented * ci(config.yml): add endpoint doc testing to ci/cd * fix: fix internal_user_endpoints.py * fix(internal_user_endpoints.py): support 'duration' param * fix(partner_models/main.py): fix anthropic re-raise exception on vertex * fix: fix pydantic obj
2024-11-21 04:06:06 +05:30 · 2024-11-21 04:06:06 +05:30 · 689cd677c6
commit 689cd677c6
parent a1f06de53d
11 changed files with 480 additions and 139 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -770,6 +770,7 @@ jobs:
      - run: python ./tests/code_coverage_tests/test_router_strategy_async.py
      - run: python ./tests/code_coverage_tests/litellm_logging_code_coverage.py
      - run: python ./tests/documentation_tests/test_env_keys.py
      - run: python ./tests/documentation_tests/test_api_docs.py
      - run: helm lint ./deploy/charts/litellm-helm
  db_migration_disable_update_check:
--- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/main.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/main.py
@ -236,4 +236,6 @@ class VertexAIPartnerModels(VertexBase):
            )
        except Exception as e:
            if hasattr(e, "status_code"):
                raise e
            raise VertexAIError(status_code=500, message=str(e))
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@ -623,6 +623,8 @@ class GenerateRequestBase(LiteLLMBase):
    Overlapping schema between key and user generate/update requests
    """
    key_alias: Optional[str] = None
    duration: Optional[str] = None
    models: Optional[list] = []
    spend: Optional[float] = 0
    max_budget: Optional[float] = None
@ -635,13 +637,6 @@ class GenerateRequestBase(LiteLLMBase):
    budget_duration: Optional[str] = None
    allowed_cache_controls: Optional[list] = []
    soft_budget: Optional[float] = None
 class _GenerateKeyRequest(GenerateRequestBase):
    key_alias: Optional[str] = None
    key: Optional[str] = None
    duration: Optional[str] = None
    aliases: Optional[dict] = {}
    config: Optional[dict] = {}
    permissions: Optional[dict] = {}
    model_max_budget: Optional[dict] = (
@ -654,6 +649,11 @@ class _GenerateKeyRequest(GenerateRequestBase):
    model_tpm_limit: Optional[dict] = None
    guardrails: Optional[List[str]] = None
    blocked: Optional[bool] = None
    aliases: Optional[dict] = {}
 class _GenerateKeyRequest(GenerateRequestBase):
    key: Optional[str] = None
 class GenerateKeyRequest(_GenerateKeyRequest):
@ -719,7 +719,7 @@ class LiteLLM_ModelTable(LiteLLMBase):
    model_config = ConfigDict(protected_namespaces=())
-class NewUserRequest(_GenerateKeyRequest):
+class NewUserRequest(GenerateRequestBase):
    max_budget: Optional[float] = None
    user_email: Optional[str] = None
    user_alias: Optional[str] = None
@ -786,7 +786,51 @@ class DeleteUserRequest(LiteLLMBase):
 AllowedModelRegion = Literal["eu", "us"]
-class NewCustomerRequest(LiteLLMBase):
+class BudgetNew(LiteLLMBase):
    budget_id: Optional[str] = Field(default=None, description="The unique budget id.")
    max_budget: Optional[float] = Field(
        default=None,
        description="Requests will fail if this budget (in USD) is exceeded.",
    )
    soft_budget: Optional[float] = Field(
        default=None,
        description="Requests will NOT fail if this is exceeded. Will fire alerting though.",
    )
    max_parallel_requests: Optional[int] = Field(
        default=None, description="Max concurrent requests allowed for this budget id."
    )
    tpm_limit: Optional[int] = Field(
        default=None, description="Max tokens per minute, allowed for this budget id."
    )
    rpm_limit: Optional[int] = Field(
        default=None, description="Max requests per minute, allowed for this budget id."
    )
    budget_duration: Optional[str] = Field(
        default=None,
        description="Max duration budget should be set for (e.g. '1hr', '1d', '28d')",
    )
 class BudgetRequest(LiteLLMBase):
    # NOTE(review): presumably a list of budget ids to look up — confirm against the endpoint that accepts this model.
    budgets: List[str]
 class BudgetDeleteRequest(LiteLLMBase):
    # NOTE(review): presumably the budget_id of the budget to delete — confirm against the endpoint that accepts this model.
    id: str
 class CustomerBase(LiteLLMBase):
    # Common shape of a customer (end-user) record.
    # `user_id` is the only required field; every other field has a default.
    user_id: str
    alias: Optional[str] = None
    spend: float = 0.0
    # Restricted to the values allowed by AllowedModelRegion ("eu" or "us").
    allowed_model_region: Optional[AllowedModelRegion] = None
    default_model: Optional[str] = None
    budget_id: Optional[str] = None
    # Budget details nested as a BudgetNew object, when present.
    litellm_budget_table: Optional[BudgetNew] = None
    blocked: bool = False
 class NewCustomerRequest(BudgetNew):
    """
    Create a new customer, allocate a budget to them
    """
@ -794,7 +838,6 @@ class NewCustomerRequest(LiteLLMBase):
    user_id: str
    alias: Optional[str] = None  # human-friendly alias
    blocked: bool = False  # allow/disallow requests for this end-user
    max_budget: Optional[float] = None
    budget_id: Optional[str] = None  # give either a budget_id or max_budget
    allowed_model_region: Optional[AllowedModelRegion] = (
        None  # require all user requests to use models in this specific region
@ -1083,39 +1126,6 @@ class OrganizationRequest(LiteLLMBase):
    organizations: List[str]
 class BudgetNew(LiteLLMBase):
    budget_id: str = Field(default=None, description="The unique budget id.")
    max_budget: Optional[float] = Field(
        default=None,
        description="Requests will fail if this budget (in USD) is exceeded.",
    )
    soft_budget: Optional[float] = Field(
        default=None,
        description="Requests will NOT fail if this is exceeded. Will fire alerting though.",
    )
    max_parallel_requests: Optional[int] = Field(
        default=None, description="Max concurrent requests allowed for this budget id."
    )
    tpm_limit: Optional[int] = Field(
        default=None, description="Max tokens per minute, allowed for this budget id."
    )
    rpm_limit: Optional[int] = Field(
        default=None, description="Max requests per minute, allowed for this budget id."
    )
    budget_duration: Optional[str] = Field(
        default=None,
        description="Max duration budget should be set for (e.g. '1hr', '1d', '28d')",
    )
 class BudgetRequest(LiteLLMBase):
    budgets: List[str]
 class BudgetDeleteRequest(LiteLLMBase):
    id: str
 class KeyManagementSystem(enum.Enum):
    GOOGLE_KMS = "google_kms"
    AZURE_KEY_VAULT = "azure_key_vault"
@ -2081,3 +2091,45 @@ JWKKeyValue = Union[List[JWTKeyItem], JWTKeyItem]
 class JWKUrlResponse(TypedDict, total=False):
    keys: JWKKeyValue
 class UserManagementEndpointParamDocStringEnums(str, enum.Enum):
    """
    Reusable description strings for user-management endpoint parameters.

    Each member's value is the text "<type> - <description>" for one request
    parameter; the member named `<param>_doc_str` describes `<param>`. Because
    the class also derives from `str`, a member compares equal to its plain
    string value.
    """
    user_id_doc_str = (
        "Optional[str] - Specify a user id. If not set, a unique id will be generated."
    )
    user_alias_doc_str = (
        "Optional[str] - A descriptive name for you to know who this user id refers to."
    )
    teams_doc_str = "Optional[list] - specify a list of team id's a user belongs to."
    user_email_doc_str = "Optional[str] - Specify a user email."
    send_invite_email_doc_str = (
        "Optional[bool] - Specify if an invite email should be sent."
    )
    user_role_doc_str = """Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`"""
    max_budget_doc_str = """Optional[float] - Specify max budget for a given user."""
    budget_duration_doc_str = """Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"), months ("1mo")."""
    models_doc_str = """Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)"""
    tpm_limit_doc_str = (
        """Optional[int] - Specify tpm limit for a given user (Tokens per minute)"""
    )
    rpm_limit_doc_str = (
        """Optional[int] - Specify rpm limit for a given user (Requests per minute)"""
    )
    auto_create_key_doc_str = """bool - Default=True. Flag used for returning a key as part of the /user/new response"""
    aliases_doc_str = """Optional[dict] - Model aliases for the user - [Docs](https://litellm.vercel.app/docs/proxy/virtual_keys#model-aliases)"""
    config_doc_str = """Optional[dict] - [DEPRECATED PARAM] User-specific config."""
    allowed_cache_controls_doc_str = """Optional[list] - List of allowed cache control values. Example - ["no-cache", "no-store"]. See all values - https://docs.litellm.ai/docs/proxy/caching#turn-on--off-caching-per-request-"""
    blocked_doc_str = (
        """Optional[bool] - [Not Implemented Yet] Whether the user is blocked."""
    )
    guardrails_doc_str = """Optional[List[str]] - [Not Implemented Yet] List of active guardrails for the user"""
    permissions_doc_str = """Optional[dict] - [Not Implemented Yet] User-specific permissions, eg. turning off pii masking."""
    metadata_doc_str = """Optional[dict] - Metadata for user, store information for user. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }"""
    max_parallel_requests_doc_str = """Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x."""
    soft_budget_doc_str = """Optional[float] - Get alerts when user crosses given budget, doesn't block requests."""
    model_max_budget_doc_str = """Optional[dict] - Model-specific max budget for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-budgets-to-keys)"""
    model_rpm_limit_doc_str = """Optional[float] - Model-specific rpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys)"""
    model_tpm_limit_doc_str = """Optional[float] - Model-specific tpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys)"""
    spend_doc_str = """Optional[float] - Amount spent by user. Default is 0. Will be updated by proxy whenever user is used."""
    team_id_doc_str = """Optional[str] - [DEPRECATED PARAM] The team id of the user. Default is None."""
    duration_doc_str = """Optional[str] - Duration for the key auto-created on `/user/new`. Default is None."""
--- a/litellm/proxy/management_endpoints/customer_endpoints.py
+++ b/litellm/proxy/management_endpoints/customer_endpoints.py
@ -1,3 +1,14 @@
 """
 CUSTOMER MANAGEMENT
 All /customer management endpoints 
 /customer/new   
 /customer/info
 /customer/update
 /customer/delete
 """
 #### END-USER/CUSTOMER MANAGEMENT ####
 import asyncio
 import copy
@ -129,6 +140,26 @@ async def unblock_user(data: BlockUsers):
    return {"blocked_users": litellm.blocked_user_list}
 def new_budget_request(data: NewCustomerRequest) -> Optional[BudgetNew]:
    """
    Build a `BudgetNew` from the budget-related values set on a customer request.

    Every `BudgetNew` field except `budget_id` is read off `data`; fields that
    are absent or `None` are dropped. Returns `None` when nothing is left,
    i.e. the request carried no new budget parameters.
    """
    # `budget_id` is skipped: it refers to an existing budget rather than
    # describing a new one.
    candidates = (
        (name, getattr(data, name, None))
        for name in BudgetNew.model_fields.keys()
        if name != "budget_id"
    )
    populated = {name: value for name, value in candidates if value is not None}
    return BudgetNew(**populated) if populated else None
@router.post(
    "/end_user/new",
    tags=["Customer Management"],
@ -157,6 +188,11 @@ async def new_end_user(
    - allowed_model_region: Optional[Union[Literal["eu"], Literal["us"]]] - Require all user requests to use models in this specific region.
    - default_model: Optional[str] - If no equivalent model in the allowed region, default all requests to this model.
    - metadata: Optional[dict] = Metadata for customer, store information for customer. Example metadata = {"data_training_opt_out": True}
    - budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
    - tpm_limit: Optional[int] - [Not Implemented Yet] Specify tpm limit for a given customer (Tokens per minute)
    - rpm_limit: Optional[int] - [Not Implemented Yet] Specify rpm limit for a given customer (Requests per minute)
    - max_parallel_requests: Optional[int] - [Not Implemented Yet] Specify max parallel requests for a given customer.
    - soft_budget: Optional[float] - [Not Implemented Yet] Get alerts when customer crosses given budget, doesn't block requests.
    - Allow specifying allowed regions 
@ -223,14 +259,19 @@ async def new_end_user(
        new_end_user_obj: Dict = {}
        ## CREATE BUDGET ## if set
-        if data.max_budget is not None:
+        _new_budget = new_budget_request(data)
        if _new_budget is not None:
            try:
                budget_record = await prisma_client.db.litellm_budgettable.create(
                    data={
-                    "max_budget": data.max_budget,
+                        **_new_budget.model_dump(exclude_unset=True),
                        "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,  # type: ignore
-                    "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
+                        "updated_by": user_api_key_dict.user_id
                        or litellm_proxy_admin_name,
                    }
                )
            except Exception as e:
                raise HTTPException(status_code=422, detail={"error": str(e)})
            new_end_user_obj["budget_id"] = budget_record.budget_id
        elif data.budget_id is not None:
@ -239,16 +280,22 @@ async def new_end_user(
        _user_data = data.dict(exclude_none=True)
        for k, v in _user_data.items():
-            if k != "max_budget" and k != "budget_id":
+            if k not in BudgetNew.model_fields.keys():
                new_end_user_obj[k] = v
        ## WRITE TO DB ##
        end_user_record = await prisma_client.db.litellm_endusertable.create(
-            data=new_end_user_obj  # type: ignore
+            data=new_end_user_obj,  # type: ignore
            include={"litellm_budget_table": True},
        )
        return end_user_record
    except Exception as e:
        verbose_proxy_logger.exception(
            "litellm.proxy.management_endpoints.customer_endpoints.new_end_user(): Exception occured - {}".format(
                str(e)
            )
        )
        if "Unique constraint failed on the fields: (`user_id`)" in str(e):
            raise ProxyException(
                message=f"Customer already exists, passed user_id={data.user_id}. Please pass a new user_id.",
--- a/litellm/proxy/management_endpoints/internal_user_endpoints.py
+++ b/litellm/proxy/management_endpoints/internal_user_endpoints.py
@ -102,11 +102,27 @@ async def new_user(
    - send_invite_email: Optional[bool] - Specify if an invite email should be sent.
    - user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`
    - max_budget: Optional[float] - Specify max budget for a given user.
-    - budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
+    - budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"), months ("1mo").
    - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
    - tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
    - rpm_limit: Optional[int] - Specify rpm limit for a given user (Requests per minute)
    - auto_create_key: bool - Default=True. Flag used for returning a key as part of the /user/new response
    - aliases: Optional[dict] - Model aliases for the user - [Docs](https://litellm.vercel.app/docs/proxy/virtual_keys#model-aliases)
    - config: Optional[dict] - [DEPRECATED PARAM] User-specific config.
    - allowed_cache_controls: Optional[list] - List of allowed cache control values. Example - ["no-cache", "no-store"]. See all values - https://docs.litellm.ai/docs/proxy/caching#turn-on--off-caching-per-request-
    - blocked: Optional[bool] - [Not Implemented Yet] Whether the user is blocked.
    - guardrails: Optional[List[str]] - [Not Implemented Yet] List of active guardrails for the user
    - permissions: Optional[dict] - [Not Implemented Yet] User-specific permissions, eg. turning off pii masking.
    - metadata: Optional[dict] - Metadata for user, store information for user. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
    - max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
    - soft_budget: Optional[float] - Get alerts when user crosses given budget, doesn't block requests.
    - model_max_budget: Optional[dict] - Model-specific max budget for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-budgets-to-keys)
    - model_rpm_limit: Optional[float] - Model-specific rpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys)
    - model_tpm_limit: Optional[float] - Model-specific tpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys)
    - spend: Optional[float] - Amount spent by user. Default is 0. Will be updated by proxy whenever user is used.
    - team_id: Optional[str] - [DEPRECATED PARAM] The team id of the user. Default is None. 
    - duration: Optional[str] - Duration for the key auto-created on `/user/new`. Default is None.
    - key_alias: Optional[str] - Alias for the key auto-created on `/user/new`. Default is None.
    Returns:
    - key: (str) The generated api key for the user
@ -445,54 +461,36 @@ async def user_update(
    }'
    Parameters:
-        user_id: Optional[str]
+        - user_id: Optional[str] - Specify a user id. If not set, a unique id will be generated.
-            Unique identifier for the user to update
+        - user_email: Optional[str] - Specify a user email.
        - password: Optional[str] - Specify a user password.
        - user_alias: Optional[str] - A descriptive name for you to know who this user id refers to.
        - teams: Optional[list] - specify a list of team id's a user belongs to.
        - send_invite_email: Optional[bool] - Specify if an invite email should be sent.
        - user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`
        - max_budget: Optional[float] - Specify max budget for a given user.
        - budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"), months ("1mo").
        - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
        - tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
        - rpm_limit: Optional[int] - Specify rpm limit for a given user (Requests per minute)
        - auto_create_key: bool - Default=True. Flag used for returning a key as part of the /user/new response
        - aliases: Optional[dict] - Model aliases for the user - [Docs](https://litellm.vercel.app/docs/proxy/virtual_keys#model-aliases)
        - config: Optional[dict] - [DEPRECATED PARAM] User-specific config.
        - allowed_cache_controls: Optional[list] - List of allowed cache control values. Example - ["no-cache", "no-store"]. See all values - https://docs.litellm.ai/docs/proxy/caching#turn-on--off-caching-per-request-
        - blocked: Optional[bool] - [Not Implemented Yet] Whether the user is blocked.
        - guardrails: Optional[List[str]] - [Not Implemented Yet] List of active guardrails for the user
        - permissions: Optional[dict] - [Not Implemented Yet] User-specific permissions, eg. turning off pii masking.
        - metadata: Optional[dict] - Metadata for user, store information for user. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
        - max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
        - soft_budget: Optional[float] - Get alerts when user crosses given budget, doesn't block requests.
        - model_max_budget: Optional[dict] - Model-specific max budget for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-budgets-to-keys)
        - model_rpm_limit: Optional[float] - Model-specific rpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys)
        - model_tpm_limit: Optional[float] - Model-specific tpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys)
        - spend: Optional[float] - Amount spent by user. Default is 0. Will be updated by proxy whenever user is used.
        - team_id: Optional[str] - [DEPRECATED PARAM] The team id of the user. Default is None. 
        - duration: Optional[str] - [NOT IMPLEMENTED].
        - key_alias: Optional[str] - [NOT IMPLEMENTED].
        user_email: Optional[str]
            Email address for the user
        password: Optional[str]
            Password for the user
        user_role: Optional[Literal["proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer"]]
            Role assigned to the user. Can be one of:
            - proxy_admin: Full admin access
            - proxy_admin_viewer: Read-only admin access
            - internal_user: Standard internal user
            - internal_user_viewer: Read-only internal user
        models: Optional[list]
            List of model names the user is allowed to access
        spend: Optional[float]
            Current spend amount for the user
        max_budget: Optional[float]
            Maximum budget allowed for the user
        team_id: Optional[str]
            ID of the team the user belongs to
        max_parallel_requests: Optional[int]
            Maximum number of concurrent requests allowed
        metadata: Optional[dict]
            Additional metadata associated with the user
        tpm_limit: Optional[int]
            Maximum tokens per minute allowed
        rpm_limit: Optional[int]
            Maximum requests per minute allowed
        budget_duration: Optional[str]
            Duration for budget renewal (e.g., "30d" for 30 days)
        allowed_cache_controls: Optional[list]
            List of allowed cache control options
        soft_budget: Optional[float]
            Soft budget limit for alerting purposes
    ```
    """
    from litellm.proxy.proxy_server import prisma_client
--- a/litellm/proxy/management_endpoints/key_management_endpoints.py
+++ b/litellm/proxy/management_endpoints/key_management_endpoints.py
@ -83,6 +83,13 @@ async def generate_key_fn(  # noqa: PLR0915
    - model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
    - model_rpm_limit: Optional[dict] - key-specific model rpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific rpm limit.
    - model_tpm_limit: Optional[dict] - key-specific model tpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific tpm limit.
    - allowed_cache_controls: Optional[list] - List of allowed cache control values. Example - ["no-cache", "no-store"]. See all values - https://docs.litellm.ai/docs/proxy/caching#turn-on--off-caching-per-request
    - blocked: Optional[bool] - Whether the key is blocked.
    - rpm_limit: Optional[int] - Specify rpm limit for a given key (Requests per minute)
    - tpm_limit: Optional[int] - Specify tpm limit for a given key (Tokens per minute)
    - soft_budget: Optional[float] - Specify soft budget for a given key. Will trigger a slack alert when this soft budget is reached.
    - tags: Optional[List[str]] - Tags for [tracking spend](https://litellm.vercel.app/docs/proxy/enterprise#tracking-spend-for-custom-tags) and/or doing [tag-based routing](https://litellm.vercel.app/docs/proxy/tag_routing).
    Examples:
    1. Allow users to turn on/off pii masking
@ -349,6 +356,8 @@ async def update_key_fn(
    - send_invite_email: Optional[bool] - Send invite email to user_id
    - guardrails: Optional[List[str]] - List of active guardrails for the key
    - blocked: Optional[bool] - Whether the key is blocked
    - aliases: Optional[dict] - Model aliases for the key - [Docs](https://litellm.vercel.app/docs/proxy/virtual_keys#model-aliases)
    - config: Optional[dict] - [DEPRECATED PARAM] Key-specific config.
    Example:
    ```bash
--- a/litellm/proxy/management_endpoints/organization_endpoints.py
+++ b/litellm/proxy/management_endpoints/organization_endpoints.py
@ -5,6 +5,7 @@ Endpoints for /organization operations
 /organization/update
 /organization/delete
 /organization/info
 /organization/list
 """
 #### ORGANIZATION MANAGEMENT ####
@ -55,15 +56,23 @@ async def new_organization(
    # Parameters
-    - `organization_alias`: *str* = The name of the organization.
+    - organization_alias: *str* - The name of the organization.
-    - `models`: *List* = The models the organization has access to.
+    - models: *List* - The models the organization has access to.
-    - `budget_id`: *Optional[str]* = The id for a budget (tpm/rpm/max budget) for the organization.
+    - budget_id: *Optional[str]* - The id for a budget (tpm/rpm/max budget) for the organization.
    ### IF NO BUDGET ID - CREATE ONE WITH THESE PARAMS ###
-    - `max_budget`: *Optional[float]* = Max budget for org
+    - max_budget: *Optional[float]* - Max budget for org
-    - `tpm_limit`: *Optional[int]* = Max tpm limit for org
+    - tpm_limit: *Optional[int]* - Max tpm limit for org
-    - `rpm_limit`: *Optional[int]* = Max rpm limit for org
+    - rpm_limit: *Optional[int]* - Max rpm limit for org
-    - `model_max_budget`: *Optional[dict]* = Max budget for a specific model
+    - max_parallel_requests: *Optional[int]* - [Not Implemented Yet] Max parallel requests for org
-    - `budget_duration`: *Optional[str]* = Frequency of reseting org budget
+    - soft_budget: *Optional[float]* - [Not Implemented Yet] Get a slack alert when this soft budget is reached. Don't block requests.
    - model_max_budget: *Optional[dict]* - Max budget for a specific model
    - budget_duration: *Optional[str]* - Frequency of resetting org budget
    - metadata: *Optional[dict]* - Metadata for organization, store information for organization. Example metadata - {"extra_info": "some info"}
    - blocked: *bool* - Flag indicating if the org is blocked or not - will stop all calls from keys with this org_id.
    - tags: *Optional[List[str]]* - Tags for [tracking spend](https://litellm.vercel.app/docs/proxy/enterprise#tracking-spend-for-custom-tags) and/or doing [tag-based routing](https://litellm.vercel.app/docs/proxy/tag_routing).
    - organization_id: *Optional[str]* - The organization id of the team. Default is None. Create via `/organization/new`.
    - model_aliases: Optional[dict] - Model aliases for the team. [Docs](https://docs.litellm.ai/docs/proxy/team_based_routing#create-team-with-model-alias)
    Case 1: Create new org **without** a budget_id
@ -185,7 +194,7 @@ async def new_organization(
 )
 async def update_organization():
    """[TODO] Not Implemented yet. Let us know if you need this - https://github.com/BerriAI/litellm/issues"""
-    pass
+    raise NotImplementedError("Not Implemented Yet")
@router.post(
@ -195,7 +204,7 @@ async def update_organization():
 )
 async def delete_organization():
    """[TODO] Not Implemented yet. Let us know if you need this - https://github.com/BerriAI/litellm/issues"""
-    pass
+    raise NotImplementedError("Not Implemented Yet")
@router.get(
--- a/litellm/proxy/management_endpoints/team_endpoints.py
+++ b/litellm/proxy/management_endpoints/team_endpoints.py
@ -1,3 +1,14 @@
 """
 TEAM MANAGEMENT
 All /team management endpoints 
 /team/new
 /team/info
 /team/update
 /team/delete
 """
 import asyncio
 import copy
 import json
@ -121,6 +132,10 @@ async def new_team(  # noqa: PLR0915
    - budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
    - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
    - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
    - members: Optional[List] - Control team members via `/team/member/add` and `/team/member/delete`. 
    - tags: Optional[List[str]] - Tags for [tracking spend](https://litellm.vercel.app/docs/proxy/enterprise#tracking-spend-for-custom-tags) and/or doing [tag-based routing](https://litellm.vercel.app/docs/proxy/tag_routing).
    - organization_id: Optional[str] - The organization id of the team. Default is None. Create via `/organization/new`.
    - model_aliases: Optional[dict] - Model aliases for the team. [Docs](https://docs.litellm.ai/docs/proxy/team_based_routing#create-team-with-model-alias)
    Returns:
    - team_id: (str) Unique team id - used for tracking spend across multiple keys for same team id.
@ -353,6 +368,8 @@ async def update_team(
    - budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets)
    - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed.
    - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id.
    - tags: Optional[List[str]] - Tags for [tracking spend](https://litellm.vercel.app/docs/proxy/enterprise#tracking-spend-for-custom-tags) and/or doing [tag-based routing](https://litellm.vercel.app/docs/proxy/tag_routing).
    - organization_id: Optional[str] - The organization id of the team. Default is None. Create via `/organization/new`.
    Example - update team TPM Limit
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -3127,6 +3127,7 @@ def _get_docs_url() -> Optional[str]:
    # default to "/"
    return "/"
 def handle_exception_on_proxy(e: Exception) -> ProxyException:
    """
    Returns an Exception as ProxyException, this ensures all exceptions are OpenAI API compatible
@ -3148,4 +3149,3 @@ def handle_exception_on_proxy(e: Exception) -> ProxyException:
        param=getattr(e, "param", "None"),
        code=status.HTTP_500_INTERNAL_SERVER_ERROR,
    )
--- a/tests/documentation_tests/test_api_docs.py
+++ b/tests/documentation_tests/test_api_docs.py
@ -0,0 +1,206 @@
 import ast
 from typing import List, Dict, Set, Optional
 import os
 from dataclasses import dataclass
 import argparse
 import re
 import sys
 sys.path.insert(
    0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
 import litellm
@dataclass
class FunctionInfo:
    """Details recorded for one endpoint function found while walking a module's AST.

    Instances are created by ``FastAPIDocVisitor.visit_FunctionDef``.
    """

    # Function name exactly as written in its ``def`` statement.
    name: str
    # Result of ``ast.get_docstring`` for the function; ``None`` when it has none.
    docstring: Optional[str]
    # ``(argument name, annotation name)`` pairs for the annotated arguments.
    # These are 2-tuples, not bare strings: consumers unpack them as pairs.
    parameters: Set[tuple[str, str]]
    # Path of the file in which the function was found.
    file_path: str
    # Line number of the function definition (``node.lineno``).
    line_number: int
 class FastAPIDocVisitor(ast.NodeVisitor):
    """AST visitor to find FastAPI endpoint functions."""
    def __init__(self, target_functions: Set[str]):
        self.target_functions = target_functions
        self.functions: Dict[str, FunctionInfo] = {}
        self.current_file = ""
    def visit_FunctionDef(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
        """Visit function definitions (both async and sync) and collect info if they match target functions."""
        if node.name in self.target_functions:
            # Extract docstring
            docstring = ast.get_docstring(node)
            # Extract parameters
            parameters = set()
            for arg in node.args.args:
                if arg.annotation is not None:
                    # Get the parameter type from annotation
                    if isinstance(arg.annotation, ast.Name):
                        parameters.add((arg.arg, arg.annotation.id))
                    elif isinstance(arg.annotation, ast.Subscript):
                        if isinstance(arg.annotation.value, ast.Name):
                            parameters.add((arg.arg, arg.annotation.value.id))
            self.functions[node.name] = FunctionInfo(
                name=node.name,
                docstring=docstring,
                parameters=parameters,
                file_path=self.current_file,
                line_number=node.lineno,
            )
    # Also need to add this to handle async functions
    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Handle async functions by delegating to the regular function visitor."""
        return self.visit_FunctionDef(node)
 def find_functions_in_file(
    file_path: str, target_functions: Set[str]
 ) -> Dict[str, FunctionInfo]:
    """Parse one Python file and return the wanted functions it defines.

    The file is read as UTF-8, parsed with ``ast`` and walked by a
    ``FastAPIDocVisitor``. Any exception raised along the way is reported
    with ``print`` and an empty dict is returned instead.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as source_file:
            module_tree = ast.parse(source_file.read())
        collector = FastAPIDocVisitor(target_functions)
        # Lets every collected record carry the file it came from.
        collector.current_file = file_path
        collector.visit(module_tree)
    except Exception as e:
        print(f"Error parsing {file_path}: {str(e)}")
        return {}
    return collector.functions
 def extract_docstring_params(docstring: Optional[str]) -> Set[str]:
    """Return every identifier in ``docstring`` that is followed by a colon.

    Recognised shapes include ``- name: text``, ``name: text`` and
    ``name (type): text``. The match is not anchored to the start of a
    line, so any word directly followed by ``:`` is reported as well
    (for example the ``https`` of a URL). A missing or empty docstring
    yields an empty set.
    """
    if not docstring:
        return set()
    # Optional dash, the captured name, an optional "(...)" group, then ":".
    name_before_colon = r"-?\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*(?:\([^)]*\))?\s*:"
    return set(re.findall(name_before_colon, docstring))
 def analyze_function(func_info: FunctionInfo) -> Dict:
    """Compare a function's docstring against the fields of its Pydantic models.

    For every recorded argument whose annotation name ends in ``Request`` or
    ``Response``, the class of that name is looked up on
    ``litellm.proxy._types`` and the keys of its ``model_fields`` are
    collected. Names that are not found there are ignored. The result lists
    which collected fields are not mentioned in the docstring; ``is_valid``
    is true when none are missing.
    """
    documented = extract_docstring_params(func_info.docstring)

    print(f"func_info.parameters: {func_info.parameters}")
    model_field_names = set()
    for _, annotation_name in func_info.parameters:
        if not annotation_name.endswith(("Request", "Response")):
            continue
        model_cls = getattr(litellm.proxy._types, annotation_name, None)
        if model_cls is None:
            continue
        model_field_names.update(model_cls.model_fields.keys())

    print(f"pydantic_params: {model_field_names}")

    undocumented = model_field_names - documented

    return {
        "function": func_info.name,
        "file_path": func_info.file_path,
        "line_number": func_info.line_number,
        "has_docstring": bool(func_info.docstring),
        "pydantic_params": list(model_field_names),
        "documented_params": list(documented),
        "missing_params": list(undocumented),
        "is_valid": len(undocumented) == 0,
    }
 def print_validation_results(results: Dict) -> None:
    """Print a human-readable report for one ``analyze_function`` result.

    A three-line header (function, location, rule) is always printed. It is
    followed by exactly one outcome: no docstring, no Pydantic fields to
    check, everything documented, or the sorted list of missing parameters.
    """
    print(f"\nChecking function: {results['function']}")
    print(f"File: {results['file_path']}:{results['line_number']}")
    print("-" * 50)

    if not results["has_docstring"]:
        print("❌ No docstring found!")
    elif not results["pydantic_params"]:
        print("ℹ️  No Pydantic input models found.")
    elif results["is_valid"]:
        print("✅ All Pydantic parameters are documented!")
    else:
        print("❌ Missing documentation for parameters:")
        for bullet in map("  - {}".format, sorted(results["missing_params"])):
            print(bullet)
 def main(directory: Optional[str] = None):
    """Check that the listed management endpoints document their Pydantic params.

    Every ``.py`` file under ``directory`` is scanned for the endpoint
    functions named below. Each one found is analysed, and every function
    with undocumented parameters is reported before a single exception is
    raised.

    Args:
        directory: Folder to scan. When omitted, it defaults to
            ``litellm/proxy/management_endpoints`` located relative to this
            file rather than to the current working directory, so the same
            folder is scanned no matter where the script is started from.

    Raises:
        FileNotFoundError: ``directory`` does not exist. Without this guard
            an unreachable folder would make the check pass without
            inspecting anything.
        Exception: at least one endpoint has undocumented parameters; the
            message names each function and its missing parameters.
    """
    function_names = [
        "new_end_user",
        "end_user_info",
        "update_end_user",
        "delete_end_user",
        "generate_key_fn",
        "info_key_fn",
        "update_key_fn",
        "delete_key_fn",
        "new_user",
        "new_team",
        "team_info",
        "update_team",
        "delete_team",
        "new_organization",
        "update_organization",
        "delete_organization",
        "list_organization",
        "user_update",
    ]

    if directory is None:
        # Anchor on this file's location: a path relative to the working
        # directory only resolves when the script is started from its own folder.
        directory = os.path.normpath(
            os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                "..",
                "..",
                "litellm",
                "proxy",
                "management_endpoints",
            )
        )
    if not os.path.isdir(directory):
        raise FileNotFoundError(f"Endpoint directory not found: {directory}")

    # Convert function names to set for faster lookup
    target_functions = set(function_names)
    found_functions: Dict[str, FunctionInfo] = {}

    # Walk through directory
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".py"):
                file_path = os.path.join(root, file)
                found_functions.update(
                    find_functions_in_file(file_path, target_functions)
                )

    # Analyze every endpoint first so one run reports all failures.
    failures = []
    for func_name in function_names:
        if func_name not in found_functions:
            print(f"Skipping {func_name}: not found under {directory}")
            continue
        result = analyze_function(found_functions[func_name])
        if not result["is_valid"]:
            print_validation_results(result)
            failures.append(result)

    if failures:
        summary = "; ".join(
            f"{failed['function']}: {', '.join(sorted(failed['missing_params']))}"
            for failed in failures
        )
        raise Exception(f"Undocumented endpoint parameters - {summary}")
 # Script entry point: run the documentation check when this file is executed directly.
 if __name__ == "__main__":
    main()
--- a/tests/proxy_unit_tests/test_key_generate_prisma.py
+++ b/tests/proxy_unit_tests/test_key_generate_prisma.py
@ -1018,7 +1018,7 @@ def test_generate_and_call_with_expired_key(prisma_client):
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
            print("result from user auth with new key", result)
-            pytest.fail(f"This should have failed!. IT's an expired key")
+            pytest.fail("This should have failed!. It's an expired key")
        asyncio.run(test())
    except Exception as e: