diff --git a/docs/my-website/docs/proxy/virtual_keys.md b/docs/my-website/docs/proxy/virtual_keys.md index ee8a99042..1cb28a2e3 100644 --- a/docs/my-website/docs/proxy/virtual_keys.md +++ b/docs/my-website/docs/proxy/virtual_keys.md @@ -1,4 +1,4 @@ -# Key Management +# Virtual Keys Track Spend, Set budgets and create virtual keys for the proxy Grant other's temporary access to your proxy, with keys that expire after a set duration. @@ -12,7 +12,7 @@ Grant other's temporary access to your proxy, with keys that expire after a set ::: -## Quick Start +## Setup Requirements: @@ -58,16 +58,37 @@ litellm --config /path/to/config.yaml curl 'http://0.0.0.0:8000/key/generate' \ --header 'Authorization: Bearer ' \ --header 'Content-Type: application/json' \ ---data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai", "team": "core-infra"}}' +--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai"}}' ``` + +## /key/generate + +### Request +```shell +curl 'http://0.0.0.0:8000/key/generate' \ +--header 'Authorization: Bearer ' \ +--header 'Content-Type: application/json' \ +--data-raw '{ + "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], + "duration": "20m", + "metadata": {"user": "ishaan@berri.ai"}, + "team_id": "core-infra" +}' +``` + + +Request Params: + - `models`: *list or null (optional)* - Specify the models a token has access too. If null, then token has access to all models on server. - `duration`: *str or null (optional)* Specify the length of time the token is valid for. If null, default is set to 1 hour. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). - `metadata`: *dict or null (optional)* Pass metadata for the created token. 
If null defaults to {} -Expected response: +- `team_id`: *str or null (optional)* Specify the team_id to associate with the key. + +### Response ```python { @@ -76,7 +97,7 @@ Expected response: } ``` -## Keys that don't expire +### Keys that don't expire Just set duration to None. @@ -87,7 +108,7 @@ curl --location 'http://0.0.0.0:8000/key/generate' \ --data '{"models": ["azure-models"], "aliases": {"mistral-7b": "gpt-3.5-turbo"}, "duration": null}' ``` -## Upgrade/Downgrade Models +### Upgrade/Downgrade Models If a user is expected to use a given model (i.e. gpt3-5), and you want to: @@ -137,7 +158,7 @@ curl -X POST "https://0.0.0.0:8000/key/generate" \ - **How are routing between diff keys/api bases done?** litellm handles this by shuffling between different models in the model list with the same model_name. [**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/router.py) -## Grant Access to new model +### Grant Access to new model Use model access groups to give users access to select models, and add new ones to it over time (e.g. mistral, llama-2, etc.)
@@ -165,6 +186,102 @@ curl --location 'http://localhost:8000/key/generate' \ "max_budget": 0,}' ``` + +## /key/info + +### Request +```shell +curl -X GET "http://0.0.0.0:8000/key/info?key=sk-02Wr4IAlN3NvPXvL5JVvDA" \ +-H "Authorization: Bearer sk-1234" +``` + +Request Params: +- key: str - The key you want the info for + +### Response + +`token` is the hashed key (The DB stores the hashed key for security) +```json +{ + "key": "sk-02Wr4IAlN3NvPXvL5JVvDA", + "info": { + "token": "80321a12d03412c527f2bd9db5fabd746abead2e1d50b435a534432fbaca9ef5", + "spend": 0.0, + "expires": "2024-01-18T23:52:09.125000+00:00", + "models": ["azure-gpt-3.5", "azure-embedding-model"], + "aliases": {}, + "config": {}, + "user_id": "ishaan2@berri.ai", + "team_id": "None", + "max_parallel_requests": null, + "metadata": {} + } +} + + +``` + +## /key/update + +### Request +```shell +curl 'http://0.0.0.0:8000/key/update' \ +--header 'Authorization: Bearer ' \ +--header 'Content-Type: application/json' \ +--data-raw '{ + "key": "sk-kdEXbIqZRwEeEiHwdg7sFA", + "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], + "metadata": {"user": "ishaan@berri.ai"}, + "team_id": "core-infra" +}' +``` + +Request Params: +- key: str - The key that needs to be updated. + +- models: list or null (optional) - Specify the models a token has access to. If null, then the token has access to all models on the server. + +- metadata: dict or null (optional) - Pass metadata for the updated token. If null, defaults to an empty dictionary. + +- team_id: str or null (optional) - Specify the team_id for the associated key. 
+ +### Response + +```json +{ + "key": "sk-kdEXbIqZRwEeEiHwdg7sFA", + "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], + "metadata": { + "user": "ishaan@berri.ai" + } +} + +``` + + +## /key/delete + +### Request +```shell +curl 'http://0.0.0.0:8000/key/delete' \ +--header 'Authorization: Bearer ' \ +--header 'Content-Type: application/json' \ +--data-raw '{ + "keys": ["sk-kdEXbIqZRwEeEiHwdg7sFA"] +}' +``` + +Request Params: +- keys: List[str] - List of keys to delete + +### Response + +```json +{ + "deleted_keys": ["sk-kdEXbIqZRwEeEiHwdg7sFA"] +} +``` + ## Tracking Spend You can get spend for a key by using the `/key/info` endpoint. diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index e01f6f3a3..a12c01518 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -129,6 +129,7 @@ class GenerateKeyRequest(LiteLLMBase): config: Optional[dict] = {} spend: Optional[float] = 0 user_id: Optional[str] = None + team_id: Optional[str] = None max_parallel_requests: Optional[int] = None metadata: Optional[dict] = {} diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index afa0f0fe0..5d55a7161 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1078,6 +1078,7 @@ async def generate_key_helper_fn( max_budget: Optional[float] = None, token: Optional[str] = None, user_id: Optional[str] = None, + team_id: Optional[str] = None, user_email: Optional[str] = None, max_parallel_requests: Optional[int] = None, metadata: Optional[dict] = {}, @@ -1121,12 +1122,15 @@ async def generate_key_helper_fn( config_json = json.dumps(config) metadata_json = json.dumps(metadata) user_id = user_id or str(uuid.uuid4()) + if type(team_id) is not str: + team_id = str(team_id) try: # Create a new verification token (you may want to enhance this logic based on your needs) user_data = { "max_budget": max_budget, "user_email": user_email, "user_id": user_id, + "team_id": team_id, "spend": spend, "models": models, } @@ -1138,6 
+1142,7 @@ async def generate_key_helper_fn( "config": config_json, "spend": spend, "user_id": user_id, + "team_id": team_id, "max_parallel_requests": max_parallel_requests, "metadata": metadata_json, } @@ -2084,6 +2089,7 @@ async def generate_key_fn( Parameters: - duration: Optional[str] - Specify the length of time the token is valid for. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). **(Default is set to 1 hour.)** + - team_id: Optional[str] - The team id of the user - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models) - aliases: Optional[dict] - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models - config: Optional[dict] - any key-specific configs, overrides config in config.yaml diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma index 2e40a3204..bbd9fec45 100644 --- a/litellm/proxy/schema.prisma +++ b/litellm/proxy/schema.prisma @@ -9,6 +9,7 @@ generator client { model LiteLLM_UserTable { user_id String @unique + team_id String? max_budget Float? spend Float @default(0.0) user_email String? @@ -24,6 +25,7 @@ model LiteLLM_VerificationToken { aliases Json @default("{}") config Json @default("{}") user_id String? + team_id String? max_parallel_requests Int? 
metadata Json @default("{}") } diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py index 5accd03c6..ae51e4e96 100644 --- a/litellm/tests/test_key_generate_prisma.py +++ b/litellm/tests/test_key_generate_prisma.py @@ -543,7 +543,8 @@ def test_generate_and_update_key(prisma_client): async def test(): await litellm.proxy.proxy_server.prisma_client.connect() request = NewUserRequest( - metadata={"team": "litellm-team3", "project": "litellm-project3"} + metadata={"team": "litellm-team3", "project": "litellm-project3"}, + team_id="litellm-core-infra@gmail.com", ) key = await new_user(request) print(key) @@ -560,6 +561,7 @@ def test_generate_and_update_key(prisma_client): "team": "litellm-team3", "project": "litellm-project3", } + assert result["info"].team_id == "litellm-core-infra@gmail.com" request = Request(scope={"type": "http"}) request._url = URL(url="/update/key") diff --git a/schema.prisma b/schema.prisma index 31eae05c2..7626989d7 100644 --- a/schema.prisma +++ b/schema.prisma @@ -9,6 +9,7 @@ generator client { model LiteLLM_UserTable { user_id String @unique + team_id String? max_budget Float? spend Float @default(0.0) user_email String? @@ -24,6 +25,7 @@ model LiteLLM_VerificationToken { aliases Json @default("{}") config Json @default("{}") user_id String? + team_id String? max_parallel_requests Int? metadata Json @default("{}") }