From 8b8e5890e32061b8b8f4c003f095a781b0b84b76 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 21 Nov 2023 17:20:24 -0800
Subject: [PATCH] (docs) routing

---
 docs/my-website/docs/routing.md | 78 ++++++++++++++++++++++++++-------
 1 file changed, 63 insertions(+), 15 deletions(-)

diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index 4c5928cccb..1441016a62 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -250,37 +250,85 @@ Queue your LLM API requests to ensure you're under your rate limits
 - Step 1: Create a `/queue/reques` request
 - Step 2: Poll your request, to check if it's completed
 
-### Step 1: Queue a `/chat/completion` request
+## Step 1 Add a config to the proxy, generate a temp key
 
 ```python
 import requests
-# args to litellm.completion()
-data = {
-    'model': 'gpt-3.5-turbo',
-    'messages': [
-        {'role': 'system', 'content': f'You are a helpful assistant. What llm are you?'},
-    ],
+import time
+config = {
+
 }
-response = requests.post("http://0.0.0.0:8000/queue/request", json=data)
-response = response.json()
-polling_url = response["url"]
+
+response = requests.post(
+    url = "http://0.0.0.0:8000/key/generate",
+    json={
+        "config": config,
+        "duration": "30d" # default to 30d, set it to 30m if you want a temp key
+    },
+    headers={
+        "Authorization": "Bearer sk-hosted-litellm"
+    }
+)
+
+print("\nresponse from generating key", response.json())
+
+generated_key = response.json()["key"]
+print("\ngenerated key for proxy", generated_key)
 ```
 
-### Step 2: Poll your `/chat/completion` request
+# Step 2: Queue a request to the proxy, using your generated_key
 
 ```python
- while True:
+job_response = requests.post(
+    url = "http://0.0.0.0:8000/queue/request",
+    json={
+        'model': 'gpt-3.5-turbo',
+        'messages': [
+            {'role': 'system', 'content': f'You are a helpful assistant. What is your name'},
+        ],
+    },
+    headers={
+        "Authorization": f"Bearer {generated_key}"
+    }
+)
+
+job_response = job_response.json()
+job_id = job_response["id"]
+polling_url = job_response["url"]
+polling_url = f"http://0.0.0.0:8000{polling_url}"
+print("\nCreated Job, Polling Url", polling_url)
+```
+
+# Step 3: Poll the request
+```python
+while True:
     try:
-      polling_url = f"http://0.0.0.0:8000{polling_url}"
-      polling_response = requests.get(polling_url)
+      print("\nPolling URL", polling_url)
+      polling_response = requests.get(
+        url=polling_url,
+        headers={
+            "Authorization": f"Bearer {generated_key}"
+        }
+      )
       polling_response = polling_response.json()
-      print("\n RESPONSE FROM POLLING JOB", polling_response)
+      print("\nResponse from polling url", polling_response)
       status = polling_response["status"]
       if status == "finished":
         llm_response = polling_response["result"]
+        print("LLM Response")
         print(llm_response)
+        break
+      time.sleep(0.5)
+    except Exception as e:
+      print("got exception in polling", e)
+      break
+
 ```
 
 
+
 