From c7fe33202dd4aacf03c84f84c4274aaafab38383 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 10 Jan 2024 16:29:38 +0530 Subject: [PATCH 01/10] v0 --- litellm/proxy/proxy_cli.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index b154b21e1..e9f1c1fb4 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -366,16 +366,37 @@ def run_server( use_queue=use_queue, ) try: - import uvicorn + import gunicorn except: raise ImportError( "Uvicorn needs to be imported. Run - `pip install uvicorn`" ) if port == 8000 and is_port_in_use(port): port = random.randint(1024, 49152) - uvicorn.run( - "litellm.proxy.proxy_server:app", host=host, port=port, workers=num_workers - ) + + from gunicorn.app.base import BaseApplication + + class StandaloneApplication(BaseApplication): + def __init__(self, app, options=None): + self.options = options or {} + self.application = app + super().__init__() + + def load_config(self): + for key, value in self.options.items(): + self.cfg.set(key, value) + + def load(self): + return self.application + + num_workers = 4 # Set the desired number of Gunicorn workers + host = "0.0.0.0" + gunicorn_options = { + "bind": f"{host}:{port}", + "workers": num_workers, + } + + StandaloneApplication(app, gunicorn_options).run() if __name__ == "__main__": From 5136d5980ffc3d5ef1c1ebf004702056cae635d9 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 10 Jan 2024 17:09:03 +0530 Subject: [PATCH 02/10] (fix) use gunicorn to start proxy --- litellm/proxy/proxy_cli.py | 25 +++++++++++++++++-------- litellm/proxy/proxy_config.yaml | 7 ------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index e9f1c1fb4..3a242bd3c 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -366,37 +366,46 @@ def run_server( use_queue=use_queue, ) try: - import 
gunicorn + import uvicorn except: raise ImportError( "Uvicorn needs to be imported. Run - `pip install uvicorn`" ) if port == 8000 and is_port_in_use(port): port = random.randint(1024, 49152) + # uvicorn.run( + # "litellm.proxy.proxy_server:app", host=host, port=port, workers=num_workers + # ) - from gunicorn.app.base import BaseApplication + import gunicorn.app.base - class StandaloneApplication(BaseApplication): + class StandaloneApplication(gunicorn.app.base.BaseApplication): def __init__(self, app, options=None): self.options = options or {} self.application = app super().__init__() def load_config(self): - for key, value in self.options.items(): - self.cfg.set(key, value) + config = { + key: value + for key, value in self.options.items() + if key in self.cfg.settings and value is not None + } + for key, value in config.items(): + self.cfg.set(key.lower(), value) def load(self): return self.application - num_workers = 4 # Set the desired number of Gunicorn workers - host = "0.0.0.0" gunicorn_options = { "bind": f"{host}:{port}", "workers": num_workers, + "worker_class": "uvicorn.workers.UvicornWorker", + "preload": True, # Add the preload flag } + from litellm.proxy.proxy_server import app - StandaloneApplication(app, gunicorn_options).run() + StandaloneApplication(app=app, options=gunicorn_options).run() if __name__ == "__main__": diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index e461820fe..81374c8ee 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -41,13 +41,6 @@ model_list: api_key: os.environ/OPENAI_API_KEY model_info: mode: embedding - - model_name: text-davinci-003 - litellm_params: - model: text-davinci-003 - api_key: os.environ/OPENAI_API_KEY - model_info: - mode: completion - litellm_settings: fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}] # cache: True From 873965df226eb11adb168150debd57faca21ff97 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 10 Jan 2024 17:39:05 +0530 
Subject: [PATCH 03/10] (chore) remove old uvicorn logic --- litellm/proxy/proxy_cli.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 3a242bd3c..2b411738c 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -65,7 +65,7 @@ def is_port_in_use(port): @click.command() @click.option("--host", default="0.0.0.0", help="Host for the server to listen on.") @click.option("--port", default=8000, help="Port to bind the server to.") -@click.option("--num_workers", default=1, help="Number of uvicorn workers to spin up") +@click.option("--num_workers", default=1, help="Number of gunicorn workers to spin up") @click.option("--api_base", default=None, help="API base URL.") @click.option( "--api_version", @@ -373,9 +373,6 @@ def run_server( ) if port == 8000 and is_port_in_use(port): port = random.randint(1024, 49152) - # uvicorn.run( - # "litellm.proxy.proxy_server:app", host=host, port=port, workers=num_workers - # ) import gunicorn.app.base From 1276112119e8db8137d55c62c3c9524979701cf5 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 10 Jan 2024 17:47:24 +0530 Subject: [PATCH 04/10] (feat) add gunicorn as a dep --- pyproject.toml | 2 ++ requirements.txt | 1 + 2 files changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index ae8a8306b..73a07bc02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ jinja2 = "^3.1.2" aiohttp = "*" uvicorn = {version = "^0.22.0", optional = true} +gunicorn = {version = "^21.2.0", optional = true} fastapi = {version = "^0.104.1", optional = true} backoff = {version = "*", optional = true} pyyaml = {version = "^6.0", optional = true} @@ -27,6 +28,7 @@ streamlit = {version = "^1.29.0", optional = true} [tool.poetry.extras] proxy = [ + "gunicorn", "uvicorn", "fastapi", "backoff", diff --git a/requirements.txt b/requirements.txt index 8dbf49ef9..6ee965bdd 100644 --- a/requirements.txt +++ b/requirements.txt @@ 
-6,6 +6,7 @@ pydantic>=2.5 # openai req. backoff==2.2.1 # server dep pyyaml==6.0 # server dep uvicorn==0.22.0 # server dep +gunicorn==21.2.0 # server dep boto3==1.28.58 # aws bedrock/sagemaker calls redis==4.6.0 # caching prisma==0.11.0 # for db From 67dc9adc71a30183c758138a1318d94e942a3ff3 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 10 Jan 2024 17:47:34 +0530 Subject: [PATCH 05/10] (fix) import gunicorn --- litellm/proxy/proxy_cli.py | 5 ++--- litellm/proxy/proxy_server.py | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 2b411738c..09b41034d 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -367,15 +367,14 @@ def run_server( ) try: import uvicorn + import gunicorn.app.base except: raise ImportError( - "Uvicorn needs to be imported. Run - `pip install uvicorn`" + "Uvicorn, gunicorn needs to be imported. Run - `pip 'litellm[proxy]'`" ) if port == 8000 and is_port_in_use(port): port = random.randint(1024, 49152) - import gunicorn.app.base - class StandaloneApplication(gunicorn.app.base.BaseApplication): def __init__(self, app, options=None): self.options = options or {} diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index e93c9baf1..a1390688e 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -14,7 +14,6 @@ sys.path.insert( ) # Adds the parent directory to the system path - for litellm local dev try: - import uvicorn import fastapi import backoff import yaml From 2b9174c3d7b058380c03f276ffdd9290000ba83a Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 10 Jan 2024 17:50:51 +0530 Subject: [PATCH 06/10] (feat) add comments on starting with gunicorn --- litellm/proxy/proxy_cli.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 09b41034d..3773053cb 100644 --- a/litellm/proxy/proxy_cli.py +++ 
b/litellm/proxy/proxy_cli.py @@ -375,13 +375,15 @@ def run_server( if port == 8000 and is_port_in_use(port): port = random.randint(1024, 49152) + # Gunicorn Application Class class StandaloneApplication(gunicorn.app.base.BaseApplication): def __init__(self, app, options=None): - self.options = options or {} - self.application = app + self.options = options or {} # gunicorn options + self.application = app # FastAPI app super().__init__() def load_config(self): + # note: This Loads the gunicorn config - has nothing to do with LiteLLM Proxy config config = { key: value for key, value in self.options.items() @@ -391,17 +393,18 @@ def run_server( self.cfg.set(key.lower(), value) def load(self): + # gunicorn app function return self.application gunicorn_options = { "bind": f"{host}:{port}", - "workers": num_workers, + "workers": num_workers, # default is 1 "worker_class": "uvicorn.workers.UvicornWorker", "preload": True, # Add the preload flag } from litellm.proxy.proxy_server import app - StandaloneApplication(app=app, options=gunicorn_options).run() + StandaloneApplication(app=app, options=gunicorn_options).run() # Run gunicorn if __name__ == "__main__": From dd3dabc979b79df7fa7600c0e66ebebcb8054565 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 10 Jan 2024 17:57:36 +0530 Subject: [PATCH 07/10] (fix) use litellm entrypoint --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3b701cd49..270c3b736 100644 --- a/Dockerfile +++ b/Dockerfile @@ -51,8 +51,6 @@ RUN chmod +x entrypoint.sh EXPOSE 4000/tcp -# Set your entrypoint and command - if user wants to use Prisma Database -ENTRYPOINT ["sh", "-c", "[ -n \"$DATABASE_URL\" ] && ./entrypoint.sh"] # this allows accepting litellm args -CMD ["litellm", "--port", "4000"] \ No newline at end of file +ENTRYPOINT ["litellm", "--port", "4000"] \ No newline at end of file From 4d8d58f0c9237911a47359892e0f542be4fc67a5 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: 
Wed, 10 Jan 2024 18:09:59 +0530 Subject: [PATCH 08/10] (test) temp - comment out deployed proxy keygen test --- .circleci/config.yml | 1 + litellm/tests/test_deployed_proxy_keygen.py | 98 ++++++++++----------- 2 files changed, 50 insertions(+), 49 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 56f9a15fe..8a155a0d9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -38,6 +38,7 @@ jobs: pip install openai pip install prisma pip install "httpx==0.24.1" + pip install "gunicorn==21.2.0" pip install "anyio==3.7.1" pip install "asyncio==3.4.3" - save_cache: diff --git a/litellm/tests/test_deployed_proxy_keygen.py b/litellm/tests/test_deployed_proxy_keygen.py index e62760943..e0acee083 100644 --- a/litellm/tests/test_deployed_proxy_keygen.py +++ b/litellm/tests/test_deployed_proxy_keygen.py @@ -1,63 +1,63 @@ -import sys, os, time -import traceback -from dotenv import load_dotenv +# import sys, os, time +# import traceback +# from dotenv import load_dotenv -load_dotenv() -import os, io +# load_dotenv() +# import os, io -# this file is to test litellm/proxy +# # this file is to test litellm/proxy -sys.path.insert( - 0, os.path.abspath("../..") -) # Adds the parent directory to the system path -import pytest, logging, requests -import litellm -from litellm import embedding, completion, completion_cost, Timeout -from litellm import RateLimitError +# sys.path.insert( +# 0, os.path.abspath("../..") +# ) # Adds the parent directory to the system path +# import pytest, logging, requests +# import litellm +# from litellm import embedding, completion, completion_cost, Timeout +# from litellm import RateLimitError -def test_add_new_key(): - max_retries = 3 - retry_delay = 1 # seconds +# def test_add_new_key(): +# max_retries = 3 +# retry_delay = 1 # seconds - for retry in range(max_retries + 1): - try: - # Your test data - test_data = { - "models": ["gpt-3.5-turbo", "gpt-4", "claude-2", "azure-model"], - "aliases": {"mistral-7b": 
"gpt-3.5-turbo"}, - "duration": "20m", - } - print("testing proxy server") +# for retry in range(max_retries + 1): +# try: +# # Your test data +# test_data = { +# "models": ["gpt-3.5-turbo", "gpt-4", "claude-2", "azure-model"], +# "aliases": {"mistral-7b": "gpt-3.5-turbo"}, +# "duration": "20m", +# } +# print("testing proxy server") - # Your bearer token - token = os.getenv("PROXY_MASTER_KEY") - headers = {"Authorization": f"Bearer {token}"} +# # Your bearer token +# token = os.getenv("PROXY_MASTER_KEY") +# headers = {"Authorization": f"Bearer {token}"} - staging_endpoint = "https://litellm-litellm-pr-1376.up.railway.app" - main_endpoint = "https://litellm-staging.up.railway.app" +# staging_endpoint = "https://litellm-litellm-pr-1376.up.railway.app" +# main_endpoint = "https://litellm-staging.up.railway.app" - # Make a request to the staging endpoint - response = requests.post( - main_endpoint + "/key/generate", json=test_data, headers=headers - ) +# # Make a request to the staging endpoint +# response = requests.post( +# main_endpoint + "/key/generate", json=test_data, headers=headers +# ) - print(f"response: {response.text}") +# print(f"response: {response.text}") - if response.status_code == 200: - result = response.json() - break # Successful response, exit the loop - elif response.status_code == 503 and retry < max_retries: - print( - f"Retrying in {retry_delay} seconds... (Retry {retry + 1}/{max_retries})" - ) - time.sleep(retry_delay) - else: - assert False, f"Unexpected response status code: {response.status_code}" +# if response.status_code == 200: +# result = response.json() +# break # Successful response, exit the loop +# elif response.status_code == 503 and retry < max_retries: +# print( +# f"Retrying in {retry_delay} seconds... 
(Retry {retry + 1}/{max_retries})" +# ) +# time.sleep(retry_delay) +# else: +# assert False, f"Unexpected response status code: {response.status_code}" - except Exception as e: - print(traceback.format_exc()) - pytest.fail(f"An error occurred {e}") +# except Exception as e: +# print(traceback.format_exc()) +# pytest.fail(f"An error occurred {e}") -test_add_new_key() +# test_add_new_key() From 9bd9ff1038ce951b064a04dd5797097952ab2f92 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 10 Jan 2024 18:12:54 +0530 Subject: [PATCH 09/10] (fix) add gunicorn to poetry lock --- poetry.lock | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0d1b737c4..24673701a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -670,6 +670,26 @@ gitdb = ">=4.0.1,<5" [package.extras] test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest", "pytest-cov", "pytest-instafail", "pytest-subtests", "pytest-sugar"] +[[package]] +name = "gunicorn" +version = "21.2.0" +description = "WSGI HTTP Server for UNIX" +optional = true +python-versions = ">=3.5" +files = [ + {file = "gunicorn-21.2.0-py3-none-any.whl", hash = "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0"}, + {file = "gunicorn-21.2.0.tar.gz", hash = "sha256:88ec8bff1d634f98e61b9f65bc4bf3cd918a90806c6f5c48bc5603849ec81033"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +eventlet = ["eventlet (>=0.24.1)"] +gevent = ["gevent (>=1.4.0)"] +setproctitle = ["setproctitle"] +tornado = ["tornado (>=0.2)"] + [[package]] name = "h11" version = "0.14.0" @@ -1238,8 +1258,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < 
\"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2664,9 +2684,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [extras] extra-proxy = ["streamlit"] -proxy = ["backoff", "fastapi", "orjson", "pyyaml", "rq", "uvicorn"] +proxy = ["backoff", "fastapi", "gunicorn", "orjson", "pyyaml", "rq", "uvicorn"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.9.7 || >3.9.7" -content-hash = "9f15083d98fe14237abea81eaca802e1db28cfb89bbe127498aa1fabb3c99849" +content-hash = "b49d09f51e8a57cdf883ab03cd9fecaf1ad007c3092d53347e30129e25adceab" From fc9af5e90058357f76f2db9eaa4b2cfccd996b72 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 10 Jan 2024 21:36:31 +0530 Subject: [PATCH 10/10] (fix) use Dockerfile from main --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 270c3b736..3b701cd49 100644 --- a/Dockerfile +++ b/Dockerfile @@ -51,6 +51,8 @@ RUN chmod +x entrypoint.sh EXPOSE 4000/tcp +# Set your entrypoint and command - if user wants to use Prisma Database +ENTRYPOINT ["sh", "-c", "[ -n \"$DATABASE_URL\" ] && ./entrypoint.sh"] # this allows accepting litellm args -ENTRYPOINT ["litellm", "--port", "4000"] \ No newline at end of file +CMD ["litellm", "--port", "4000"] \ No newline at end of file