diff --git a/.circleci/config.yml b/.circleci/config.yml index 3ea6b7fca..26d714f9d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -38,6 +38,7 @@ jobs: pip install openai pip install prisma pip install "httpx==0.24.1" + pip install "gunicorn==21.2.0" pip install "anyio==3.7.1" pip install "asyncio==3.4.3" pip install "PyGithub==1.59.1" diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index 0150cfe44..5d0dce8e9 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -42,7 +42,7 @@ def is_port_in_use(port): @click.command() @click.option("--host", default="0.0.0.0", help="Host for the server to listen on.") @click.option("--port", default=8000, help="Port to bind the server to.") -@click.option("--num_workers", default=1, help="Number of uvicorn workers to spin up") +@click.option("--num_workers", default=1, help="Number of gunicorn workers to spin up") @click.option("--api_base", default=None, help="API base URL.") @click.option( "--api_version", @@ -344,9 +344,10 @@ def run_server( ) try: import uvicorn + import gunicorn.app.base except: raise ImportError( - "Uvicorn needs to be imported. Run - `pip install uvicorn`" + "Uvicorn, gunicorn needs to be imported. 
Run - `pip install 'litellm[proxy]'`" ) if os.getenv("DATABASE_URL", None) is not None: # run prisma db push, before starting server @@ -364,9 +365,37 @@ def run_server( os.chdir(original_dir) if port == 8000 and is_port_in_use(port): port = random.randint(1024, 49152) - uvicorn.run( - "litellm.proxy.proxy_server:app", host=host, port=port, workers=num_workers - ) + + # Gunicorn Application Class + class StandaloneApplication(gunicorn.app.base.BaseApplication): + def __init__(self, app, options=None): + self.options = options or {} # gunicorn options + self.application = app # FastAPI app + super().__init__() + + def load_config(self): + # note: This Loads the gunicorn config - has nothing to do with LiteLLM Proxy config + config = { + key: value + for key, value in self.options.items() + if key in self.cfg.settings and value is not None + } + for key, value in config.items(): + self.cfg.set(key.lower(), value) + + def load(self): + # gunicorn app function + return self.application + + gunicorn_options = { + "bind": f"{host}:{port}", + "workers": num_workers, # default is 1 + "worker_class": "uvicorn.workers.UvicornWorker", + "preload_app": True, # preload flag; must use the setting name "preload_app" - load_config drops keys not in cfg.settings + } + from litellm.proxy.proxy_server import app + + StandaloneApplication(app=app, options=gunicorn_options).run() # Run gunicorn if __name__ == "__main__": diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index e461820fe..81374c8ee 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -41,13 +41,6 @@ model_list: api_key: os.environ/OPENAI_API_KEY model_info: mode: embedding - - model_name: text-davinci-003 - litellm_params: - model: text-davinci-003 - api_key: os.environ/OPENAI_API_KEY - model_info: - mode: completion - litellm_settings: fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}] # cache: True diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index e93c9baf1..a1390688e 100644 --- a/litellm/proxy/proxy_server.py +++ 
b/litellm/proxy/proxy_server.py @@ -14,7 +14,6 @@ sys.path.insert( ) # Adds the parent directory to the system path - for litellm local dev try: - import uvicorn import fastapi import backoff import yaml diff --git a/poetry.lock b/poetry.lock index 0d1b737c4..24673701a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -670,6 +670,26 @@ gitdb = ">=4.0.1,<5" [package.extras] test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest", "pytest-cov", "pytest-instafail", "pytest-subtests", "pytest-sugar"] +[[package]] +name = "gunicorn" +version = "21.2.0" +description = "WSGI HTTP Server for UNIX" +optional = true +python-versions = ">=3.5" +files = [ + {file = "gunicorn-21.2.0-py3-none-any.whl", hash = "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0"}, + {file = "gunicorn-21.2.0.tar.gz", hash = "sha256:88ec8bff1d634f98e61b9f65bc4bf3cd918a90806c6f5c48bc5603849ec81033"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +eventlet = ["eventlet (>=0.24.1)"] +gevent = ["gevent (>=1.4.0)"] +setproctitle = ["setproctitle"] +tornado = ["tornado (>=0.2)"] + [[package]] name = "h11" version = "0.14.0" @@ -1238,8 +1258,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2664,9 +2684,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [extras] extra-proxy = ["streamlit"] -proxy = ["backoff", "fastapi", "orjson", 
"pyyaml", "rq", "uvicorn"] +proxy = ["backoff", "fastapi", "gunicorn", "orjson", "pyyaml", "rq", "uvicorn"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.9.7 || >3.9.7" -content-hash = "9f15083d98fe14237abea81eaca802e1db28cfb89bbe127498aa1fabb3c99849" +content-hash = "b49d09f51e8a57cdf883ab03cd9fecaf1ad007c3092d53347e30129e25adceab" diff --git a/pyproject.toml b/pyproject.toml index 5dd615325..af738d509 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ jinja2 = "^3.1.2" aiohttp = "*" uvicorn = {version = "^0.22.0", optional = true} +gunicorn = {version = "^21.2.0", optional = true} fastapi = {version = "^0.104.1", optional = true} backoff = {version = "*", optional = true} pyyaml = {version = "^6.0", optional = true} @@ -27,6 +28,7 @@ streamlit = {version = "^1.29.0", optional = true} [tool.poetry.extras] proxy = [ + "gunicorn", "uvicorn", "fastapi", "backoff", diff --git a/requirements.txt b/requirements.txt index 8dbf49ef9..6ee965bdd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ pydantic>=2.5 # openai req. backoff==2.2.1 # server dep pyyaml==6.0 # server dep uvicorn==0.22.0 # server dep +gunicorn==21.2.0 # server dep boto3==1.28.58 # aws bedrock/sagemaker calls redis==4.6.0 # caching prisma==0.11.0 # for db