Merge pull request #1399 from BerriAI/litellm_default_use_gunicorn

LiteLLM Proxy - Use Gunicorn with Uvicorn workers
2024-01-10 21:46:04 +05:30 · 2024-01-10 21:46:04 +05:30 · 58d0366447
commit 58d0366447
parent 9a829ff956 fc9af5e900
7 changed files with 62 additions and 17 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -38,6 +38,7 @@ jobs:
            pip install openai
            pip install prisma            
            pip install "httpx==0.24.1"
+            pip install "gunicorn==21.2.0"
            pip install "anyio==3.7.1"
            pip install "asyncio==3.4.3"
            pip install "PyGithub==1.59.1"
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@ -42,7 +42,7 @@ def is_port_in_use(port):
@click.command()
@click.option("--host", default="0.0.0.0", help="Host for the server to listen on.")
@click.option("--port", default=8000, help="Port to bind the server to.")
-@click.option("--num_workers", default=1, help="Number of uvicorn workers to spin up")
+@click.option("--num_workers", default=1, help="Number of gunicorn workers to spin up")
@click.option("--api_base", default=None, help="API base URL.")
@click.option(
    "--api_version",
@ -344,9 +344,10 @@ def run_server(
        )
        try:
            import uvicorn
+            import gunicorn.app.base
        except:
            raise ImportError(
-                "Uvicorn needs to be imported. Run - `pip install uvicorn`"
+                "Uvicorn and gunicorn need to be installed. Run - `pip install 'litellm[proxy]'`"
            )
        if os.getenv("DATABASE_URL", None) is not None:
            # run prisma db push, before starting server
@ -364,9 +365,37 @@ def run_server(
                os.chdir(original_dir)
        if port == 8000 and is_port_in_use(port):
            port = random.randint(1024, 49152)
-        uvicorn.run(
-            "litellm.proxy.proxy_server:app", host=host, port=port, workers=num_workers
-        )
+
+        # Gunicorn Application Class
+        class StandaloneApplication(gunicorn.app.base.BaseApplication):
+            def __init__(self, app, options=None):
+                self.options = options or {}  # gunicorn options
+                self.application = app  # FastAPI app
+                super().__init__()
+
+            def load_config(self):
+                # Note: this loads the gunicorn config; it has nothing to do with the LiteLLM Proxy config
+                config = {
+                    key: value
+                    for key, value in self.options.items()
+                    if key in self.cfg.settings and value is not None
+                }
+                for key, value in config.items():
+                    self.cfg.set(key.lower(), value)
+
+            def load(self):
+                # gunicorn app function
+                return self.application
+
+        gunicorn_options = {
+            "bind": f"{host}:{port}",
+            "workers": num_workers,  # default is 1
+            "worker_class": "uvicorn.workers.UvicornWorker",
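+            "preload": True,  # NOTE(review): intended to enable app preloading, but gunicorn's setting is named "preload_app" ("--preload" is only the CLI flag); "preload" is not in cfg.settings, so load_config() appears to drop it - confirm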
+        }
+        from litellm.proxy.proxy_server import app
+
+        StandaloneApplication(app=app, options=gunicorn_options).run()  # Run gunicorn


 if __name__ == "__main__":
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@ -41,13 +41,6 @@ model_list:
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      mode: embedding
-  - model_name: text-davinci-003
-    litellm_params:
-      model: text-davinci-003
-      api_key: os.environ/OPENAI_API_KEY
-    model_info:
-      mode: completion
-
 litellm_settings:
  fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
  # cache: True     
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -14,7 +14,6 @@ sys.path.insert(
 )  # Adds the parent directory to the system path - for litellm local dev

 try:
-    import uvicorn
    import fastapi
    import backoff
    import yaml
--- a/poetry.lock
+++ b/poetry.lock
@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.

 [[package]]
 name = "aiohttp"
@ -670,6 +670,26 @@ gitdb = ">=4.0.1,<5"
 [package.extras]
 test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest", "pytest-cov", "pytest-instafail", "pytest-subtests", "pytest-sugar"]

+[[package]]
+name = "gunicorn"
+version = "21.2.0"
+description = "WSGI HTTP Server for UNIX"
+optional = true
+python-versions = ">=3.5"
+files = [
+    {file = "gunicorn-21.2.0-py3-none-any.whl", hash = "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0"},
+    {file = "gunicorn-21.2.0.tar.gz", hash = "sha256:88ec8bff1d634f98e61b9f65bc4bf3cd918a90806c6f5c48bc5603849ec81033"},
+]
+
+[package.dependencies]
+packaging = "*"
+
+[package.extras]
+eventlet = ["eventlet (>=0.24.1)"]
+gevent = ["gevent (>=1.4.0)"]
+setproctitle = ["setproctitle"]
+tornado = ["tornado (>=0.2)"]
+
 [[package]]
 name = "h11"
 version = "0.14.0"
@ -1238,8 +1258,8 @@ files = [
 [package.dependencies]
 numpy = [
    {version = ">=1.20.3", markers = "python_version < \"3.10\""},
-    {version = ">=1.21.0", markers = "python_version >= \"3.10\""},
    {version = ">=1.23.2", markers = "python_version >= \"3.11\""},
+    {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
 ]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
@ -2664,9 +2684,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p

 [extras]
 extra-proxy = ["streamlit"]
-proxy = ["backoff", "fastapi", "orjson", "pyyaml", "rq", "uvicorn"]
+proxy = ["backoff", "fastapi", "gunicorn", "orjson", "pyyaml", "rq", "uvicorn"]

 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<3.9.7 || >3.9.7"
-content-hash = "9f15083d98fe14237abea81eaca802e1db28cfb89bbe127498aa1fabb3c99849"
+content-hash = "b49d09f51e8a57cdf883ab03cd9fecaf1ad007c3092d53347e30129e25adceab"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -18,6 +18,7 @@ jinja2 = "^3.1.2"
 aiohttp = "*"

 uvicorn = {version = "^0.22.0", optional = true}
+gunicorn = {version = "^21.2.0", optional = true}
 fastapi = {version = "^0.104.1", optional = true}
 backoff = {version = "*", optional = true}
 pyyaml = {version = "^6.0", optional = true}
@ -27,6 +28,7 @@ streamlit = {version = "^1.29.0", optional = true}

 [tool.poetry.extras]
 proxy = [
+    "gunicorn",
    "uvicorn",
    "fastapi",
    "backoff",
--- a/requirements.txt
+++ b/requirements.txt
@ -6,6 +6,7 @@ pydantic>=2.5 # openai req.
 backoff==2.2.1 # server dep
 pyyaml==6.0 # server dep
 uvicorn==0.22.0 # server dep
+gunicorn==21.2.0 # server dep
 boto3==1.28.58 # aws bedrock/sagemaker calls
 redis==4.6.0 # caching
 prisma==0.11.0 # for db