Merge pull request #1399 from BerriAI/litellm_default_use_gunicorn
LiteLLM Proxy - Use Gunicorn with Uvicorn workers

commit 58d0366447
7 changed files with 62 additions and 17 deletions
@@ -38,6 +38,7 @@ jobs:
             pip install openai
             pip install prisma
             pip install "httpx==0.24.1"
+            pip install "gunicorn==21.2.0"
             pip install "anyio==3.7.1"
             pip install "asyncio==3.4.3"
             pip install "PyGithub==1.59.1"
@@ -42,7 +42,7 @@ def is_port_in_use(port):
 @click.command()
 @click.option("--host", default="0.0.0.0", help="Host for the server to listen on.")
 @click.option("--port", default=8000, help="Port to bind the server to.")
-@click.option("--num_workers", default=1, help="Number of uvicorn workers to spin up")
+@click.option("--num_workers", default=1, help="Number of gunicorn workers to spin up")
 @click.option("--api_base", default=None, help="API base URL.")
 @click.option(
     "--api_version",
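For reference, a minimal self-contained sketch of the click pattern in this hunk (a hypothetical standalone script, not litellm's proxy CLI; the echo body stands in for the real server startup):

# Minimal sketch of the click CLI pattern above (hypothetical standalone script).
import click

@click.command()
@click.option("--host", default="0.0.0.0", help="Host for the server to listen on.")
@click.option("--port", default=8000, help="Port to bind the server to.")
@click.option("--num_workers", default=1, help="Number of gunicorn workers to spin up")
def run_server(host, port, num_workers):
    # The real CLI forwards these values into gunicorn's bind/workers options.
    click.echo(f"would bind {host}:{port} with {num_workers} workers")

if __name__ == "__main__":
    run_server()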
@@ -344,9 +344,10 @@ def run_server(
     )
     try:
         import uvicorn
+        import gunicorn.app.base
     except:
         raise ImportError(
-            "Uvicorn needs to be imported. Run - `pip install uvicorn`"
+            "Uvicorn, gunicorn need to be imported. Run - `pip install 'litellm[proxy]'`"
         )
     if os.getenv("DATABASE_URL", None) is not None:
         # run prisma db push, before starting server
@@ -364,9 +365,37 @@ def run_server(
             os.chdir(original_dir)
     if port == 8000 and is_port_in_use(port):
         port = random.randint(1024, 49152)
-    uvicorn.run(
-        "litellm.proxy.proxy_server:app", host=host, port=port, workers=num_workers
-    )
+
+    # Gunicorn Application Class
+    class StandaloneApplication(gunicorn.app.base.BaseApplication):
+        def __init__(self, app, options=None):
+            self.options = options or {}  # gunicorn options
+            self.application = app  # FastAPI app
+            super().__init__()
+
+        def load_config(self):
+            # note: this loads the gunicorn config, not the LiteLLM Proxy config
+            config = {
+                key: value
+                for key, value in self.options.items()
+                if key in self.cfg.settings and value is not None
+            }
+            for key, value in config.items():
+                self.cfg.set(key.lower(), value)
+
+        def load(self):
+            # gunicorn app function
+            return self.application
+
+    gunicorn_options = {
+        "bind": f"{host}:{port}",
+        "workers": num_workers,  # default is 1
+        "worker_class": "uvicorn.workers.UvicornWorker",
+        "preload": True,  # Add the preload flag
+    }
+    from litellm.proxy.proxy_server import app
+
+    StandaloneApplication(app=app, options=gunicorn_options).run()  # Run gunicorn
 
 
 if __name__ == "__main__":
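The block above is gunicorn's documented custom-application hook: subclass gunicorn.app.base.BaseApplication, push a config dict into gunicorn in load_config(), and return the ASGI app from load(). A minimal runnable sketch of the same pattern, assuming a trivial FastAPI app (the app, route, and option values here are illustrative, not LiteLLM's):

# Minimal sketch of gunicorn's custom-application pattern with uvicorn workers.
# The FastAPI app, route, and option values are illustrative, not LiteLLM's.
import gunicorn.app.base
from fastapi import FastAPI

app = FastAPI()

@app.get("/health")
def health():
    return {"status": "ok"}

class StandaloneApplication(gunicorn.app.base.BaseApplication):
    def __init__(self, application, options=None):
        self.options = options or {}
        self.application = application
        super().__init__()  # triggers load_config()

    def load_config(self):
        # Copy only the keys gunicorn recognizes into its config object.
        for key, value in self.options.items():
            if key in self.cfg.settings and value is not None:
                self.cfg.set(key.lower(), value)

    def load(self):
        # Called in each worker to get the app to serve.
        return self.application

if __name__ == "__main__":
    StandaloneApplication(
        app,
        {
            "bind": "127.0.0.1:8000",
            "workers": 2,
            # Run the ASGI app under uvicorn inside each gunicorn worker.
            "worker_class": "uvicorn.workers.UvicornWorker",
        },
    ).run()

With worker_class set to uvicorn.workers.UvicornWorker, gunicorn manages the worker processes while each worker serves the ASGI app through uvicorn; preload=True imports the application once in the master before forking. Note that gunicorn is POSIX-only, so this code path applies on Linux/macOS.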
@@ -41,13 +41,6 @@ model_list:
       api_key: os.environ/OPENAI_API_KEY
     model_info:
       mode: embedding
-  - model_name: text-davinci-003
-    litellm_params:
-      model: text-davinci-003
-      api_key: os.environ/OPENAI_API_KEY
-    model_info:
-      mode: completion
-
 litellm_settings:
   fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
   # cache: True
@@ -14,7 +14,6 @@ sys.path.insert(
 )  # Adds the parent directory to the system path - for litellm local dev
 
 try:
-    import uvicorn
     import fastapi
     import backoff
     import yaml
poetry.lock (generated): 28 changed lines
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -670,6 +670,26 @@ gitdb = ">=4.0.1,<5"
 [package.extras]
 test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest", "pytest-cov", "pytest-instafail", "pytest-subtests", "pytest-sugar"]
 
+[[package]]
+name = "gunicorn"
+version = "21.2.0"
+description = "WSGI HTTP Server for UNIX"
+optional = true
+python-versions = ">=3.5"
+files = [
+    {file = "gunicorn-21.2.0-py3-none-any.whl", hash = "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0"},
+    {file = "gunicorn-21.2.0.tar.gz", hash = "sha256:88ec8bff1d634f98e61b9f65bc4bf3cd918a90806c6f5c48bc5603849ec81033"},
+]
+
+[package.dependencies]
+packaging = "*"
+
+[package.extras]
+eventlet = ["eventlet (>=0.24.1)"]
+gevent = ["gevent (>=1.4.0)"]
+setproctitle = ["setproctitle"]
+tornado = ["tornado (>=0.2)"]
+
 [[package]]
 name = "h11"
 version = "0.14.0"
@@ -1238,8 +1258,8 @@ files = [
 [package.dependencies]
 numpy = [
     {version = ">=1.20.3", markers = "python_version < \"3.10\""},
-    {version = ">=1.21.0", markers = "python_version >= \"3.10\""},
     {version = ">=1.23.2", markers = "python_version >= \"3.11\""},
+    {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
 ]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
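The relock also disambiguates overlapping environment markers: previously both the >=1.21.0 and >=1.23.2 entries matched Python 3.11, while the new marker scopes the first entry to 3.10 only. A quick way to check markers like these, using the packaging library (illustrative, not part of the PR):

# Evaluate the numpy environment markers against Python 3.11 (illustrative).
from packaging.markers import Marker

old = Marker('python_version >= "3.10"')
new = Marker('python_version >= "3.10" and python_version < "3.11"')
env = {"python_version": "3.11"}
print(old.evaluate(env))  # True: overlaps with the separate >=3.11 entry
print(new.evaluate(env))  # False: the relocked marker no longer overlaps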
@@ -2664,9 +2684,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 
 [extras]
 extra-proxy = ["streamlit"]
-proxy = ["backoff", "fastapi", "orjson", "pyyaml", "rq", "uvicorn"]
+proxy = ["backoff", "fastapi", "gunicorn", "orjson", "pyyaml", "rq", "uvicorn"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<3.9.7 || >3.9.7"
-content-hash = "9f15083d98fe14237abea81eaca802e1db28cfb89bbe127498aa1fabb3c99849"
+content-hash = "b49d09f51e8a57cdf883ab03cd9fecaf1ad007c3092d53347e30129e25adceab"
@@ -18,6 +18,7 @@ jinja2 = "^3.1.2"
 aiohttp = "*"
 
 uvicorn = {version = "^0.22.0", optional = true}
+gunicorn = {version = "^21.2.0", optional = true}
 fastapi = {version = "^0.104.1", optional = true}
 backoff = {version = "*", optional = true}
 pyyaml = {version = "^6.0", optional = true}
@@ -27,6 +28,7 @@ streamlit = {version = "^1.29.0", optional = true}
 
 [tool.poetry.extras]
 proxy = [
+    "gunicorn",
     "uvicorn",
     "fastapi",
     "backoff",
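Since gunicorn is now part of the proxy extra, it is pulled in by pip install 'litellm[proxy]'. A small illustrative check that the extra's server dependencies resolve after installation (hypothetical snippet, not in the PR):

# Illustrative check that the proxy extra's server deps are installed
# after `pip install 'litellm[proxy]'`; package names taken from the extras list.
from importlib.metadata import version

for pkg in ("gunicorn", "uvicorn", "fastapi", "backoff"):
    print(pkg, version(pkg))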
@@ -6,6 +6,7 @@ pydantic>=2.5 # openai req.
 backoff==2.2.1 # server dep
 pyyaml==6.0 # server dep
 uvicorn==0.22.0 # server dep
+gunicorn==21.2.0 # server dep
 boto3==1.28.58 # aws bedrock/sagemaker calls
 redis==4.6.0 # caching
 prisma==0.11.0 # for db