(feat) proxy: use orjson

This commit is contained in:
ishaan-jaff 2023-11-30 19:50:47 -08:00
parent 853af29a25
commit 2d55cc753e
3 changed files with 11 additions and 4 deletions

View file

@@ -17,6 +17,7 @@ try:
import backoff import backoff
import yaml import yaml
import rq import rq
import orjson
except ImportError: except ImportError:
import sys import sys
@@ -32,6 +33,7 @@ except ImportError:
"backoff", "backoff",
"pyyaml", "pyyaml",
"rq", "rq",
"orjson"
] ]
) )
import uvicorn import uvicorn
@@ -39,6 +41,7 @@ except ImportError:
import appdirs import appdirs
import backoff import backoff
import yaml import yaml
import orjson
warnings.warn( warnings.warn(
"Installed runtime dependencies for proxy server. Specify these dependencies explicitly with `pip install litellm[proxy]`" "Installed runtime dependencies for proxy server. Specify these dependencies explicitly with `pip install litellm[proxy]`"
@@ -780,8 +783,11 @@ async def chat_completion(request: Request, model: Optional[str] = None, user_ap
@router.post("/embeddings", dependencies=[Depends(user_api_key_auth)]) @router.post("/embeddings", dependencies=[Depends(user_api_key_auth)])
async def embeddings(request: Request, user_api_key_dict: dict = Depends(user_api_key_auth)): async def embeddings(request: Request, user_api_key_dict: dict = Depends(user_api_key_auth)):
try: try:
data = await request.json()
print_verbose(f"data: {data}") # Use orjson to parse JSON data, orjson speeds up requests significantly
data_bytes = await request.body()
data = orjson.loads(data_bytes.decode('utf-8'))
data["model"] = ( data["model"] = (
general_settings.get("embedding_model", None) # server default general_settings.get("embedding_model", None) # server default
or user_model # model name passed via cli args or user_model # model name passed via cli args

View file

@@ -36,7 +36,7 @@ async def litellm_completion():
async def main(): async def main():
start = time.time() start = time.time()
n = 10 # Number of concurrent tasks n = 100 # Number of concurrent tasks
tasks = [litellm_completion() for _ in range(n)] tasks = [litellm_completion() for _ in range(n)]
chat_completions = await asyncio.gather(*tasks) chat_completions = await asyncio.gather(*tasks)

View file

@@ -29,7 +29,8 @@ proxy = [
"uvicorn", "uvicorn",
"fastapi", "fastapi",
"backoff", "backoff",
"rq" "rq",
"orjson",
] ]
[tool.poetry.scripts] [tool.poetry.scripts]