(feat) proxy: use orjson

ishaan-jaff 2023-11-30 19:50:47 -08:00
parent 853af29a25
commit 2d55cc753e
3 changed files with 11 additions and 4 deletions

@@ -17,6 +17,7 @@ try:
     import backoff
     import yaml
     import rq
+    import orjson
 except ImportError:
     import sys
@@ -32,6 +33,7 @@ except ImportError:
             "backoff",
             "pyyaml",
-            "rq"
+            "rq",
+            "orjson"
         ]
     )
     import uvicorn
@@ -39,6 +41,7 @@ except ImportError:
     import appdirs
     import backoff
     import yaml
+    import orjson
     warnings.warn(
         "Installed runtime dependencies for proxy server. Specify these dependencies explicitly with `pip install litellm[proxy]`"
@@ -780,8 +783,11 @@ async def chat_completion(request: Request, model: Optional[str] = None, user_ap
 @router.post("/embeddings", dependencies=[Depends(user_api_key_auth)])
 async def embeddings(request: Request, user_api_key_dict: dict = Depends(user_api_key_auth)):
     try:
-        data = await request.json()
-        print_verbose(f"data: {data}")
+        # Use orjson to parse JSON data, orjson speeds up requests significantly
+        data_bytes = await request.body()
+        data = orjson.loads(data_bytes.decode('utf-8'))
         data["model"] = (
             general_settings.get("embedding_model", None)  # server default
             or user_model  # model name passed via cli args
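
To make the motivation for this change concrete, here is a self-contained micro-benchmark sketch comparing the stdlib json parser with orjson on an embeddings-sized request body (the payload shape and iteration count are made up; absolute timings vary by machine):

```python
import json
import time

import orjson  # pip install orjson

# A made-up, embeddings-sized payload; real request bodies will differ.
payload = json.dumps(
    {"model": "text-embedding-ada-002", "input": ["hello world"] * 512}
).encode("utf-8")

# Both parsers accept bytes directly; time repeated parses of the same body.
for name, parse in (("json", json.loads), ("orjson", orjson.loads)):
    start = time.perf_counter()
    for _ in range(10_000):
        parse(payload)
    print(f"{name}: {time.perf_counter() - start:.3f}s")
```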

@@ -36,7 +36,7 @@ async def litellm_completion():
 async def main():
     start = time.time()
-    n = 10  # Number of concurrent tasks
+    n = 100  # Number of concurrent tasks
     tasks = [litellm_completion() for _ in range(n)]
     chat_completions = await asyncio.gather(*tasks)
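
The second file is a concurrency load test; this hunk raises the number of parallel requests from 10 to 100. A self-contained sketch of such a test, assuming httpx as the client and a proxy listening locally (the URL, payload, and client library are assumptions; only the structure of main() comes from the diff):

```python
import asyncio
import time

import httpx  # assumed HTTP client; the real test may use a different one

PROXY_URL = "http://0.0.0.0:8000/chat/completions"  # assumed local proxy address


async def litellm_completion():
    # Fire one chat completion request at the local proxy.
    # A fresh client per call keeps the helper signature matching the diff;
    # a shared client would be more efficient.
    async with httpx.AsyncClient(timeout=60) as client:
        resp = await client.post(
            PROXY_URL,
            json={"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]},
        )
        return resp.json()


async def main():
    start = time.time()
    n = 100  # Number of concurrent tasks
    tasks = [litellm_completion() for _ in range(n)]
    chat_completions = await asyncio.gather(*tasks)
    print(f"{len(chat_completions)} completions in {time.time() - start:.2f}s")


if __name__ == "__main__":
    asyncio.run(main())
```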

@@ -29,7 +29,8 @@ proxy = [
     "uvicorn",
     "fastapi",
     "backoff",
-    "rq"
+    "rq",
+    "orjson",
 ]

 [tool.poetry.scripts]
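
With the proxy extra updated here, the new dependency is installed along the path the server's warning message already recommends: `pip install litellm[proxy]`.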