forked from phoenix/litellm-mirror
(feat) proxy: use orjson
This commit is contained in:
parent
853af29a25
commit
2d55cc753e
3 changed files with 11 additions and 4 deletions
|
@ -17,6 +17,7 @@ try:
|
|||
import backoff
|
||||
import yaml
|
||||
import rq
|
||||
import orjson
|
||||
except ImportError:
|
||||
import sys
|
||||
|
||||
|
@ -32,6 +33,7 @@ except ImportError:
|
|||
"backoff",
|
||||
"pyyaml",
|
||||
"rq"
|
||||
"orjson"
|
||||
]
|
||||
)
|
||||
import uvicorn
|
||||
|
@ -39,6 +41,7 @@ except ImportError:
|
|||
import appdirs
|
||||
import backoff
|
||||
import yaml
|
||||
import orjson
|
||||
|
||||
warnings.warn(
|
||||
"Installed runtime dependencies for proxy server. Specify these dependencies explicitly with `pip install litellm[proxy]`"
|
||||
|
@ -780,8 +783,11 @@ async def chat_completion(request: Request, model: Optional[str] = None, user_ap
|
|||
@router.post("/embeddings", dependencies=[Depends(user_api_key_auth)])
|
||||
async def embeddings(request: Request, user_api_key_dict: dict = Depends(user_api_key_auth)):
|
||||
try:
|
||||
data = await request.json()
|
||||
print_verbose(f"data: {data}")
|
||||
|
||||
# Use orjson to parse the JSON request body; orjson speeds up requests significantly
|
||||
data_bytes = await request.body()
|
||||
data = orjson.loads(data_bytes.decode('utf-8'))
|
||||
|
||||
data["model"] = (
|
||||
general_settings.get("embedding_model", None) # server default
|
||||
or user_model # model name passed via cli args
|
||||
|
|
|
@ -36,7 +36,7 @@ async def litellm_completion():
|
|||
|
||||
async def main():
|
||||
start = time.time()
|
||||
n = 10 # Number of concurrent tasks
|
||||
n = 100 # Number of concurrent tasks
|
||||
tasks = [litellm_completion() for _ in range(n)]
|
||||
|
||||
chat_completions = await asyncio.gather(*tasks)
|
||||
|
|
|
@ -29,7 +29,8 @@ proxy = [
|
|||
"uvicorn",
|
||||
"fastapi",
|
||||
"backoff",
|
||||
"rq"
|
||||
"rq",
|
||||
"orjson",
|
||||
]
|
||||
|
||||
[tool.poetry.scripts]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue