fix(proxy_server.py): restrict model access for /v1/completions endpoint

Krrish Dholakia 2024-02-16 11:49:59 -08:00
parent 691542ddf0
commit d97369dcbe
2 changed files with 10 additions and 5 deletions

model_prices_and_context_window.json

@@ -965,15 +965,15 @@
     },
     "dolphin": {
         "max_tokens": 4096,
-        "input_cost_per_token": 0.00002,
-        "output_cost_per_token": 0.00002,
+        "input_cost_per_token": 0.0000005,
+        "output_cost_per_token": 0.0000005,
         "litellm_provider": "nlp_cloud",
         "mode": "completion"
     },
     "chatdolphin": {
         "max_tokens": 4096,
-        "input_cost_per_token": 0.00002,
-        "output_cost_per_token": 0.00002,
+        "input_cost_per_token": 0.0000005,
+        "output_cost_per_token": 0.0000005,
         "litellm_provider": "nlp_cloud",
         "mode": "chat"
     },
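These entries feed litellm's cost tracking, where a request's cost is the token count multiplied by the per-token price. A minimal sketch of that arithmetic using the corrected dolphin prices; the estimate_cost helper below is illustrative only, not litellm's actual API:

# Illustrative only: litellm's real cost tracking reads these values from
# the model cost map; this standalone helper just shows the arithmetic.
def estimate_cost(input_tokens: int, output_tokens: int) -> float:
    input_cost_per_token = 0.0000005   # corrected "dolphin" price
    output_cost_per_token = 0.0000005  # corrected "dolphin" price
    return input_tokens * input_cost_per_token + output_tokens * output_cost_per_token

# 1,000 prompt tokens + 500 completion tokens -> $0.00075
print(estimate_cost(1000, 500))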

litellm/proxy/proxy_server.py

@@ -2259,8 +2259,13 @@ async def completion(
             response = await llm_router.atext_completion(
                 **data, specific_deployment=True
             )
-        else:  # router is not set
+        elif user_model is not None:  # `litellm --model <your-model-name>`
             response = await litellm.atext_completion(**data)
+        else:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail={"error": "Invalid model name passed in"},
+            )
 
         if hasattr(response, "_hidden_params"):
             model_id = response._hidden_params.get("model_id", None) or ""
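With this change, a /v1/completions request that matches neither a router deployment nor a model passed via `litellm --model` now fails fast with a 400 instead of being forwarded to litellm.atext_completion. A hypothetical client-side check of the new behavior (the proxy URL and API key below are assumptions, not values from this commit):

# Hypothetical check of the new 400 behavior; assumes a proxy running at
# http://0.0.0.0:8000 with no matching model configured.
import openai

client = openai.OpenAI(base_url="http://0.0.0.0:8000", api_key="sk-1234")

try:
    client.completions.create(model="not-a-configured-model", prompt="Hello")
except openai.BadRequestError as err:
    # The proxy now returns 400 {"error": "Invalid model name passed in"}
    print(err)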