forked from phoenix/litellm-mirror
(feat) proxy: pydantic request model + swagger docs for chat/completions
This commit is contained in:
parent
e77b7e5a50
commit
e8ae347681
1 changed file with 45 additions and 8 deletions
@@ -113,6 +113,46 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+from typing import Dict
+from pydantic import BaseModel, Extra
+
+######### Request Class Definition ######
+class ChatCompletionRequest(BaseModel):
+    model: str
+    messages: List[Dict[str, str]]
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    n: Optional[int] = None
+    stream: Optional[bool] = None
+    stop: Optional[List[str]] = None
+    max_tokens: Optional[float] = None
+    presence_penalty: Optional[float] = None
+    frequency_penalty: Optional[float] = None
+    logit_bias: Optional[Dict[str, float]] = None
+    user: Optional[str] = None
+    response_format: Optional[Dict[str, str]] = None
+    seed: Optional[int] = None
+    tools: Optional[List[str]] = None
+    tool_choice: Optional[str] = None
+    functions: Optional[List[str]] = None  # soon to be deprecated
+    function_call: Optional[str] = None  # soon to be deprecated
+
+    # Optional LiteLLM params
+    caching: Optional[bool] = None
+    api_base: Optional[str] = None
+    api_version: Optional[str] = None
+    api_key: Optional[str] = None
+    num_retries: Optional[int] = None
+    context_window_fallback_dict: Optional[Dict[str, str]] = None
+    fallbacks: Optional[List[str]] = None
+    metadata: Optional[Dict[str, str]] = {}
+    deployment_id: Optional[str] = None
+    request_timeout: Optional[int] = None
+
+    class Config:
+        extra = 'allow'  # allow params not defined here; they are passed through to litellm.completion(**kwargs)
+
 user_api_base = None
 user_model = None
 user_debug = False
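Two things make this model work as a proxy schema: `extra = 'allow'` (provider-specific parameters not declared above still pass validation) and the all-`Optional` fields (callers only send what they need). Below is a minimal standalone sketch of that behavior, assuming pydantic v2 (the next hunk calls `model_dump()`, which is the v2 API; under v1 it would be `.dict()`). `DemoChatRequest` and `custom_provider_param` are invented names for illustration, not the proxy's code:

from typing import Dict, List, Optional
from pydantic import BaseModel

class DemoChatRequest(BaseModel):
    model: str
    messages: List[Dict[str, str]]
    temperature: Optional[float] = None

    class Config:
        extra = "allow"  # keep undeclared fields instead of rejecting the request

req = DemoChatRequest(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    custom_provider_param=42,  # hypothetical field, not declared on the class
)
print(req.model_dump())
# {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'user', 'content': 'hi'}],
#  'temperature': None, 'custom_provider_param': 42}

Note that `temperature` comes back as None even though the caller never sent it; that is exactly the noise the `value is not None` filter in the next hunk strips out before the kwargs reach the completion call.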
@@ -712,16 +752,13 @@ async def completion(request: Request, model: Optional[str] = None, user_api_key
 @router.post("/v1/chat/completions", dependencies=[Depends(user_api_key_auth)], tags=["chat/completions"])
 @router.post("/chat/completions", dependencies=[Depends(user_api_key_auth)], tags=["chat/completions"])
 @router.post("/openai/deployments/{model:path}/chat/completions", dependencies=[Depends(user_api_key_auth)], tags=["chat/completions"]) # azure compatible endpoint
-async def chat_completion(request: Request, model: Optional[str] = None, user_api_key_dict: dict = Depends(user_api_key_auth)) -> litellm.ModelResponse:
+async def chat_completion(request: ChatCompletionRequest, model: Optional[str] = None, user_api_key_dict: dict = Depends(user_api_key_auth)) -> litellm.ModelResponse:
     global general_settings, user_debug
     try:
         data = {}
-        body = await request.body()
-        body_str = body.decode()
-        try:
-            data = ast.literal_eval(body_str)
-        except:
-            data = json.loads(body_str)
+        request_items = request.model_dump()
+        data = {key: value for key, value in request_items.items() if value is not None}  # pydantic leaves unset fields as None; filter them out here
         print_verbose(f"receiving data: {data}")
         data["model"] = (
             general_settings.get("completion_model", None) # server default
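End to end, the handler now relies on FastAPI's model binding: the JSON body is validated against ChatCompletionRequest (replacing the old ast.literal_eval/json.loads fallback), and because the parameter is a typed pydantic model rather than a bare Request, the generated Swagger UI at /docs renders a full request schema under the chat/completions tag, which is the "swagger" half of the commit title. Below is a hedged, self-contained sketch of the pattern; `fake_completion` is a stand-in for the proxy's litellm.completion call, not litellm's API:

from typing import Dict, List, Optional
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[Dict[str, str]]
    temperature: Optional[float] = None

    class Config:
        extra = "allow"  # accept provider-specific params not declared above

def fake_completion(**kwargs):
    # Stand-in for litellm.completion(**data); echoes what it was given.
    return {"received": kwargs}

@app.post("/chat/completions", tags=["chat/completions"])
async def chat_completion(request: ChatCompletionRequest):
    request_items = request.model_dump()
    # Unset optional fields come back as None; forward only what was sent.
    data = {key: value for key, value in request_items.items() if value is not None}
    return fake_completion(**data)

A malformed body, e.g. messages with the wrong shape, now fails fast with a 422 validation error instead of reaching the completion call.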