diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index ac1e49fbd..886d3dcfc 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -126,7 +126,7 @@ class ProxyChatCompletionRequest(BaseModel):
     n: Optional[int] = None
     stream: Optional[bool] = None
     stop: Optional[List[str]] = None
-    max_tokens: Optional[float] = None
+    max_tokens: Optional[Union[float, int]] = None
     presence_penalty: Optional[float] = None
     frequency_penalty: Optional[float] = None
     logit_bias: Optional[Dict[str, float]] = None
@@ -752,7 +752,7 @@ async def completion(request: Request, model: Optional[str] = None, user_api_key
 @router.post("/v1/chat/completions", dependencies=[Depends(user_api_key_auth)], tags=["chat/completions"])
 @router.post("/chat/completions", dependencies=[Depends(user_api_key_auth)], tags=["chat/completions"])
 @router.post("/openai/deployments/{model:path}/chat/completions", dependencies=[Depends(user_api_key_auth)], tags=["chat/completions"]) # azure compatible endpoint
-async def chat_completion(request: ProxyChatCompletionRequest, model: Optional[str] = None, user_api_key_dict: dict = Depends(user_api_key_auth)) -> Union[litellm.ModelResponse, StreamingResponse]:
+async def chat_completion(request: ProxyChatCompletionRequest, model: Optional[str] = None, user_api_key_dict: dict = Depends(user_api_key_auth)):
     global general_settings, user_debug
     try:
         data = {}
diff --git a/litellm/tests/test_proxy_server.py b/litellm/tests/test_proxy_server.py
index 0a32653ad..69cd8bba6 100644
--- a/litellm/tests/test_proxy_server.py
+++ b/litellm/tests/test_proxy_server.py
@@ -45,7 +45,7 @@ def test_chat_completion():
         pytest.fail("LiteLLM Proxy test failed. Exception", e)
 
 # Run the test
-# test_chat_completion()
+test_chat_completion()
 
 
 def test_chat_completion_azure():