From 847e9f47cc8ac4c390a80302d257f59098debab1 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 8 Sep 2023 14:06:52 -0700
Subject: [PATCH] comments about using hosted vllm

---
 litellm/main.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/litellm/main.py b/litellm/main.py
index 96dd50341d..ad2e4b99aa 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -225,6 +225,7 @@ def completion(
     elif (
         model in litellm.open_ai_chat_completion_models
         or custom_llm_provider == "custom_openai"
+        # NOTE: do NOT add custom_llm_provider == "openai" here. That would break hosted vllm calls (see https://docs.litellm.ai/docs/providers/vllm#calling-hosted-vllm-server); vllm expects those requests to always call openai.Completion.
         or "ft:gpt-3.5-turbo" in model # finetuned gpt-3.5-turbo
     ): # allow user to make an openai call with a custom base
         openai.api_type = "openai"
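
For context, the hosted vllm path this comment protects goes through litellm.completion with a custom api_base, per the linked docs. The snippet below is a minimal sketch of such a call; the endpoint URL and model name are placeholders, and the custom_llm_provider value is assumed from the vllm provider docs rather than stated in this patch.

import litellm

# Minimal sketch of a hosted vllm call (placeholder endpoint and model,
# following the docs linked in the NOTE above). These requests must be
# routed to openai.Completion, which is why custom_llm_provider == "openai"
# must not be added to the ChatCompletion branch touched by this patch.
response = litellm.completion(
    model="facebook/opt-125m",  # model served by the hosted vllm server
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    api_base="https://hosted-vllm-api.co",  # placeholder hosted vllm endpoint
    custom_llm_provider="openai",           # assumed per the vllm provider docs
    temperature=0.2,
    max_tokens=80,
)
print(response)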