From 847e9f47cc8ac4c390a80302d257f59098debab1 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 8 Sep 2023 14:06:52 -0700
Subject: [PATCH] comments about using hosted vllm

---
 litellm/main.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/litellm/main.py b/litellm/main.py
index 96dd50341d..ad2e4b99aa 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -225,6 +225,7 @@ def completion(
     elif (
         model in litellm.open_ai_chat_completion_models
         or custom_llm_provider == "custom_openai"
+        # NOTE: do NOT add custom_llm_provider == "openai" here. That would break hosted vllm calls (see https://docs.litellm.ai/docs/providers/vllm#calling-hosted-vllm-server); vllm expects those requests to always call openai.Completion.
         or "ft:gpt-3.5-turbo" in model # finetuned gpt-3.5-turbo
     ): # allow user to make an openai call with a custom base
         openai.api_type = "openai"
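
For context, the hosted vllm path this comment protects goes through litellm.completion with a custom api_base, per the linked docs. The snippet below is a minimal sketch of such a call; the endpoint URL and model name are placeholders, and the custom_llm_provider value is assumed from the vllm provider docs rather than stated in this patch.

import litellm

# Minimal sketch of a hosted vllm call (placeholder endpoint and model,
# following the docs linked in the NOTE above). These requests must be
# routed to openai.Completion, which is why custom_llm_provider == "openai"
# must not be added to the ChatCompletion branch touched by this patch.
response = litellm.completion(
    model="facebook/opt-125m",  # model served by the hosted vllm server
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    api_base="https://hosted-vllm-api.co",  # placeholder hosted vllm endpoint
    custom_llm_provider="openai",           # assumed per the vllm provider docs
    temperature=0.2,
    max_tokens=80,
)
print(response)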