comments about using hosted vllm

ishaan-jaff 2023-09-08 14:06:52 -07:00
parent 8c0ce5b357
commit 847e9f47cc


@@ -225,6 +225,7 @@ def completion(
elif (
model in litellm.open_ai_chat_completion_models
or custom_llm_provider == "custom_openai"
# NOTE: Do NOT add custom_llm_provider == "openai" here. It will break hosted vLLM calls. See: https://docs.litellm.ai/docs/providers/vllm#calling-hosted-vllm-server. vLLM expects requests to call openai.Completion, so those requests must always route to openai.Completion, not this branch.
or "ft:gpt-3.5-turbo" in model # finetuned gpt-3.5-turbo
): # allow user to make an openai call with a custom base
openai.api_type = "openai"
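For context, the hosted vLLM flow this note protects looks roughly like the sketch below (based on the linked docs; the model name and api_base are placeholders, not real endpoints). Passing custom_llm_provider="openai" together with a custom api_base is what should route the request through openai.Completion rather than the chat completion branch guarded above.

import litellm

# Minimal sketch of a hosted vLLM call through litellm.
# See: https://docs.litellm.ai/docs/providers/vllm#calling-hosted-vllm-server
response = litellm.completion(
    model="facebook/opt-125m",              # vLLM model name served by the hosted endpoint (placeholder)
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    api_base="https://hosted-vllm-api.co",  # custom base pointing at the hosted vLLM server (placeholder)
    custom_llm_provider="openai",           # must route via openai.Completion, per the note above
    temperature=0.2,
    max_tokens=80,
)
print(response)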