Added support for Triton chat completion using the trtllm generate endpoint and a custom infer endpoint

2024-05-28 07:54:11 -07:00 · 2024-05-28 07:54:11 -07:00 · a58dc68418
commit a58dc68418
parent 073bca78d4
2 changed files with 165 additions and 4 deletions
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2254,6 +2254,26 @@ def completion(
                return generator

            response = generator
+        
+        elif custom_llm_provider == "triton":
+            api_base = (
+                litellm.api_base  or api_base
+            )
+            model_response = triton_chat_completions.completion(
+            api_base=api_base,
+            timeout=timeout,
+            model=model,
+            messages=messages,
+            model_response=model_response,
+            optional_params=optional_params,
+            logging_obj=logging,
+            )
+
+            ## RESPONSE OBJECT
+            response = model_response
+            return response
+        
+        
        elif custom_llm_provider == "cloudflare":
            api_key = (
                api_key