Added support for Triton chat completion using trtllm generate endpoint and custom infer endpoint

This commit is contained in:
Giri Tatavarty 2024-05-28 07:54:11 -07:00
parent 92df94d791
commit ff18d93a3a
2 changed files with 165 additions and 4 deletions

View file

@@ -2254,6 +2254,26 @@ def completion(
return generator
response = generator
elif custom_llm_provider == "triton":
api_base = (
litellm.api_base or api_base
)
model_response = triton_chat_completions.completion(
api_base=api_base,
timeout=timeout,
model=model,
messages=messages,
model_response=model_response,
optional_params=optional_params,
logging_obj=logging,
)
## RESPONSE OBJECT
response = model_response
return response
elif custom_llm_provider == "cloudflare":
api_key = (
api_key