Merge pull request #3895 from giritatavarty-8451/litellm_triton_chatcompletion_support

Added support for Triton chat completion using the trtllm generate endpoint
This commit is contained in:
Ishaan Jaff 2024-05-29 12:50:31 -07:00 committed by GitHub
commit 64d050cadd
2 changed files with 165 additions and 4 deletions

View file

@ -2254,6 +2254,26 @@ def completion(
return generator
response = generator
elif custom_llm_provider == "triton":
api_base = (
litellm.api_base or api_base
)
model_response = triton_chat_completions.completion(
api_base=api_base,
timeout=timeout,
model=model,
messages=messages,
model_response=model_response,
optional_params=optional_params,
logging_obj=logging,
)
## RESPONSE OBJECT
response = model_response
return response
elif custom_llm_provider == "cloudflare":
api_key = (
api_key