mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
Added support for Triton chat completion using the trtllm generate endpoint and custom infer endpoint
This commit is contained in:
parent
92df94d791
commit
ff18d93a3a
2 changed files with 165 additions and 4 deletions
|
@ -2254,6 +2254,26 @@ def completion(
|
|||
return generator
|
||||
|
||||
response = generator
|
||||
|
||||
elif custom_llm_provider == "triton":
|
||||
api_base = (
|
||||
litellm.api_base or api_base
|
||||
)
|
||||
model_response = triton_chat_completions.completion(
|
||||
api_base=api_base,
|
||||
timeout=timeout,
|
||||
model=model,
|
||||
messages=messages,
|
||||
model_response=model_response,
|
||||
optional_params=optional_params,
|
||||
logging_obj=logging,
|
||||
)
|
||||
|
||||
## RESPONSE OBJECT
|
||||
response = model_response
|
||||
return response
|
||||
|
||||
|
||||
elif custom_llm_provider == "cloudflare":
|
||||
api_key = (
|
||||
api_key
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue