forked from phoenix/litellm-mirror
Added support for Triton chat completion using the trtllm generate endpoint and a custom infer endpoint
This commit is contained in:
parent
073bca78d4
commit
a58dc68418
2 changed files with 165 additions and 4 deletions
|
@@ -2254,6 +2254,26 @@ def completion(
|
|||
return generator
|
||||
|
||||
response = generator
|
||||
|
||||
elif custom_llm_provider == "triton":
|
||||
api_base = (
|
||||
litellm.api_base or api_base
|
||||
)
|
||||
model_response = triton_chat_completions.completion(
|
||||
api_base=api_base,
|
||||
timeout=timeout,
|
||||
model=model,
|
||||
messages=messages,
|
||||
model_response=model_response,
|
||||
optional_params=optional_params,
|
||||
logging_obj=logging,
|
||||
)
|
||||
|
||||
## RESPONSE OBJECT
|
||||
response = model_response
|
||||
return response
|
||||
|
||||
|
||||
elif custom_llm_provider == "cloudflare":
|
||||
api_key = (
|
||||
api_key
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue