[Bug fix]: Triton /infer handler incompatible with batch responses (#7337)

* migrate triton to base llm http handler

* clean up triton handler.py

* use transform functions for triton

* add TritonConfig

* get openai params for triton

* use triton embedding config

* test_completion_triton_generate_api

* test_completion_triton_infer_api

* fix TritonConfig doc string

* use TritonResponseIterator

* fix triton embeddings

* docs triton chat usage (usage sketch below)
Ishaan Jaff 2024-12-20 20:59:40 -08:00 committed by GitHub
parent e6bdec4eed
commit 1b2ed0c344
11 changed files with 814 additions and 450 deletions
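
The commit message above mentions new chat-usage docs for Triton and fixes to the Triton embeddings path. The sketch below shows how the provider is typically driven through LiteLLM's public `completion` and `embedding` calls; the model names, `api_base` URLs, and the assumption of a running Triton Inference Server are placeholders for illustration and are not taken from this commit.

```python
from litellm import completion, embedding

# Chat / text generation routed to a Triton generate endpoint.
# Model name and api_base are placeholders; point them at your own server.
chat_response = completion(
    model="triton/llama-3-8b-instruct",
    messages=[{"role": "user", "content": "Say hello from Triton"}],
    api_base="http://localhost:8000/generate",
)
print(chat_response.choices[0].message.content)

# Embeddings routed to a Triton /infer endpoint -- the handler this PR
# fixes for batched responses. Again, model name and api_base are placeholders.
embedding_response = embedding(
    model="triton/my-embedding-model",
    input=["hello from litellm", "second item in the batch"],
    api_base="http://localhost:8000/infer",
)
print(len(embedding_response.data))
```

The bug in the title concerns batched responses from `/infer`; with the fix, each input in the list above should come back as its own entry in `embedding_response.data`.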


@@ -183,4 +183,11 @@ def get_supported_openai_params(  # noqa: PLR0915
         return litellm.PredibaseConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "voyage":
         return litellm.VoyageEmbeddingConfig().get_supported_openai_params(model=model)
+    elif custom_llm_provider == "triton":
+        if request_type == "embeddings":
+            return litellm.TritonEmbeddingConfig().get_supported_openai_params(
+                model=model
+            )
+        else:
+            return litellm.TritonConfig().get_supported_openai_params(model=model)
     return None
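
The hunk above dispatches Triton to `TritonEmbeddingConfig` for embedding requests and to `TritonConfig` otherwise. Below is a small sketch of exercising that branch through the public helper, assuming `get_supported_openai_params` accepts the `model`, `custom_llm_provider`, and `request_type` arguments implied by the signature shown; the model name is a placeholder.

```python
from litellm import get_supported_openai_params

# request_type="chat_completion" should hit the TritonConfig branch above.
chat_params = get_supported_openai_params(
    model="my-triton-model",
    custom_llm_provider="triton",
    request_type="chat_completion",
)

# request_type="embeddings" should hit the TritonEmbeddingConfig branch.
embedding_params = get_supported_openai_params(
    model="my-triton-model",
    custom_llm_provider="triton",
    request_type="embeddings",
)

print(chat_params)
print(embedding_params)
```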