[Bug fix]: Triton /infer handler incompatible with batch responses (#7337)

* migrate triton to base llm http handler

* clean up triton handler.py

* use transform functions for triton

* add TritonConfig

* get openai params for triton

* use triton embedding config

* test_completion_triton_generate_api

* test_completion_triton_infer_api

* fix TritonConfig doc string

* use TritonResponseIterator

* fix triton embeddings

* docs triton chat usage (usage sketch below)
Ishaan Jaff 2024-12-20 20:59:40 -08:00 committed by GitHub
parent e6bdec4eed
commit 1b2ed0c344
11 changed files with 814 additions and 450 deletions
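
The commit message above mentions new chat-usage docs for Triton and fixes to the Triton embeddings path. The sketch below shows how the provider is typically driven through LiteLLM's public `completion` and `embedding` calls; the model names, `api_base` URLs, and the assumption of a running Triton Inference Server are placeholders for illustration and are not taken from this commit.

```python
from litellm import completion, embedding

# Chat / text generation routed to a Triton generate endpoint.
# Model name and api_base are placeholders; point them at your own server.
chat_response = completion(
    model="triton/llama-3-8b-instruct",
    messages=[{"role": "user", "content": "Say hello from Triton"}],
    api_base="http://localhost:8000/generate",
)
print(chat_response.choices[0].message.content)

# Embeddings routed to a Triton /infer endpoint -- the handler this PR
# fixes for batched responses. Again, model name and api_base are placeholders.
embedding_response = embedding(
    model="triton/my-embedding-model",
    input=["hello from litellm", "second item in the batch"],
    api_base="http://localhost:8000/infer",
)
print(len(embedding_response.data))
```

The bug in the title concerns batched responses from `/infer`; with the fix, each input in the list above should come back as its own entry in `embedding_response.data`.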


@@ -183,4 +183,11 @@ def get_supported_openai_params(  # noqa: PLR0915
         return litellm.PredibaseConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "voyage":
         return litellm.VoyageEmbeddingConfig().get_supported_openai_params(model=model)
+    elif custom_llm_provider == "triton":
+        if request_type == "embeddings":
+            return litellm.TritonEmbeddingConfig().get_supported_openai_params(
+                model=model
+            )
+        else:
+            return litellm.TritonConfig().get_supported_openai_params(model=model)
     return None
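
The hunk above dispatches Triton to `TritonEmbeddingConfig` for embedding requests and to `TritonConfig` otherwise. Below is a small sketch of exercising that branch through the public helper, assuming `get_supported_openai_params` accepts the `model`, `custom_llm_provider`, and `request_type` arguments implied by the signature shown; the model name is a placeholder.

```python
from litellm import get_supported_openai_params

# request_type="chat_completion" should hit the TritonConfig branch above.
chat_params = get_supported_openai_params(
    model="my-triton-model",
    custom_llm_provider="triton",
    request_type="chat_completion",
)

# request_type="embeddings" should hit the TritonEmbeddingConfig branch.
embedding_params = get_supported_openai_params(
    model="my-triton-model",
    custom_llm_provider="triton",
    request_type="embeddings",
)

print(chat_params)
print(embedding_params)
```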