[Bug fix]: Triton /infer handler incompatible with batch responses (#7337)

* migrate triton to base llm http handler

* clean up triton handler.py

* use transform functions for triton

* add TritonConfig

* get openai params for triton

* use triton embedding config

* test_completion_triton_generate_api

* test_completion_triton_infer_api

* fix TritonConfig doc string

* use TritonResponseIterator

* fix triton embeddings

* docs triton chat usage (usage sketch below)
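The last bullet refers to the new Triton chat usage docs. As a rough, hedged sketch (the model name, server URL, and endpoint path below are illustrative, not taken from this commit), calling a Triton-hosted model through litellm looks like:

import litellm

# Hedged usage sketch for the "docs triton chat usage" bullet above.
# The model name and api_base are placeholders; litellm selects the Triton
# handler from the "triton/" prefix and sends the request to the
# /generate or /infer endpoint named in api_base.
response = litellm.completion(
    model="triton/llama-3-8b",  # hypothetical Triton model name
    messages=[{"role": "user", "content": "Say hi"}],
    api_base="http://localhost:8000/v2/models/llama-3-8b/infer",  # or .../generate
)
print(response.choices[0].message.content)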
Ishaan Jaff 2024-12-20 20:59:40 -08:00 committed by GitHub
parent 70a9ea99f2
commit 6107f9f3f3
11 changed files with 814 additions and 450 deletions

@@ -2249,10 +2249,19 @@ def get_optional_params_embeddings( # noqa: PLR0915
                 message="Setting dimensions is not supported for OpenAI `text-embedding-3` and later models. To drop it from the call, set `litellm.drop_params = True`.",
             )
     elif custom_llm_provider == "triton":
-        keys = list(non_default_params.keys())
-        for k in keys:
-            non_default_params.pop(k, None)
-        final_params = {**non_default_params, **kwargs}
+        supported_params = get_supported_openai_params(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            request_type="embeddings",
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.TritonEmbeddingConfig().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params={},
+            model=model,
+            drop_params=drop_params if drop_params is not None else False,
+        )
+        final_params = {**optional_params, **kwargs}
         return final_params
     elif custom_llm_provider == "databricks":
         supported_params = get_supported_openai_params(
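This hunk replaces the old behavior, which popped every non-default param (silently discarding caller input), with validation against Triton's supported OpenAI params plus an explicit mapping step. A hedged sketch of the new path from the caller's side (keyword names follow the hunk; the exact public signature of get_optional_params_embeddings is an assumption):

from litellm.utils import get_optional_params_embeddings

# Hedged sketch: exercises the new Triton embeddings branch above. Which
# OpenAI params Triton actually accepts is decided by TritonEmbeddingConfig;
# unsupported ones are dropped rather than raising when drop_params is set.
final_params = get_optional_params_embeddings(
    model="my-embedding-model",      # hypothetical Triton model name
    custom_llm_provider="triton",
    encoding_format="float",         # example OpenAI embedding param
)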
@@ -2812,6 +2821,17 @@ def get_optional_params( # noqa: PLR0915
                 else False
             ),
         )
+    elif custom_llm_provider == "triton":
+        supported_params = get_supported_openai_params(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.TritonConfig().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params=optional_params,
+            model=model,
+            drop_params=drop_params if drop_params is not None else False,
+        )
     elif custom_llm_provider == "maritalk":
         ## check if unsupported param passed in
         supported_params = get_supported_openai_params(
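For completions the same pattern applies. A hedged sketch of the mapping step in isolation, with the call shape copied from the hunk (whether these particular OpenAI params are in Triton's supported set is an assumption):

import litellm

# Hedged sketch of the new mapping step for chat/completion params.
# The params shown are illustrative; unsupported ones raise unless
# drop_params=True, in which case they are dropped from the request.
mapped = litellm.TritonConfig().map_openai_params(
    non_default_params={"max_tokens": 256, "temperature": 0.2},
    optional_params={},
    model="llama-3-8b",  # hypothetical
    drop_params=False,
)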
@@ -6222,6 +6242,8 @@ class ProviderConfigManager:
     ) -> BaseEmbeddingConfig:
         if litellm.LlmProviders.VOYAGE == provider:
             return litellm.VoyageEmbeddingConfig()
+        elif litellm.LlmProviders.TRITON == provider:
+            return litellm.TritonEmbeddingConfig()
         raise ValueError(f"Provider {provider} does not support embedding config")

     @staticmethod
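The last hunk registers Triton in ProviderConfigManager's embedding-config dispatch. The getter's name is cut off in the hunk, so the sketch below assumes a static method of roughly this shape:

import litellm
from litellm.utils import ProviderConfigManager

# Hedged sketch: resolve the embedding config for the Triton provider.
# The method name is assumed from the truncated signature in the hunk.
config = ProviderConfigManager.get_provider_embedding_config(
    model="my-embedding-model",  # hypothetical
    provider=litellm.LlmProviders.TRITON,
)
assert isinstance(config, litellm.TritonEmbeddingConfig)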