Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)
[Bug fix]: Triton /infer handler incompatible with batch responses (#7337)
* migrate triton to base llm http handler
* clean up triton handler.py
* use transform functions for triton
* add TritonConfig
* get openai params for triton
* use triton embedding config
* test_completion_triton_generate_api
* test_completion_triton_infer_api
* fix TritonConfig doc string
* use TritonResponseIterator
* fix triton embeddings
* docs triton chat usage
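The last item above refers to the documented chat usage for the Triton provider. A minimal sketch of that call path, assuming a locally running Triton server; the model name and api_base value are illustrative placeholders, not values taken from this commit:

import litellm

# The "triton/" prefix routes the request through litellm's Triton handler.
# Endpoint URL and model name below are assumptions for illustration.
response = litellm.completion(
    model="triton/llama-3-8b",
    messages=[{"role": "user", "content": "What is Triton Inference Server?"}],
    api_base="http://localhost:8000/generate",
)
print(response.choices[0].message.content)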
This commit is contained in:
parent: e6bdec4eed
commit: 1b2ed0c344
11 changed files with 814 additions and 450 deletions
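For context on the bug named in the title: Triton's KServe-v2 /infer endpoint can return batched outputs, where each output tensor carries a flat data array and a shape whose leading dimension is the batch size, so a handler written for single responses breaks on such payloads. A rough sketch of that response shape and how one might split it, with illustrative model and tensor names:

# Sketch of a batched response from POST /v2/models/<model>/infer
# (KServe v2 protocol). "data" is flattened across the batch and
# "shape" records how to split it back into per-request results.
# Model and tensor names here are illustrative assumptions.
batched_response = {
    "model_name": "llama-3-8b",
    "outputs": [
        {
            "name": "text_output",
            "datatype": "BYTES",
            "shape": [2, 1],  # batch of two completions
            "data": ["first completion", "second completion"],
        }
    ],
}

# Split the flat data array into one result per batch item.
output = batched_response["outputs"][0]
batch_size = output["shape"][0]
per_item_results = [output["data"][i] for i in range(batch_size)]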
@@ -183,4 +183,11 @@ def get_supported_openai_params(  # noqa: PLR0915
         return litellm.PredibaseConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "voyage":
         return litellm.VoyageEmbeddingConfig().get_supported_openai_params(model=model)
+    elif custom_llm_provider == "triton":
+        if request_type == "embeddings":
+            return litellm.TritonEmbeddingConfig().get_supported_openai_params(
+                model=model
+            )
+        else:
+            return litellm.TritonConfig().get_supported_openai_params(model=model)
     return None
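With the hunk above applied, supported OpenAI-style parameters can be looked up for both Triton request types through litellm's public helper. A small usage sketch; the model name is an illustrative placeholder:

import litellm

# Chat/completion params are resolved via the new TritonConfig.
chat_params = litellm.get_supported_openai_params(
    model="llama-3-8b",
    custom_llm_provider="triton",
)

# Embedding params are resolved via TritonEmbeddingConfig when
# request_type is "embeddings".
embedding_params = litellm.get_supported_openai_params(
    model="llama-3-8b",
    custom_llm_provider="triton",
    request_type="embeddings",
)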