Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)

Commit d5c65c6be2 (parent 1b3050477a)
Add support for Triton streaming & triton async completions

3 changed files with 199 additions and 33 deletions
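For context, here is a minimal sketch of the client-side call this commit enables, assuming a locally running Triton inference server. The model alias and api_base URL are placeholders for illustration, not values taken from the diff:

import litellm

# Hypothetical Triton deployment; model alias and endpoint URL are placeholders.
response = litellm.completion(
    model="triton/llama-2-7b",
    messages=[{"role": "user", "content": "Hello, Triton!"}],
    api_base="http://localhost:8000/v2/models/llama-2-7b/generate",
    stream=True,
)

# litellm normalizes each chunk to the OpenAI streaming format.
for chunk in response:
    delta = chunk.choices[0].delta.content
    if delta is not None:
        print(delta, end="")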
@@ -11013,6 +11013,42 @@ class CustomStreamWrapper:
             except Exception as e:
                 raise e
 
+    def handle_triton_stream(self, chunk):
+        try:
+            if isinstance(chunk, dict):
+                parsed_response = chunk
+            elif isinstance(chunk, (str, bytes)):
+                if isinstance(chunk, bytes):
+                    chunk = chunk.decode("utf-8")
+                if "text_output" in chunk:
+                    response = chunk.replace("data: ", "").strip()
+                    parsed_response = json.loads(response)
+                else:
+                    return {
+                        "text": "",
+                        "is_finished": False,
+                        "prompt_tokens": 0,
+                        "completion_tokens": 0,
+                    }
+            else:
+                print_verbose(f"chunk: {chunk} (Type: {type(chunk)})")
+                raise ValueError(
+                    f"Unable to parse response. Original response: {chunk}"
+                )
+            text = parsed_response.get("text_output", "")
+            finish_reason = parsed_response.get("stop_reason")
+            is_finished = parsed_response.get("is_finished", False)
+            return {
+                "text": text,
+                "is_finished": is_finished,
+                "finish_reason": finish_reason,
+                "prompt_tokens": parsed_response.get("input_token_count", 0),
+                "completion_tokens": parsed_response.get("generated_token_count", 0),
+            }
+            return {"text": "", "is_finished": False}
+        except Exception as e:
+            raise e
+
     def handle_clarifai_completion_chunk(self, chunk):
         try:
             if isinstance(chunk, dict):
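handle_triton_stream accepts either an already-decoded dict or a raw SSE line. Below is a runnable sketch of the string path; the sample payload is an assumption inferred from the field names the parser reads (text_output, stop_reason, is_finished), not from Triton's documented wire format. Note also that the method's trailing return {"text": "", "is_finished": False} is unreachable: every branch above it either returns or raises.

import json

# Hypothetical SSE line from a Triton streaming endpoint; the payload shape
# is inferred from the fields handle_triton_stream reads, not confirmed.
raw = b'data: {"text_output": "Hello", "stop_reason": null, "is_finished": false}'

chunk = raw.decode("utf-8")
if "text_output" in chunk:
    parsed = json.loads(chunk.replace("data: ", "").strip())
    print(parsed.get("text_output", ""))   # -> Hello
    print(parsed.get("stop_reason"))       # -> None while mid-stream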
@@ -11337,6 +11373,12 @@ class CustomStreamWrapper:
                 completion_obj["content"] = response_obj["text"]
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
+            elif self.custom_llm_provider == "triton":
+                response_obj = self.handle_triton_stream(chunk)
+                completion_obj["content"] = response_obj["text"]
+                print_verbose(f"completion obj content: {completion_obj['content']}")
+                if response_obj["is_finished"]:
+                    self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "text-completion-openai":
                 response_obj = self.handle_openai_text_completion_chunk(chunk)
                 completion_obj["content"] = response_obj["text"]
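The normalized dict is what this dispatch branch turns into an OpenAI-style delta. A rough standalone sketch of that mapping with made-up values (the real assembly happens elsewhere in CustomStreamWrapper):

# Illustrative values only; shows how the normalized dict feeds the delta.
response_obj = {
    "text": "Hello",
    "is_finished": True,
    "finish_reason": "stop",
    "prompt_tokens": 4,
    "completion_tokens": 1,
}

completion_obj = {"content": response_obj["text"]}
received_finish_reason = (
    response_obj["finish_reason"] if response_obj["is_finished"] else None
)
print(completion_obj, received_finish_reason)  # {'content': 'Hello'} stop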
@@ -11773,6 +11815,7 @@ class CustomStreamWrapper:
                 or self.custom_llm_provider == "predibase"
                 or self.custom_llm_provider == "databricks"
                 or self.custom_llm_provider == "bedrock"
+                or self.custom_llm_provider == "triton"
                 or self.custom_llm_provider in litellm.openai_compatible_endpoints
             ):
                 async for chunk in self.completion_stream:
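This last hunk routes "triton" through the fully async iterator path, i.e. the "triton async completions" half of the commit title. A hedged usage sketch, reusing the same placeholder model alias and endpoint as above:

import asyncio
import litellm

async def main() -> None:
    # Placeholder model alias and endpoint; substitute your Triton deployment.
    response = await litellm.acompletion(
        model="triton/llama-2-7b",
        messages=[{"role": "user", "content": "Stream this asynchronously."}],
        api_base="http://localhost:8000/v2/models/llama-2-7b/generate",
        stream=True,
    )
    async for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            print(delta, end="")

asyncio.run(main())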