From 65c01eae23f0da5ea82ee60f2afcb4a68f529772 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 6 Nov 2023 11:42:57 -0800 Subject: [PATCH] fix(huggingface_restapi.py): output parsing chat template models --- litellm/llms/huggingface_restapi.py | 22 ++++++++++++--- litellm/tests/test_router.py | 42 +++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py index 0c9d2432a0..a124e52b60 100644 --- a/litellm/llms/huggingface_restapi.py +++ b/litellm/llms/huggingface_restapi.py @@ -78,6 +78,22 @@ def validate_environment(api_key, headers): headers = default_headers return headers +def output_parser(generated_text: str): + """ + Parse the output text to remove any special characters. In our current approach we just check for ChatML tokens. + + Initial issue that prompted this - https://github.com/BerriAI/litellm/issues/763 + """ + chat_template_tokens = ["<|assistant|>", "<|system|>", "<|user|>", "<s>", "</s>"] + for token in chat_template_tokens: + if generated_text.strip().startswith(token): + generated_text = generated_text.replace(token, "", 1) + if generated_text.endswith(token): + generated_text = generated_text[::-1].replace(token[::-1], "", 1)[::-1] + return generated_text + + + tgi_models_cache = None conv_models_cache = None def read_tgi_conv_models(): @@ -308,7 +324,7 @@ def completion( if len(completion_response[0]["generated_text"]) > 0: model_response["choices"][0]["message"][ "content" - ] = completion_response[0]["generated_text"] + ] = output_parser(completion_response[0]["generated_text"]) ## GETTING LOGPROBS + FINISH REASON if "details" in completion_response[0] and "tokens" in completion_response[0]["details"]: model_response.choices[0].finish_reason = completion_response[0]["details"]["finish_reason"] @@ -324,7 +340,7 @@ def completion( for token in item["tokens"]: sum_logprob += token["logprob"] if len(item["generated_text"]) > 0: - 
message_obj = Message(content=item["generated_text"], logprobs=sum_logprob) + message_obj = Message(content=output_parser(item["generated_text"]), logprobs=sum_logprob) else: message_obj = Message(content=None) choice_obj = Choices(finish_reason=item["finish_reason"], index=idx+1, message=message_obj) @@ -334,7 +350,7 @@ def completion( if len(completion_response[0]["generated_text"]) > 0: model_response["choices"][0]["message"][ "content" - ] = completion_response[0]["generated_text"] + ] = output_parser(completion_response[0]["generated_text"]) ## CALCULATING USAGE prompt_tokens = 0 try: diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py index 4fb8d6cae3..456e73e23a 100644 --- a/litellm/tests/test_router.py +++ b/litellm/tests/test_router.py @@ -11,9 +11,51 @@ import litellm from litellm import Router from concurrent.futures import ThreadPoolExecutor from dotenv import load_dotenv +# import logging +# logging.basicConfig(level=logging.DEBUG) load_dotenv() +# def test_openai_only(): +# from litellm import completion +# import time +# completions = [] +# max_workers = 1000 # Adjust as needed +# start_time = time.time() +# print(f"Started test: {start_time}") +# with ThreadPoolExecutor(max_workers=max_workers) as executor: +# kwargs = { +# "model": "gpt-3.5-turbo", +# "messages": [{"role": "user", "content": """Context: + +# In the historical era of Ancient Greece, a multitude of significant individuals lived, contributing immensely to various disciplines like science, politics, philosophy, and literature. For instance, Socrates, a renowned philosopher, primarily focused on ethics. His notable method, the Socratic Method, involved acknowledging one's own ignorance to stimulate critical thinking and illuminate ideas. His student, Plato, another prominent figure, founded the Academy in Athens. 
He proposed theories on justice, beauty, and equality, and also introduced the theory of forms, which is pivotal to understanding his philosophical insights. Another student of Socrates, Xenophon, distinguished himself more in the domain of history and military affairs. + +# Aristotle, who studied under Plato, led an equally remarkable life. His extensive works have been influential across various domains, including science, logic, metaphysics, ethics, and politics. Perhaps most notably, a substantial portion of the Western intellectual tradition traces back to his writings. He later tutored Alexander the Great who went on to create one of the most vast empires in the world. + +# In the domain of mathematics, Pythagoras and Euclid made significant contributions. Pythagoras is best known for the Pythagorean theorem, a fundamental principle in geometry, while Euclid, often regarded as the father of geometry, wrote "The Elements", a collection of definitions, axioms, theorems, and proofs. + +# Apart from these luminaries, the period also saw a number of influential political figures. Pericles, a prominent and influential Greek statesman, orator, and general of Athens during the Golden Age, specifically between the Persian and Peloponnesian wars, played a significant role in developing the Athenian democracy. + +# The Ancient Greek era also witnessed extraordinary advancements in arts and literature. Homer, credited with the creation of the epic poems 'The Iliad' and 'The Odyssey,' is considered one of the greatest poets in history. The tragedies of Sophocles, Aeschylus, and Euripides left an indelible mark on the field of drama, and the comedies of Aristophanes remain influential even today. 
+ +# --- +# Question: + +# Who among the mentioned figures from Ancient Greece contributed to the domain of mathematics and what are their significant contributions?"""}], +# } +# for _ in range(10000): +# future = executor.submit(completion, **kwargs) +# completions.append(future) + +# # Retrieve the results from the futures +# results = [future.result() for future in completions] +# end_time = time.time() + +# print(f"Total Duration: {end_time-start_time}") + +# test_openai_only() + + def test_multiple_deployments(): model_list = [{ # list of model deployments "model_name": "gpt-3.5-turbo", # openai model name