forked from phoenix/litellm-mirror
fix(utils.py): remove eos token for zephyr models
This commit is contained in:
parent f24786095a
commit 94dc3f66f3
3 changed files with 33 additions and 18 deletions
@@ -286,21 +286,6 @@ def hf_test_completion_tgi():
         pytest.fail(f"Error occurred: {e}")
 # hf_test_completion_tgi()
 
-def hf_test_completion_tgi_stream():
-    try:
-        response = completion(
-            model = 'huggingface/HuggingFaceH4/zephyr-7b-beta',
-            messages = [{ "content": "Hello, how are you?","role": "user"}],
-            stream=True
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk["choices"][0]["delta"]["content"])
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-# hf_test_completion_tgi_stream()
-
 # ################### Hugging Face Conversational models ########################
 # def hf_test_completion_conv():
 #     try:
@@ -631,6 +631,29 @@ def ai21_completion_call_bad_key():
 
 # ai21_completion_call_bad_key()
 
+def hf_test_completion_tgi_stream():
+    try:
+        response = completion(
+            model = 'huggingface/HuggingFaceH4/zephyr-7b-beta',
+            messages = [{ "content": "Hello, how are you?","role": "user"}],
+            stream=True
+        )
+        # Add any assertions here to check the response
+        print(f"response: {response}")
+        complete_response = ""
+        start_time = time.time()
+        for idx, chunk in enumerate(response):
+            chunk, finished = streaming_format_tests(idx, chunk)
+            complete_response += chunk
+            if finished:
+                break
+        if complete_response.strip() == "":
+            raise Exception("Empty response received")
+        print(f"completion_response: {complete_response}")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+hf_test_completion_tgi_stream()
+
 # def test_completion_aleph_alpha():
 #     try:
 #         response = completion(
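
The relocated streaming test leans on streaming_format_tests, a helper defined elsewhere in this test suite whose body is not part of this diff. From the call above its contract is (idx, chunk) -> (text, finished). Below is a hedged stand-in for illustration only; the specific field checks are assumptions, not the suite's actual assertions:

def streaming_format_tests(idx, chunk):
    # Hypothetical stand-in: the signature is inferred from the call in the test
    # above; the checks here are illustrative assumptions, not litellm's real helper.
    finished = False
    if idx == 0:
        # first chunk is assumed to carry OpenAI-style response metadata
        assert "id" in chunk and "created" in chunk and "model" in chunk
    if chunk["choices"][0]["finish_reason"] is not None:
        finished = True
    text = chunk["choices"][0]["delta"]["content"] or ""
    return text, finished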
@@ -706,7 +729,7 @@ def test_openai_chat_completion_call():
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
-test_openai_chat_completion_call()
+# test_openai_chat_completion_call()
 
 def test_openai_chat_completion_complete_response_call():
     try:
@@ -4538,8 +4538,14 @@ class CustomStreamWrapper:
         if self.logging_obj:
             self.logging_obj.post_call(text)
 
-    def check_special_tokens(self, chunk: str):
+    def check_special_tokens(self, chunk: str, finish_reason: Optional[str]):
         hold = False
+        if finish_reason:
+            for token in self.special_tokens:
+                if token in chunk:
+                    chunk = chunk.replace(token, "")
+            return hold, chunk
+
         if self.sent_first_chunk is True:
             return hold, chunk
 
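
With the new finish_reason parameter, check_special_tokens scrubs special tokens only when the stream reports a finish_reason, i.e. on the terminating chunk. A minimal sketch of that branch, assuming the wrapper's special_tokens list contains markers such as zephyr's "</s>" EOS token (the actual list is not shown in this diff):

from typing import Optional

SPECIAL_TOKENS = ["</s>", "<s>", "<|assistant|>", "<|user|>", "<|system|>"]  # illustrative list only

def strip_special_tokens(chunk: str, finish_reason: Optional[str]) -> str:
    # Mirrors the new branch: scrub tokens only once finish_reason is set.
    if finish_reason:
        for token in SPECIAL_TOKENS:
            chunk = chunk.replace(token, "")
    return chunk

print(strip_special_tokens("I'm doing well.</s>", finish_reason="stop"))  # -> "I'm doing well."
print(strip_special_tokens("I'm doi", finish_reason=None))                # mid-stream chunks pass through untouched

Gating on finish_reason keeps mid-stream chunks untouched, so a partially streamed token is never held back or mangled before the stream actually ends.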
@@ -4996,8 +5002,9 @@ class CustomStreamWrapper:
             model_response.model = self.model
             print_verbose(f"model_response: {model_response}; completion_obj: {completion_obj}")
             print_verbose(f"model_response finish reason 3: {model_response.choices[0].finish_reason}")
+
             if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
-                hold, model_response_str = self.check_special_tokens(completion_obj["content"])
+                hold, model_response_str = self.check_special_tokens(chunk=completion_obj["content"], finish_reason=model_response.choices[0].finish_reason)
                 if hold is False:
                     completion_obj["content"] = model_response_str
                     if self.sent_first_chunk == False:
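
Combined with the call-site change above, the final streamed chunk reaches the caller with zephyr's EOS marker already removed. A hedged end-to-end check, mirroring the test added earlier in this diff (it needs Hugging Face credentials and network access; the delta access pattern is the one used by the removed test):

from litellm import completion

response = completion(
    model="huggingface/HuggingFaceH4/zephyr-7b-beta",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    stream=True,
)
assembled = ""
for chunk in response:
    piece = chunk["choices"][0]["delta"]["content"]
    if piece:
        assembled += piece
print(assembled)
assert "</s>" not in assembled  # EOS token no longer leaks into the streamed content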