diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 96ddee40a..3d8b9a1b2 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -26,7 +26,7 @@ def test_completion_custom_provider_model_name():
     try:
         litellm.cache = None
         response = completion(
-            model="together_ai/togethercomputer/llama-2-70b-chat",
+            model="together_ai/mistralai/Mistral-7B-Instruct-v0.1",
             messages=messages,
             logger_fn=logger_fn,
         )
@@ -287,7 +287,7 @@ def hf_test_completion_tgi():
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-# hf_test_completion_tgi()
+hf_test_completion_tgi()
 
 # ################### Hugging Face Conversational models ########################
 # def hf_test_completion_conv():
@@ -1023,7 +1023,7 @@ def test_customprompt_together_ai():
         litellm.set_verbose = False
         litellm.num_retries = 0
         response = completion(
-            model="together_ai/togethercomputer/llama-2-70b-chat",
+            model="together_ai/mistralai/Mistral-7B-Instruct-v0.1",
             messages=messages,
             roles={"system":{"pre_message":"<|im_start|>system\n", "post_message":"<|im_end|>"}, "assistant":{"pre_message":"<|im_start|>assistant\n","post_message":"<|im_end|>"}, "user":{"pre_message":"<|im_start|>user\n","post_message":"<|im_end|>"}}
         )
@@ -1599,7 +1599,7 @@ def test_completion_together_ai_stream():
     messages = [{ "content": user_message,"role": "user"}]
     try:
         response = completion(
-            model="together_ai/togethercomputer/llama-2-70b-chat",
+            model="together_ai/mistralai/Mistral-7B-Instruct-v0.1",
             messages=messages, stream=True,
             max_tokens=5
         )
diff --git a/litellm/utils.py b/litellm/utils.py
index 4df06f52e..c6d855fd1 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -132,6 +132,9 @@ def map_finish_reason(finish_reason: str): # openai supports 5 stop sequences -
         return "content_filter"
     elif finish_reason == "ERROR": # openai currently doesn't support an 'error' finish reason
         return "stop"
+    # huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream
+    elif finish_reason == "eos_token" or finish_reason == "stop_sequence":
+        return "stop"
     return finish_reason
 
 class FunctionCall(OpenAIObject):
diff --git a/requirements.txt b/requirements.txt
index b6a15fb7a..f3be861b8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,7 @@ litellm
 openai
 fastapi
 tomli
+pydantic>=2.5
 appdirs
 tomli_w
 backoff
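
For reference, a minimal sketch (not part of the patch) of the finish-reason normalization that the `litellm/utils.py` hunk introduces, assuming the patch is applied and `map_finish_reason` is imported from `litellm.utils`:

```python
# Sketch: the utils.py hunk maps Hugging Face TGI finish reasons onto
# OpenAI-style ones. Assumes this patch is applied to an installed litellm.
from litellm.utils import map_finish_reason

# TGI's "eos_token" and "stop_sequence" both normalize to OpenAI's "stop".
assert map_finish_reason("eos_token") == "stop"
assert map_finish_reason("stop_sequence") == "stop"

# Existing mappings are unaffected: "ERROR" still maps to "stop", and
# values OpenAI already uses pass through unchanged.
assert map_finish_reason("ERROR") == "stop"
assert map_finish_reason("length") == "length"
```

This keeps downstream consumers that switch on OpenAI's documented finish reasons ("stop", "length", "function_call", "content_filter") working unchanged when the underlying provider is a TGI endpoint.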