diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index e2293109d..d8a3221f4 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 93adb354b..7e2174fb4 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 0ff9280c4..f71218ef0 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -245,9 +245,8 @@ def deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, dep
 
 # for streaming
 def data_generator(response):
-    print("inside generator")
+    print_verbose("inside generator")
     for chunk in response:
-        print(f"chunk: {chunk}")
         print_verbose(f"returned chunk: {chunk}")
         yield f"data: {json.dumps(chunk)}\n\n"
 
@@ -302,26 +301,6 @@ def litellm_completion(data, type):
         data["max_tokens"] = user_max_tokens
     if user_api_base:
         data["api_base"] = user_api_base
-    ## CUSTOM PROMPT TEMPLATE ## - run `litellm --config` to set this
-    litellm.register_prompt_template(
-        model=user_model,
-        roles={
-            "system": {
-                "pre_message": os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
-                "post_message": os.getenv("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
-            },
-            "assistant": {
-                "pre_message": os.getenv("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
-                "post_message": os.getenv("MODEL_ASSISTANT_MESSAGE_END_TOKEN", "")
-            },
-            "user": {
-                "pre_message": os.getenv("MODEL_USER_MESSAGE_START_TOKEN", ""),
-                "post_message": os.getenv("MODEL_USER_MESSAGE_END_TOKEN", "")
-            }
-        },
-        initial_prompt_value=os.getenv("MODEL_PRE_PROMPT", ""),
-        final_prompt_value=os.getenv("MODEL_POST_PROMPT", "")
-    )
     if type == "completion":
         response = litellm.text_completion(**data)
     elif type == "chat_completion":
diff --git a/litellm/tests/test_prompt_factory.py b/litellm/tests/test_prompt_factory.py
new file mode 100644
index 000000000..11ebbb424
--- /dev/null
+++ b/litellm/tests/test_prompt_factory.py
@@ -0,0 +1,23 @@
+#### What this tests ####
+# This tests if prompts are being correctly formatted
+import sys
+import os
+import io
+
+sys.path.insert(0, os.path.abspath('../..'))
+
+# from litellm.llms.prompt_templates.factory import prompt_factory
+from litellm import completion
+
+def codellama_prompt_format():
+    model = "huggingface/codellama/CodeLlama-7b-Instruct-hf"
+    messages = [{"role": "system", "content": "You are a good bot"}, {"role": "user", "content": "Hey, how's it going?"}]
+    expected_response = """[INST] <<SYS>>
+You are a good bot
+<</SYS>>
+ [/INST]
+[INST] Hey, how's it going? [/INST]"""
+    response = completion(model=model, messages=messages)
+    print(response)
+
+# codellama_prompt_format()
\ No newline at end of file
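
Note: the env-var-driven prompt template block deleted from litellm_completion() above is still reproducible by callers, since litellm.register_prompt_template (with exactly the keyword arguments shown in the removed lines) remains a public helper. A minimal sketch of doing the same registration outside the proxy follows; the model id "huggingface/my-org/my-model" is a hypothetical placeholder, and the environment variable names simply mirror the ones the removed code read.

import os
import litellm

# Sketch only: register the same env-var-driven template the proxy used to set up
# inside litellm_completion(); "huggingface/my-org/my-model" is a placeholder id.
litellm.register_prompt_template(
    model="huggingface/my-org/my-model",
    roles={
        "system": {
            "pre_message": os.getenv("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
            "post_message": os.getenv("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
        },
        "assistant": {
            "pre_message": os.getenv("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
            "post_message": os.getenv("MODEL_ASSISTANT_MESSAGE_END_TOKEN", ""),
        },
        "user": {
            "pre_message": os.getenv("MODEL_USER_MESSAGE_START_TOKEN", ""),
            "post_message": os.getenv("MODEL_USER_MESSAGE_END_TOKEN", ""),
        },
    },
    initial_prompt_value=os.getenv("MODEL_PRE_PROMPT", ""),
    final_prompt_value=os.getenv("MODEL_POST_PROMPT", ""),
)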