From b0e29922440fbba2fd2da02d74f091bb6d4b68a2 Mon Sep 17 00:00:00 2001 From: dcruiz01 Date: Wed, 18 Oct 2023 15:31:59 -0700 Subject: [PATCH] Added Gradio integration tutorial --- .../docs/tutorials/gradio_integration.md | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 docs/my-website/docs/tutorials/gradio_integration.md diff --git a/docs/my-website/docs/tutorials/gradio_integration.md b/docs/my-website/docs/tutorials/gradio_integration.md new file mode 100644 index 000000000..f1d164810 --- /dev/null +++ b/docs/my-website/docs/tutorials/gradio_integration.md @@ -0,0 +1,62 @@ +# Gradio Chatbot + LiteLLM Tutorial +Simple tutorial for integrating LiteLLM completion calls with streaming Gradio chatbot demos. + +### Install & Import Dependencies +```python +!pip install gradio litellm +import gradio as gr +import litellm +``` + +### Define Inference Function +Remember to set `model` and `api_base` as expected by the server hosting your LLM. +```python +def inference(message, history): + try: + flattened_history = [item for sublist in history for item in sublist] + full_message = " ".join(flattened_history + [message]) + messages_litellm = [{"role": "user", "content": full_message}] # litellm message format + partial_message = "" + for chunk in litellm.completion(model="huggingface/meta-llama/Llama-2-7b-chat-hf", + api_base="x.x.x.x:xxxx", + messages=messages_litellm, + max_new_tokens=512, + temperature=.7, + top_k=100, + top_p=.9, + repetition_penalty=1.18, + stream=True): + partial_message += chunk['choices'][0]['delta']['content'] # extract text from streamed litellm chunks + yield partial_message + except Exception as e: + print("Exception encountered:", str(e)) + yield f"An Error occurred please 'Clear' the error and try your question again" +``` + +### Define Chat Interface +```python +gr.ChatInterface( + inference, + chatbot=gr.Chatbot(height=400), + textbox=gr.Textbox(placeholder="Enter text here...", container=False, scale=5), + 
description=f""" + CURRENT PROMPT TEMPLATE: {model_name}. + An incorrect prompt template will cause performance to suffer. + Check the API specifications to ensure this format matches the target LLM.""", + title="Simple Chatbot Test Application", + examples=["Define 'deep learning' in one sentence."], + retry_btn="Retry", + undo_btn="Undo", + clear_btn="Clear", + theme=theme, +).queue().launch() +``` +### Launch Gradio App +1. From command line: `python app.py` or `gradio app.py` (the latter enables live deployment updates) +2. Visit the provided hyperlink in your browser. +3. Enjoy prompt-agnostic interaction with the remote LLM server. + +### Recommended Extensions: +* Add command line arguments to define target model & inference endpoints + +Credits to [ZQ](https://x.com/ZQ) for this tutorial. \ No newline at end of file