From 441fff406cc1876a48e641cff305e497c26be4f4 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 9 Aug 2023 14:29:21 -0700
Subject: [PATCH] add streaming example cookbook

---
 cookbook/liteLLM_Streaming_Demo.ipynb | 226 ++++++++++++++++++++++++++
 1 file changed, 226 insertions(+)
 create mode 100644 cookbook/liteLLM_Streaming_Demo.ipynb

diff --git a/cookbook/liteLLM_Streaming_Demo.ipynb b/cookbook/liteLLM_Streaming_Demo.ipynb
new file mode 100644
index 000000000..0456c5451
--- /dev/null
+++ b/cookbook/liteLLM_Streaming_Demo.ipynb
@@ -0,0 +1,226 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# [STREAMING] OpenAI, Anthropic, Replicate, Cohere using liteLLM\n",
+        "In this tutorial we call all models in the same input format, **with streaming**:\n",
+        "\n",
+        "  `completion(model, messages, stream=True)`\n",
+        "\n",
+        "Note: all inputs/outputs are in the format used by `gpt-3.5-turbo`, and every streaming generator yields chunks whose text is accessed at `chunk['choices'][0]['delta']`.\n",
+        "\n",
+        "The following models are covered in this tutorial:\n",
+        "- [GPT-3.5-Turbo](https://platform.openai.com/docs/models/gpt-3-5)\n",
+        "- [Claude-2](https://www.anthropic.com/index/claude-2)\n",
+        "- [StableLM Tuned Alpha 7B](https://replicate.com/stability-ai/stablelm-tuned-alpha-7b)\n",
+        "- [A16z infra-LLAMA-2 7B Chat](https://replicate.com/a16z-infra/llama-2-7b-chat)\n",
+        "- [Vicuna 13B](https://replicate.com/replicate/vicuna-13b)\n",
+        "- Cohere - Command Nightly\n"
+      ],
+      "metadata": {
+        "id": "YV6L5fNv7Kep"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "TO-EdF84O9QT"
+      },
+      "outputs": [],
+      "source": [
+        "# install liteLLM\n",
+        "!pip install litellm==0.1.369"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Imports & Set ENV variables\n",
+        "Get your API keys:\n",
+        "\n",
+        "https://platform.openai.com/account/api-keys\n",
+        "\n",
+        "https://replicate.com/account/api-tokens\n",
+        "\n",
+        "https://console.anthropic.com/account/keys\n",
+        "\n",
+        "https://dashboard.cohere.ai/api-keys\n"
+      ],
+      "metadata": {
+        "id": "mpHTbTqQ8fey"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from litellm import completion\n",
+        "import os\n",
+        "\n",
+        "os.environ['OPENAI_API_KEY'] = '' # @param\n",
+        "os.environ['REPLICATE_API_TOKEN'] = '' # @param\n",
+        "os.environ['ANTHROPIC_API_KEY'] = '' # @param\n",
+        "os.environ['COHERE_API_KEY'] = '' # @param"
+      ],
+      "metadata": {
+        "id": "kDbgfcU8O-dW"
+      },
+      "execution_count": 8,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Set Messages"
+      ],
+      "metadata": {
+        "id": "1KmkOdzLSOmJ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "user_message = \"Hello, what's the weather in San Francisco?\"\n",
+        "messages = [{\"content\": user_message, \"role\": \"user\"}]"
+      ],
+      "metadata": {
+        "id": "xIEeOhVH-oh6"
+      },
+      "execution_count": 4,
+      "outputs": []
+    },
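+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Warm-up: stream a single model\n",
+        "Before looping over every provider, here is a minimal sketch of one streaming call that re-assembles the full reply. It assumes only what this notebook already uses: `completion(model, messages, stream=True)` and deltas at `chunk['choices'][0]['delta']`. The defensive `.get('content', '')` is our addition, since a chunk may carry an empty or missing `content` field (the first `gpt-3.5-turbo` chunk in the recorded output below is role-only)."
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Minimal sketch (our addition): stream one model and re-assemble the reply.\n",
+        "response = completion(model='gpt-3.5-turbo', messages=messages, stream=True)\n",
+        "\n",
+        "full_reply = ''\n",
+        "for chunk in response:\n",
+        "    delta = chunk['choices'][0]['delta']  # each chunk carries a partial message\n",
+        "    full_reply += delta.get('content', '') or ''  # 'content' may be empty or absent\n",
+        "\n",
+        "print(full_reply)"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    },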
\"stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb\"\n", + "llama_2_7b = \"a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc\"\n", + "vicuna = \"replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b\"\n", + "\n", + "models = [\"gpt-3.5-turbo\", \"claude-2\", stability_ai, llama_2_7b, vicuna, \"command-nightly\"] # command-nightly is Cohere\n", + "for model in models:\n", + " replicate = (model == stability_ai or model==llama_2_7b or model==vicuna) # let liteLLM know if a model is replicate, using this optional param, `replicate=True`\n", + " response = completion(model=model, messages=messages, stream=True, replicate=replicate)\n", + " print(f\"####################\\n\\nResponse from {model}\")\n", + " for i, chunk in enumerate(response):\n", + " if i < 5: # NOTE: LIMITING CHUNKS FOR THIS DEMO\n", + " print((chunk['choices'][0]['delta']))\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XJ4nh4SnRzHP", + "outputId": "26b9fe10-b499-4a97-d60d-a8cb8f8030b8" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "####################\n", + "\n", + "Response from gpt-3.5-turbo\n", + "{\n", + " \"role\": \"assistant\",\n", + " \"content\": \"\"\n", + "}\n", + "{\n", + " \"content\": \"I\"\n", + "}\n", + "{\n", + " \"content\": \"'m\"\n", + "}\n", + "{\n", + " \"content\": \" sorry\"\n", + "}\n", + "{\n", + " \"content\": \",\"\n", + "}\n", + "####################\n", + "\n", + "Response from claude-2\n", + "{'role': 'assistant', 'content': ' Unfortunately'}\n", + "{'role': 'assistant', 'content': ' I'}\n", + "{'role': 'assistant', 'content': ' don'}\n", + "{'role': 'assistant', 'content': \"'t\"}\n", + "{'role': 'assistant', 'content': ' have'}\n", + "####################\n", + "\n", + "Response from stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb\n", + "{'role': 'assistant', 'content': \"I'm\"}\n", + "{'role': 'assistant', 'content': ' sorry,'}\n", + "{'role': 'assistant', 'content': ' I'}\n", + "{'role': 'assistant', 'content': ' cannot'}\n", + "{'role': 'assistant', 'content': ' answer'}\n", + "####################\n", + "\n", + "Response from a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc\n", + "{'role': 'assistant', 'content': ''}\n", + "{'role': 'assistant', 'content': ' Hello'}\n", + "{'role': 'assistant', 'content': '!'}\n", + "{'role': 'assistant', 'content': ' I'}\n", + "{'role': 'assistant', 'content': \"'\"}\n", + "####################\n", + "\n", + "Response from replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b\n", + "{'role': 'assistant', 'content': 'Comment:'}\n", + "{'role': 'assistant', 'content': 'Hi! 
'}\n", + "{'role': 'assistant', 'content': 'How '}\n", + "{'role': 'assistant', 'content': 'are '}\n", + "{'role': 'assistant', 'content': 'you '}\n", + "####################\n", + "\n", + "Response from command-nightly\n", + "{'role': 'assistant', 'content': ' Hello'}\n", + "{'role': 'assistant', 'content': '!'}\n", + "{'role': 'assistant', 'content': ' '}\n", + "{'role': 'assistant', 'content': ' I'}\n", + "{'role': 'assistant', 'content': \"'m\"}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "t7WMRuL-8NrO" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file