forked from phoenix/litellm-mirror
add streaming example cookbook
This commit is contained in:
parent
8da3d382ff
commit
441fff406c
1 changed files with 226 additions and 0 deletions
226
cookbook/liteLLM_Streaming_Demo.ipynb
Normal file
226
cookbook/liteLLM_Streaming_Demo.ipynb
Normal file
|
@ -0,0 +1,226 @@
|
||||||
|
{
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 0,
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"provenance": []
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"name": "python3",
|
||||||
|
"display_name": "Python 3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"# [STREAMING] OpenAI, Anthropic, Replicate, Cohere using liteLLM\n",
|
||||||
|
"In this tutorial:\n",
|
||||||
|
"Note: All inputs/outputs are in the format used by `gpt-3.5-turbo`\n",
|
||||||
|
"\n",
|
||||||
|
"- Call all models in the same input format [**with streaming**]:\n",
|
||||||
|
"\n",
|
||||||
|
" `completion(model, messages, stream=True)`\n",
|
||||||
|
"- All streaming generators are accessed at `chunk['choices'][0]['delta']`\n",
|
||||||
|
"\n",
|
||||||
|
"The following Models are covered in this tutorial\n",
|
||||||
|
"- [GPT-3.5-Turbo](https://platform.openai.com/docs/models/gpt-3-5)\n",
|
||||||
|
"- [Claude-2](https://www.anthropic.com/index/claude-2)\n",
|
||||||
|
"- [StableLM Tuned Alpha 7B](https://replicate.com/stability-ai/stablelm-tuned-alpha-7b)\n",
|
||||||
|
"- [A16z infra-LLAMA-2 7B Chat](https://replicate.com/a16z-infra/llama-2-7b-chat)\n",
|
||||||
|
"- [Vicuna 13B](https://replicate.com/replicate/vicuna-13b)\n",
|
||||||
|
"- [Cohere - Command Nightly]()\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "YV6L5fNv7Kep"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "TO-EdF84O9QT"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# install liteLLM\n",
|
||||||
|
"!pip install litellm==0.1.369"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"## Imports & Set ENV variables\n",
|
||||||
|
"Get your API Keys\n",
|
||||||
|
"\n",
|
||||||
|
"https://platform.openai.com/account/api-keys\n",
|
||||||
|
"\n",
|
||||||
|
"https://replicate.com/account/api-tokens\n",
|
||||||
|
"\n",
|
||||||
|
"https://console.anthropic.com/account/keys\n",
|
||||||
|
"\n",
|
||||||
|
"https://dashboard.cohere.ai/api-keys\n"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "mpHTbTqQ8fey"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"from litellm import completion\n",
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"os.environ['OPENAI_API_KEY'] = '' # @param\n",
|
||||||
|
"os.environ['REPLICATE_API_TOKEN'] = '' # @param\n",
|
||||||
|
"os.environ['ANTHROPIC_API_KEY'] = '' # @param\n",
|
||||||
|
"os.environ['COHERE_API_KEY'] = '' # @param"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "kDbgfcU8O-dW"
|
||||||
|
},
|
||||||
|
"execution_count": 8,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"### Set Messages"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "1KmkOdzLSOmJ"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"user_message = \"Hello, whats the weather in San Francisco??\"\n",
|
||||||
|
"messages = [{ \"content\": user_message,\"role\": \"user\"}]"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "xIEeOhVH-oh6"
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"## Calling Models using liteLLM Streaming -\n",
|
||||||
|
"\n",
|
||||||
|
"## `completion(model, messages, stream)`"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"id": "9SOCVRC1L-G3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [
|
||||||
|
"# replicate models #######\n",
|
||||||
|
"stability_ai = \"stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb\"\n",
|
||||||
|
"llama_2_7b = \"a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc\"\n",
|
||||||
|
"vicuna = \"replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b\"\n",
|
||||||
|
"\n",
|
||||||
|
"models = [\"gpt-3.5-turbo\", \"claude-2\", stability_ai, llama_2_7b, vicuna, \"command-nightly\"] # command-nightly is Cohere\n",
|
||||||
|
"for model in models:\n",
|
||||||
|
" replicate = (model == stability_ai or model==llama_2_7b or model==vicuna) # let liteLLM know if a model is replicate, using this optional param, `replicate=True`\n",
|
||||||
|
" response = completion(model=model, messages=messages, stream=True, replicate=replicate)\n",
|
||||||
|
" print(f\"####################\\n\\nResponse from {model}\")\n",
|
||||||
|
" for i, chunk in enumerate(response):\n",
|
||||||
|
" if i < 5: # NOTE: LIMITING CHUNKS FOR THIS DEMO\n",
|
||||||
|
" print((chunk['choices'][0]['delta']))\n"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"colab": {
|
||||||
|
"base_uri": "https://localhost:8080/"
|
||||||
|
},
|
||||||
|
"id": "XJ4nh4SnRzHP",
|
||||||
|
"outputId": "26b9fe10-b499-4a97-d60d-a8cb8f8030b8"
|
||||||
|
},
|
||||||
|
"execution_count": 13,
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": [
|
||||||
|
"####################\n",
|
||||||
|
"\n",
|
||||||
|
"Response from gpt-3.5-turbo\n",
|
||||||
|
"{\n",
|
||||||
|
" \"role\": \"assistant\",\n",
|
||||||
|
" \"content\": \"\"\n",
|
||||||
|
"}\n",
|
||||||
|
"{\n",
|
||||||
|
" \"content\": \"I\"\n",
|
||||||
|
"}\n",
|
||||||
|
"{\n",
|
||||||
|
" \"content\": \"'m\"\n",
|
||||||
|
"}\n",
|
||||||
|
"{\n",
|
||||||
|
" \"content\": \" sorry\"\n",
|
||||||
|
"}\n",
|
||||||
|
"{\n",
|
||||||
|
" \"content\": \",\"\n",
|
||||||
|
"}\n",
|
||||||
|
"####################\n",
|
||||||
|
"\n",
|
||||||
|
"Response from claude-2\n",
|
||||||
|
"{'role': 'assistant', 'content': ' Unfortunately'}\n",
|
||||||
|
"{'role': 'assistant', 'content': ' I'}\n",
|
||||||
|
"{'role': 'assistant', 'content': ' don'}\n",
|
||||||
|
"{'role': 'assistant', 'content': \"'t\"}\n",
|
||||||
|
"{'role': 'assistant', 'content': ' have'}\n",
|
||||||
|
"####################\n",
|
||||||
|
"\n",
|
||||||
|
"Response from stability-ai/stablelm-tuned-alpha-7b:c49dae362cbaecd2ceabb5bd34fdb68413c4ff775111fea065d259d577757beb\n",
|
||||||
|
"{'role': 'assistant', 'content': \"I'm\"}\n",
|
||||||
|
"{'role': 'assistant', 'content': ' sorry,'}\n",
|
||||||
|
"{'role': 'assistant', 'content': ' I'}\n",
|
||||||
|
"{'role': 'assistant', 'content': ' cannot'}\n",
|
||||||
|
"{'role': 'assistant', 'content': ' answer'}\n",
|
||||||
|
"####################\n",
|
||||||
|
"\n",
|
||||||
|
"Response from a16z-infra/llama-2-7b-chat:4f0b260b6a13eb53a6b1891f089d57c08f41003ae79458be5011303d81a394dc\n",
|
||||||
|
"{'role': 'assistant', 'content': ''}\n",
|
||||||
|
"{'role': 'assistant', 'content': ' Hello'}\n",
|
||||||
|
"{'role': 'assistant', 'content': '!'}\n",
|
||||||
|
"{'role': 'assistant', 'content': ' I'}\n",
|
||||||
|
"{'role': 'assistant', 'content': \"'\"}\n",
|
||||||
|
"####################\n",
|
||||||
|
"\n",
|
||||||
|
"Response from replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b\n",
|
||||||
|
"{'role': 'assistant', 'content': 'Comment:'}\n",
|
||||||
|
"{'role': 'assistant', 'content': 'Hi! '}\n",
|
||||||
|
"{'role': 'assistant', 'content': 'How '}\n",
|
||||||
|
"{'role': 'assistant', 'content': 'are '}\n",
|
||||||
|
"{'role': 'assistant', 'content': 'you '}\n",
|
||||||
|
"####################\n",
|
||||||
|
"\n",
|
||||||
|
"Response from command-nightly\n",
|
||||||
|
"{'role': 'assistant', 'content': ' Hello'}\n",
|
||||||
|
"{'role': 'assistant', 'content': '!'}\n",
|
||||||
|
"{'role': 'assistant', 'content': ' '}\n",
|
||||||
|
"{'role': 'assistant', 'content': ' I'}\n",
|
||||||
|
"{'role': 'assistant', 'content': \"'m\"}\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": [],
|
||||||
|
"metadata": {
|
||||||
|
"id": "t7WMRuL-8NrO"
|
||||||
|
},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue