From b784e9dfe5c354c0a23d2b1a996ec21cb2285f96 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 14 Aug 2023 11:15:17 -0700
Subject: [PATCH] with notebook working

---
 cookbook/liteLLM_VertextAI_Example.ipynb | 107 +++++++++++------------
 1 file changed, 52 insertions(+), 55 deletions(-)

diff --git a/cookbook/liteLLM_VertextAI_Example.ipynb b/cookbook/liteLLM_VertextAI_Example.ipynb
index 7e8e13589..d94d24cce 100644
--- a/cookbook/liteLLM_VertextAI_Example.ipynb
+++ b/cookbook/liteLLM_VertextAI_Example.ipynb
@@ -11,40 +11,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      [... the "Defaulting to user installation ..." / "Requirement already satisfied: ..." pip log and the pip 23.1.2 -> 23.2.1 upgrade notice, deleted ...]
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "!pip install litellm==0.1.387"
+    "!pip install litellm==0.1.388"
   ]
  },
  {
   "cell_type": "code",
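The hunk above clears a stale local pip log out of the install cell and bumps the pin from litellm 0.1.387 to 0.1.388. Because the deleted log shows a "Defaulting to user installation" install into user site-packages, a quick standard-library check (a sketch, not part of the patch) can confirm the kernel actually sees the pinned version:

from importlib.metadata import PackageNotFoundError, version

try:
    installed = version("litellm")
except PackageNotFoundError:
    # The wheel landed somewhere this kernel does not search; reinstall or restart.
    raise SystemExit("litellm is not importable from this kernel")

# 0.1.388 is the version the patched cell pins.
assert installed == "0.1.388", f"expected litellm==0.1.388, found {installed}"
print(f"litellm {installed} ready")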
@@ -88,14 +59,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': LiteLLM LiteLLM is a large language model from Google AI that is designed to be lightweight and efficient. It is based on the Transformer architecture and has been trained on a massive dataset of text. LiteLLM is available as a pre-trained model that can be used for a variety of natural language processing tasks, such as text classification, question answering, and summarization.}}], 'created': 1692035587.573185, 'model': 'chat-bison'}\n"
+      "{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': LiteLLM LiteLLM is a large language model from Google AI that is designed to be lightweight and efficient. It is based on the Transformer architecture and has been trained on a massive dataset of text. LiteLLM is available as a pre-trained model that can be used for a variety of natural language processing tasks, such as text classification, question answering, and summarization.}}], 'created': 1692036777.831989, 'model': 'chat-bison'}\n"
      ]
     }
    ],
@@ -118,19 +89,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "['claude-2', 'claude-instant-1', 'claude-instant-1.2']\n"
+      "['text-bison', 'text-bison@001']\n"
      ]
     }
    ],
    "source": [
-    "print(litellm.anthropic_models)"
+    "print(litellm.vertex_text_models)"
   ]
  },
  {
   "cell_type": "code",
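The two hunks above refresh the chat-bison demo output and fix the model-listing cell, which previously printed litellm.anthropic_models in a Vertex AI notebook; it now prints litellm.vertex_text_models, i.e. ['text-bison', 'text-bison@001']. That list can drive a quick smoke test of every Vertex AI text model litellm knows about. In the sketch below, litellm.vertex_project, litellm.vertex_location, and the project ID are assumptions -- the notebook's setup cell sits outside these hunks:

import litellm
from litellm import completion

# Assumed setup (not shown in this diff): point litellm at your GCP project.
litellm.vertex_project = "my-gcp-project"  # hypothetical project ID
litellm.vertex_location = "us-central1"    # hypothetical region

messages = [{"content": "what is liteLLM ", "role": "user"}]

# Call completion() once per Vertex AI text model and print each answer.
for model in litellm.vertex_text_models:
    response = completion(model=model, messages=messages)
    print(model, "->", response["choices"][0]["message"]["content"])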
@@ -139,24 +110,10 @@
    "metadata": {},
    "outputs": [
     {
-     "ename": "ValueError",
-     "evalue": "Invalid completion model args passed in. Check your input - {'model': 'text-bison@001', 'messages': [{'content': 'what is liteLLM ', 'role': 'user'}], ...}",
-     "output_type": "error",
-     "traceback": [
-      [... ANSI-escape-coded traceback frames through litellm/utils.py, litellm/timeout.py, concurrent/futures/_base.py, and litellm/main.py, ending in the ValueError above, deleted ...]
-     ]
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': liteLLM is a low-precision variant of the large language model LLM 5. For a given text prompt, liteLLM can continue the text in a way that is both coherent and informative.}}], 'created': 1692036813.052487, 'model': 'text-bison@001'}\n"
+     ]
     }
    ],
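The hunk above swaps out the failure recorded with litellm 0.1.387 -- a ValueError rejecting text-bison@001 as an invalid completion model, followed by a long ANSI-colored traceback -- for the successful stdout response produced after the fix. The next hunk then adds two new cells: a plain text-bison call and a text-bison@001 call with tuned sampling parameters. A consolidated sketch of that tuned call, with explanatory comments that are not in the patch:

from litellm import completion

messages = [{"content": "what is liteLLM ", "role": "user"}]

response = completion(
    model="text-bison@001",
    messages=messages,
    temperature=0.4,  # below 1 sharpens the token distribution -> more deterministic text
    top_k=10,         # only the 10 most likely tokens are candidates at each step
    top_p=0.2,        # nucleus sampling over the top 20% of probability mass
)

# The cell's printed output ends mid-word ("...more about lite"), which suggests
# the default max output token limit was hit rather than a natural stop.
print(response["choices"][0]["message"]["content"])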
@@ -169,6 +126,46 @@
    "print(response)"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': liteLLM was originally developed by Google engineers as a lite version of LLM, which stands for large language model. It is a deep learning language model that is designed to be more efficient than traditional LLMs while still achieving comparable performance. liteLLM is built on Tensor2Tensor, a framework for building and training large neural networks. It is able to learn from massive amounts of text data and generate text that is both coherent and informative. liteLLM has been shown to be effective for a variety of tasks, including machine translation, text summarization, and question answering.}}], 'created': 1692036821.60951, 'model': 'text-bison'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "response = completion(model=\"text-bison\", messages=messages)\n",
+    "print(response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "liteLLM is a lightweight language model that is designed to be fast and efficient. It is based on the Transformer architecture, but it has been modified to reduce the number of parameters and the amount of computation required. This makes it suitable for use on devices with limited resources, such as mobile phones and embedded systems.\n",
+      "\n",
+      "liteLLM is still under development, but it has already been shown to be effective on a variety of tasks, including text classification, natural language inference, and machine translation. It is also being used to develop new applications, such as chatbots and language assistants.\n",
+      "\n",
+      "If you are interested in learning more about lite\n"
+     ]
+    }
+   ],
+   "source": [
+    "response = completion(model=\"text-bison@001\", messages=messages, temperature=0.4, top_k=10, top_p=0.2)\n",
+    "print(response['choices'][0]['message']['content'])"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,