forked from phoenix/litellm-mirror
new v bump
commit 828f4e7bf8
parent 1b4aadbb25
4 changed files with 175 additions and 45 deletions
@@ -6,16 +6,45 @@
    "metadata": {},
    "source": [
     "## Using Google Palm (VertexAI) with liteLLM \n",
-    "### chat-bison & chat-bison@001"
+    "### chat-bison, chat-bison@001, text-bison, text-bison@001"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Defaulting to user installation because normal site-packages is not writeable\n",
+      "Requirement already satisfied: litellm==0.1.387 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (0.1.387)\n",
+      "Requirement already satisfied: openai<0.28.0,>=0.27.8 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from litellm==0.1.387) (0.27.8)\n",
+      "Requirement already satisfied: python-dotenv<2.0.0,>=1.0.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from litellm==0.1.387) (1.0.0)\n",
+      "Requirement already satisfied: tiktoken<0.5.0,>=0.4.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from litellm==0.1.387) (0.4.0)\n",
+      "Requirement already satisfied: requests>=2.20 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.387) (2.28.2)\n",
+      "Requirement already satisfied: tqdm in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.387) (4.65.0)\n",
+      "Requirement already satisfied: aiohttp in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from openai<0.28.0,>=0.27.8->litellm==0.1.387) (3.8.4)\n",
+      "Requirement already satisfied: regex>=2022.1.18 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from tiktoken<0.5.0,>=0.4.0->litellm==0.1.387) (2023.6.3)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.387) (3.1.0)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.387) (3.4)\n",
+      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.387) (1.26.6)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from requests>=2.20->openai<0.28.0,>=0.27.8->litellm==0.1.387) (2023.5.7)\n",
+      "Requirement already satisfied: attrs>=17.3.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.387) (23.1.0)\n",
+      "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.387) (6.0.4)\n",
+      "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.387) (4.0.2)\n",
+      "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.387) (1.9.2)\n",
+      "Requirement already satisfied: frozenlist>=1.1.1 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.387) (1.3.3)\n",
+      "Requirement already satisfied: aiosignal>=1.1.2 in /Users/ishaanjaffer/Library/Python/3.9/lib/python/site-packages (from aiohttp->openai<0.28.0,>=0.27.8->litellm==0.1.387) (1.3.1)\n",
+      "\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
+      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n"
+     ]
+    }
+   ],
    "source": [
-    "!pip install litellm==0.1.379"
+    "!pip install litellm==0.1.387"
    ]
   },
   {
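Reviewer note: the install cell now pins `litellm==0.1.387` (the old cell still said 0.1.379). A minimal sanity check after installing, as a sketch assuming only the pin above (`importlib.metadata` ships with the Python 3.9 used in this run):

```python
# Sketch: confirm the pinned litellm version actually got installed.
# Assumes `pip install litellm==0.1.387` from the cell above already ran.
from importlib.metadata import version

print(version("litellm"))  # expect "0.1.387" given the pin in this diff
```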
@@ -31,12 +60,13 @@
     "\n",
     "VertexAI uses Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information on setting this up\n",
     "\n",
-    "NOTE: VertexAI requires you to set `application_default_credentials.json` \n"
+    "NOTE: VertexAI requires you to set `application_default_credentials.json`, this can be set by running `gcloud auth application-default login` in your terminal\n",
+    "\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
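The updated note explains where the credentials come from. For context, a minimal setup sketch: the `gcloud auth application-default login` command is quoted from the cell above, while the two module attributes are patterned on litellm's VertexAI docs and their values are placeholders, not something this diff sets:

```python
# Sketch: wire litellm up to Vertex AI after running, in a terminal:
#   gcloud auth application-default login
# The attributes below follow litellm's VertexAI docs; the values are
# hypothetical placeholders for your own GCP project and region.
import litellm

litellm.vertex_project = "my-gcp-project"   # hypothetical project ID
litellm.vertex_location = "us-central1"     # hypothetical region
```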
@@ -53,14 +83,22 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Call VertexAI using liteLLM"
+    "## Call VertexAI - chat-bison using liteLLM"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': LiteLLM LiteLLM is a large language model from Google AI that is designed to be lightweight and efficient. It is based on the Transformer architecture and has been trained on a massive dataset of text. LiteLLM is available as a pre-trained model that can be used for a variety of natural language processing tasks, such as text classification, question answering, and summarization.}}], 'created': 1692035587.573185, 'model': 'chat-bison'}\n"
+     ]
+    }
+   ],
    "source": [
     "user_message = \"what is liteLLM \"\n",
     "messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
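The new output shows the chat-bison call returning an OpenAI-style dict. A small follow-up sketch for extracting just the reply text, assuming the `choices[0].message.content` shape visible in the captured output above:

```python
from litellm import completion

# Same call as the cell in this hunk; the captured output is an OpenAI-style dict.
messages = [{"content": "what is liteLLM ", "role": "user"}]
response = completion(model="chat-bison", messages=messages)

# Pull out just the assistant text instead of printing the whole dict.
print(response["choices"][0]["message"]["content"])
```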
@@ -69,6 +107,74 @@
     "response = completion(model=\"chat-bison\", messages=messages)\n",
     "print(response)"
    ]
   },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Call VertexAI - text-bison using liteLLM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['claude-2', 'claude-instant-1', 'claude-instant-1.2']\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(litellm.anthropic_models)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "Invalid completion model args passed in. Check your input - {'model': 'text-bison@001', 'messages': [{'content': 'what is liteLLM ', 'role': 'user'}], 'functions': [], 'function_call': '', 'temperature': 1, 'top_p': 1, 'n': 1, 'stream': False, 'stop': None, 'max_tokens': inf, 'presence_penalty': 0, 'frequency_penalty': 0, 'logit_bias': {}, 'user': '', 'deployment_id': None, 'return_async': False, 'api_key': None, 'force_timeout': 600, 'logger_fn': None, 'verbose': False, 'azure': False, 'custom_llm_provider': None, 'custom_api_base': None, 'args': {...}, 'model_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant'}}]}, 'optional_params': {}, 'litellm_params': {'return_async': False, 'api_key': None, 'force_timeout': 600, 'logger_fn': None, 'verbose': False, 'custom_llm_provider': None, 'custom_api_base': None}}",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[4], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m messages \u001b[39m=\u001b[39m [{ \u001b[39m\"\u001b[39m\u001b[39mcontent\u001b[39m\u001b[39m\"\u001b[39m: user_message,\u001b[39m\"\u001b[39m\u001b[39mrole\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39muser\u001b[39m\u001b[39m\"\u001b[39m}]\n\u001b[1;32m 4\u001b[0m \u001b[39m# text-bison or text-bison@001 supported by Vertex AI (As of Aug 2023)\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m response \u001b[39m=\u001b[39m completion(model\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mtext-bison@001\u001b[39;49m\u001b[39m\"\u001b[39;49m, messages\u001b[39m=\u001b[39;49mmessages)\n\u001b[1;32m 6\u001b[0m \u001b[39mprint\u001b[39m(response)\n",
+      "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/litellm/utils.py:158\u001b[0m, in \u001b[0;36mclient.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 156\u001b[0m my_thread \u001b[39m=\u001b[39m threading\u001b[39m.\u001b[39mThread(target\u001b[39m=\u001b[39mhandle_failure, args\u001b[39m=\u001b[39m(e, traceback_exception, start_time, end_time, args, kwargs)) \u001b[39m# don't interrupt execution of main thread\u001b[39;00m\n\u001b[1;32m 157\u001b[0m my_thread\u001b[39m.\u001b[39mstart()\n\u001b[0;32m--> 158\u001b[0m \u001b[39mraise\u001b[39;00m e\n",
+      "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/litellm/utils.py:145\u001b[0m, in \u001b[0;36mclient.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[39m## MODEL CALL\u001b[39;00m\n\u001b[1;32m 144\u001b[0m start_time \u001b[39m=\u001b[39m datetime\u001b[39m.\u001b[39mdatetime\u001b[39m.\u001b[39mnow()\n\u001b[0;32m--> 145\u001b[0m result \u001b[39m=\u001b[39m original_function(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 146\u001b[0m end_time \u001b[39m=\u001b[39m datetime\u001b[39m.\u001b[39mdatetime\u001b[39m.\u001b[39mnow()\n\u001b[1;32m 147\u001b[0m \u001b[39m## LOG SUCCESS \u001b[39;00m\n",
+      "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/litellm/timeout.py:44\u001b[0m, in \u001b[0;36mtimeout.<locals>.decorator.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 42\u001b[0m local_timeout_duration \u001b[39m=\u001b[39m kwargs[\u001b[39m\"\u001b[39m\u001b[39mforce_timeout\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 43\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 44\u001b[0m result \u001b[39m=\u001b[39m future\u001b[39m.\u001b[39;49mresult(timeout\u001b[39m=\u001b[39;49mlocal_timeout_duration)\n\u001b[1;32m 45\u001b[0m \u001b[39mexcept\u001b[39;00m futures\u001b[39m.\u001b[39mTimeoutError:\n\u001b[1;32m 46\u001b[0m thread\u001b[39m.\u001b[39mstop_loop()\n",
+      "File \u001b[0;32m/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/concurrent/futures/_base.py:445\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 443\u001b[0m \u001b[39mraise\u001b[39;00m CancelledError()\n\u001b[1;32m 444\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_state \u001b[39m==\u001b[39m FINISHED:\n\u001b[0;32m--> 445\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__get_result()\n\u001b[1;32m 446\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 447\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mTimeoutError\u001b[39;00m()\n",
+      "File \u001b[0;32m/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/concurrent/futures/_base.py:390\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 388\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_exception:\n\u001b[1;32m 389\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 390\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_exception\n\u001b[1;32m 391\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 392\u001b[0m \u001b[39m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[1;32m 393\u001b[0m \u001b[39mself\u001b[39m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n",
+      "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/litellm/timeout.py:35\u001b[0m, in \u001b[0;36mtimeout.<locals>.decorator.<locals>.wrapper.<locals>.async_func\u001b[0;34m()\u001b[0m\n\u001b[1;32m 34\u001b[0m \u001b[39masync\u001b[39;00m \u001b[39mdef\u001b[39;00m \u001b[39masync_func\u001b[39m():\n\u001b[0;32m---> 35\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
+      "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/litellm/main.py:414\u001b[0m, in \u001b[0;36mcompletion\u001b[0;34m(model, messages, functions, function_call, temperature, top_p, n, stream, stop, max_tokens, presence_penalty, frequency_penalty, logit_bias, user, deployment_id, return_async, api_key, force_timeout, logger_fn, verbose, azure, custom_llm_provider, custom_api_base)\u001b[0m\n\u001b[1;32m 412\u001b[0m logging(model\u001b[39m=\u001b[39mmodel, \u001b[39minput\u001b[39m\u001b[39m=\u001b[39mmessages, custom_llm_provider\u001b[39m=\u001b[39mcustom_llm_provider, additional_args\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mmax_tokens\u001b[39m\u001b[39m\"\u001b[39m: max_tokens}, logger_fn\u001b[39m=\u001b[39mlogger_fn, exception\u001b[39m=\u001b[39me)\n\u001b[1;32m 413\u001b[0m \u001b[39m## Map to OpenAI Exception\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[39mraise\u001b[39;00m exception_type(model\u001b[39m=\u001b[39;49mmodel, original_exception\u001b[39m=\u001b[39;49me)\n",
+      "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/litellm/utils.py:644\u001b[0m, in \u001b[0;36mexception_type\u001b[0;34m(model, original_exception)\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[1;32m 643\u001b[0m \u001b[39melse\u001b[39;00m: \u001b[39m# don't let an error with mapping interrupt the user from receiving an error from the llm api calls \u001b[39;00m\n\u001b[0;32m--> 644\u001b[0m \u001b[39mraise\u001b[39;00m original_exception\n",
      "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/litellm/utils.py:635\u001b[0m, in \u001b[0;36mexception_type\u001b[0;34m(model, original_exception)\u001b[0m\n\u001b[1;32m 633\u001b[0m exception_mapping_worked \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n\u001b[1;32m 634\u001b[0m \u001b[39mraise\u001b[39;00m RateLimitError(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mCohereException - \u001b[39m\u001b[39m{\u001b[39;00moriginal_exception\u001b[39m.\u001b[39mmessage\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 635\u001b[0m \u001b[39mraise\u001b[39;00m original_exception \u001b[39m# base case - return the original exception\u001b[39;00m\n\u001b[1;32m 636\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 637\u001b[0m \u001b[39mraise\u001b[39;00m original_exception\n",
+      "File \u001b[0;32m~/Library/Python/3.9/lib/python/site-packages/litellm/main.py:408\u001b[0m, in \u001b[0;36mcompletion\u001b[0;34m(model, messages, functions, function_call, temperature, top_p, n, stream, stop, max_tokens, presence_penalty, frequency_penalty, logit_bias, user, deployment_id, return_async, api_key, force_timeout, logger_fn, verbose, azure, custom_llm_provider, custom_api_base)\u001b[0m\n\u001b[1;32m 406\u001b[0m logging(model\u001b[39m=\u001b[39mmodel, \u001b[39minput\u001b[39m\u001b[39m=\u001b[39mmessages, custom_llm_provider\u001b[39m=\u001b[39mcustom_llm_provider, logger_fn\u001b[39m=\u001b[39mlogger_fn)\n\u001b[1;32m 407\u001b[0m args \u001b[39m=\u001b[39m \u001b[39mlocals\u001b[39m()\n\u001b[0;32m--> 408\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mInvalid completion model args passed in. Check your input - \u001b[39m\u001b[39m{\u001b[39;00margs\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 409\u001b[0m \u001b[39mreturn\u001b[39;00m response\n\u001b[1;32m 410\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 411\u001b[0m \u001b[39m## LOGGING\u001b[39;00m\n",
+      "\u001b[0;31mValueError\u001b[0m: Invalid completion model args passed in. Check your input - {'model': 'text-bison@001', 'messages': [{'content': 'what is liteLLM ', 'role': 'user'}], 'functions': [], 'function_call': '', 'temperature': 1, 'top_p': 1, 'n': 1, 'stream': False, 'stop': None, 'max_tokens': inf, 'presence_penalty': 0, 'frequency_penalty': 0, 'logit_bias': {}, 'user': '', 'deployment_id': None, 'return_async': False, 'api_key': None, 'force_timeout': 600, 'logger_fn': None, 'verbose': False, 'azure': False, 'custom_llm_provider': None, 'custom_api_base': None, 'args': {...}, 'model_response': {'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant'}}]}, 'optional_params': {}, 'litellm_params': {'return_async': False, 'api_key': None, 'force_timeout': 600, 'logger_fn': None, 'verbose': False, 'custom_llm_provider': None, 'custom_api_base': None}}"
+     ]
+    }
+   ],
+   "source": [
+    "user_message = \"what is liteLLM \"\n",
+    "messages = [{ \"content\": user_message,\"role\": \"user\"}]\n",
+    "\n",
+    "# text-bison or text-bison@001 supported by Vertex AI (As of Aug 2023)\n",
+    "response = completion(model=\"text-bison@001\", messages=messages)\n",
+    "print(response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
  ],
  "metadata": {