getting started cookbook litellm

2025-04-24 10:14:26 +00:00 · 2023-09-23 10:38:59 -07:00 · 2023-09-23 10:38:59 -07:00 · a775e9b278
commit a775e9b278
parent eb970967fd
1 changed files with 123 additions and 63 deletions
--- a/cookbook/liteLLM_Getting_Started.ipynb
+++ b/cookbook/liteLLM_Getting_Started.ipynb
@ -7,15 +7,11 @@
        "id": "MZ01up0p7wOJ"
      },
      "source": [
-        "## 🚅 liteLLM Demo\n",
+        "## 🚅 liteLLM Quick Start Demo\n",
        "### TLDR: Call 50+ LLM APIs using chatGPT Input/Output format\n",
        "https://github.com/BerriAI/litellm\n",
        "\n",
        "liteLLM is a package to simplify calling **OpenAI, Azure, Llama2, Cohere, Anthropic, Huggingface API Endpoints**. LiteLLM manages\n",
-        "\n",
-        "* Translating inputs to the provider's `completion()` and `embedding()` endpoints\n",
-        "* Guarantees consistent output, text responses will always be available at `['choices'][0]['message']['content']`\n",
-        "* Exception mapping - common exceptions across providers are mapped to the OpenAI exception types\n",
        "\n"
      ]
    },
@ -42,7 +38,7 @@
    },
    {
      "cell_type": "code",
-      "execution_count": 1,
+      "execution_count": 2,
      "metadata": {
        "id": "ArrWyG5b7QAG"
      },
@ -67,7 +63,7 @@
    },
    {
      "cell_type": "code",
-      "execution_count": 4,
+      "execution_count": 18,
      "metadata": {
        "id": "-h8Ga5cR7SvV"
      },
@ -76,23 +72,11 @@
        "# Only set keys for the LLMs you want to use\n",
        "os.environ['OPENAI_API_KEY'] = \"\" #@param\n",
        "os.environ[\"ANTHROPIC_API_KEY\"] = \"\" #@param\n",
+        "os.environ[\"REPLICATE_API_KEY\"] = \"\" #@param\n",
+        "os.environ[\"COHERE_API_KEY\"] = \"\" #@param\n",
        "os.environ[\"AZURE_API_BASE\"] = \"\" #@param\n",
        "os.environ[\"AZURE_API_VERSION\"] = \"\" #@param\n",
-        "os.environ[\"AZURE_API_KEY\"] = \"\" #@param\n",
-        "os.environ[\"REPLICATE_API_TOKEN\"] = \"\" #@param\n",
-        "os.environ[\"COHERE_API_KEY\"] = \"\" #@param\n",
-        "os.environ[\"HF_TOKEN\"] = \"\" #@param"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 2,
-      "metadata": {
-        "id": "MBujGiby8YBu"
-      },
-      "outputs": [],
-      "source": [
-        "messages = [{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}]"
+        "os.environ[\"AZURE_API_KEY\"] = \"\" #@param"
      ]
    },
    {
@ -107,7 +91,7 @@
    },
    {
      "cell_type": "code",
-      "execution_count": 3,
+      "execution_count": 13,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
@ -119,36 +103,37 @@
        {
          "data": {
            "text/plain": [
-              "<OpenAIObject chat.completion id=chatcmpl-7vYWJYYUeFuhjCiOjI9JXK6gNmWk3 at 0x1067d42c0> JSON: {\n",
-              "  \"id\": \"chatcmpl-7vYWJYYUeFuhjCiOjI9JXK6gNmWk3\",\n",
+              "<OpenAIObject chat.completion id=chatcmpl-820kPkRwSLml4X6165fWbZlEDOedr at 0x12ff93630> JSON: {\n",
+              "  \"id\": \"chatcmpl-820kPkRwSLml4X6165fWbZlEDOedr\",\n",
              "  \"object\": \"chat.completion\",\n",
-              "  \"created\": 1693951747,\n",
+              "  \"created\": 1695490221,\n",
              "  \"model\": \"gpt-3.5-turbo-0613\",\n",
              "  \"choices\": [\n",
              "    {\n",
              "      \"index\": 0,\n",
              "      \"message\": {\n",
              "        \"role\": \"assistant\",\n",
-              "        \"content\": \"I apologize, but as an AI language model, I do not have real-time data. However, you can easily find the current weather conditions in San Francisco, California by checking a trusted weather website or using a weather app on your smartphone.\"\n",
+              "        \"content\": \"I'm sorry, but as an AI text-based model, I don't have real-time information. However, you can check the current weather in San Francisco by searching for \\\"weather in SF\\\" on any search engine or checking a weather website or app.\"\n",
              "      },\n",
              "      \"finish_reason\": \"stop\"\n",
              "    }\n",
              "  ],\n",
              "  \"usage\": {\n",
              "    \"prompt_tokens\": 13,\n",
-              "    \"completion_tokens\": 48,\n",
-              "    \"total_tokens\": 61\n",
-              "  }\n",
+              "    \"completion_tokens\": 51,\n",
+              "    \"total_tokens\": 64\n",
+              "  },\n",
+              "  \"response_ms\": 2385.592\n",
              "}"
            ]
          },
-          "execution_count": 3,
+          "execution_count": 13,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
-        "completion(model=\"gpt-3.5-turbo\", messages=messages)"
+        "completion(model=\"gpt-3.5-turbo\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
      ]
    },
    {
@ -163,7 +148,7 @@
    },
    {
      "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 19,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
@ -175,22 +160,38 @@
        {
          "data": {
            "text/plain": [
-              "{'choices': [{'finish_reason': 'stop',\n",
-              "   'index': 0,\n",
-              "   'message': {'role': 'assistant',\n",
-              "    'content': \" Unfortunately I do not have enough context to provide the current weather in San Francisco. To get the most accurate weather report, it's helpful if I know details like:\\n\\n- Exact location (city name, zip code, etc)\\n- Time frame (current conditions, forecast for a certain day/week, etc)\\n\\nIf you can provide some more specifics about what weather information you need for San Francisco, I'd be happy to look that up for you!\"}}],\n",
-              " 'created': 1691880836.974166,\n",
-              " 'model': 'claude-2',\n",
-              " 'usage': {'prompt_tokens': 18, 'completion_tokens': 95, 'total_tokens': 113}}"
+              "<ModelResponse chat.completion id=chatcmpl-6d1a40c0-19c0-4bd7-9ca2-a91d8b8c2295 at 0x12ff85a40> JSON: {\n",
+              "  \"object\": \"chat.completion\",\n",
+              "  \"choices\": [\n",
+              "    {\n",
+              "      \"finish_reason\": \"stop_sequence\",\n",
+              "      \"index\": 0,\n",
+              "      \"message\": {\n",
+              "        \"content\": \" Unfortunately I don't have enough context to know the exact location you are asking about when you say \\\"SF\\\". SF could refer to San Francisco, California, or potentially other cities that go by SF as an abbreviation. To get an accurate weather report, it would be helpful if you could provide the full city name and state/country. If you are looking for the weather in San Francisco, California, I would be happy to provide that forecast. Please let me know the specific location you want the weather for.\",\n",
+              "        \"role\": \"assistant\",\n",
+              "        \"logprobs\": null\n",
+              "      }\n",
+              "    }\n",
+              "  ],\n",
+              "  \"id\": \"chatcmpl-6d1a40c0-19c0-4bd7-9ca2-a91d8b8c2295\",\n",
+              "  \"created\": 1695490260.983768,\n",
+              "  \"response_ms\": 6351.544,\n",
+              "  \"model\": \"claude-2\",\n",
+              "  \"usage\": {\n",
+              "    \"prompt_tokens\": 14,\n",
+              "    \"completion_tokens\": 102,\n",
+              "    \"total_tokens\": 116\n",
+              "  }\n",
+              "}"
            ]
          },
-          "execution_count": 11,
+          "execution_count": 19,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
-        "completion(model=\"claude-2\", messages=messages)"
+        "completion(model=\"claude-2\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
      ]
    },
    {
@ -205,7 +206,7 @@
    },
    {
      "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 17,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
@ -217,23 +218,40 @@
        {
          "data": {
            "text/plain": [
-              "{'choices': [{'finish_reason': 'stop',\n",
-              "   'index': 0,\n",
-              "   'message': {'role': 'assistant',\n",
-              "    'content': ' I\\'m happy to help! However, I must point out that the question \"what\\'s the weather in SF\" doesn\\'t make sense as \"SF\" could refer to multiple locations (San Francisco, South Florida, San Fernando, etc.). Could you please provide more context or specify which location you\\'re referring to? That way, I can give you an accurate answer.'}}],\n",
-              " 'created': 1691880930.9003325,\n",
-              " 'model': 'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1',\n",
-              " 'usage': {'prompt_tokens': 6, 'completion_tokens': 74, 'total_tokens': 80}}"
+              "<ModelResponse chat.completion id=chatcmpl-3151c2eb-b26f-4c96-89b5-ed1746b219e0 at 0x138b87e50> JSON: {\n",
+              "  \"object\": \"chat.completion\",\n",
+              "  \"choices\": [\n",
+              "    {\n",
+              "      \"finish_reason\": \"stop\",\n",
+              "      \"index\": 0,\n",
+              "      \"message\": {\n",
+              "        \"content\": \" I'm happy to help! However, I must point out that the question \\\"what's the weather in SF\\\" doesn't make sense as \\\"SF\\\" could refer to multiple locations. Could you please clarify which location you are referring to? San Francisco, California or Sioux Falls, South Dakota? Once I have more context, I would be happy to provide you with accurate and reliable information.\",\n",
+              "        \"role\": \"assistant\",\n",
+              "        \"logprobs\": null\n",
+              "      }\n",
+              "    }\n",
+              "  ],\n",
+              "  \"id\": \"chatcmpl-3151c2eb-b26f-4c96-89b5-ed1746b219e0\",\n",
+              "  \"created\": 1695490237.714101,\n",
+              "  \"response_ms\": 12109.565,\n",
+              "  \"model\": \"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\",\n",
+              "  \"usage\": {\n",
+              "    \"prompt_tokens\": 6,\n",
+              "    \"completion_tokens\": 78,\n",
+              "    \"total_tokens\": 84\n",
+              "  },\n",
+              "  \"ended\": 1695490249.821266\n",
+              "}"
            ]
          },
-          "execution_count": 13,
+          "execution_count": 17,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "model = \"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1\"\n",
-        "completion(model=model, messages=messages)"
+        "completion(model=model, messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
      ]
    },
    {
@ -248,7 +266,7 @@
    },
    {
      "cell_type": "code",
-      "execution_count": 7,
+      "execution_count": 16,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
@ -260,35 +278,38 @@
        {
          "data": {
            "text/plain": [
-              "<ModelResponse at 0x11cb0c3b0> JSON: {\n",
+              "<ModelResponse chat.completion id=chatcmpl-dc0d8ead-071d-486c-a111-78975b38794b at 0x1389725e0> JSON: {\n",
+              "  \"object\": \"chat.completion\",\n",
              "  \"choices\": [\n",
              "    {\n",
              "      \"finish_reason\": \"stop\",\n",
              "      \"index\": 0,\n",
              "      \"message\": {\n",
-              "        \"content\": \" The weather in San Francisco can be quite unpredictable and varies throughout the year. In general, the city\",\n",
+              "        \"content\": \" As an AI model I don't have access to real-time data, so I can't tell\",\n",
              "        \"role\": \"assistant\",\n",
              "        \"logprobs\": null\n",
              "      }\n",
              "    }\n",
              "  ],\n",
-              "  \"created\": 1693951797.3149078,\n",
+              "  \"id\": \"chatcmpl-dc0d8ead-071d-486c-a111-78975b38794b\",\n",
+              "  \"created\": 1695490235.936903,\n",
+              "  \"response_ms\": 1022.6759999999999,\n",
              "  \"model\": \"command-nightly\",\n",
              "  \"usage\": {\n",
              "    \"prompt_tokens\": 6,\n",
-              "    \"completion_tokens\": 20,\n",
-              "    \"total_tokens\": 26\n",
+              "    \"completion_tokens\": 19,\n",
+              "    \"total_tokens\": 25\n",
              "  }\n",
              "}"
            ]
          },
-          "execution_count": 7,
+          "execution_count": 16,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
-        "completion(model=\"command-nightly\", messages=messages)"
+        "completion(model=\"command-nightly\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
      ]
    },
    {
@ -311,7 +332,7 @@
    },
    {
      "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 15,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
@ -319,10 +340,49 @@
        "id": "AvLjR-PF-lt0",
        "outputId": "deff2db3-b003-48cd-ea62-c03a68a4464a"
      },
-      "outputs": [],
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "<OpenAIObject chat.completion id=chatcmpl-820kZyCwbNvZATiLkNmXmpxxzvTKO at 0x138b84ae0> JSON: {\n",
+              "  \"id\": \"chatcmpl-820kZyCwbNvZATiLkNmXmpxxzvTKO\",\n",
+              "  \"object\": \"chat.completion\",\n",
+              "  \"created\": 1695490231,\n",
+              "  \"model\": \"gpt-35-turbo\",\n",
+              "  \"choices\": [\n",
+              "    {\n",
+              "      \"index\": 0,\n",
+              "      \"finish_reason\": \"stop\",\n",
+              "      \"message\": {\n",
+              "        \"role\": \"assistant\",\n",
+              "        \"content\": \"Sorry, as an AI language model, I don't have real-time information. Please check your preferred weather website or app for the latest weather updates of San Francisco.\"\n",
+              "      }\n",
+              "    }\n",
+              "  ],\n",
+              "  \"usage\": {\n",
+              "    \"completion_tokens\": 33,\n",
+              "    \"prompt_tokens\": 14,\n",
+              "    \"total_tokens\": 47\n",
+              "  },\n",
+              "  \"response_ms\": 1499.529\n",
+              "}"
+            ]
+          },
+          "execution_count": 15,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
      "source": [
-        "completion(model=\"azure/chatgpt-test\", messages=messages)"
+        "completion(model=\"azure/chatgpt-v-2\", messages=[{ \"content\": \"what's the weather in SF\",\"role\": \"user\"}])"
      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": []
    }
  ],
  "metadata": {