{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "## Use Azure OpenAI with LiteLLM"
      ],
      "metadata": {
        "id": "oTA-1bG_wBVw"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "RreFKTyKv2nt"
      },
      "outputs": [],
      "source": [
        "!pip install litellm"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
"Pass API_BASE, API_VERSION, API_KEY in COMPLETION()"
|
|
      ],
      "metadata": {
        "id": "kSOo9lbKv_7H"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import litellm\n",
        "response = litellm.completion(\n",
        "    model = \"azure/chatgpt-v-2\", # model = azure/<your deployment name>\n",
        "    api_base = \"https://openai-gpt-4-test-v-1.openai.azure.com/\", # azure api base\n",
        "    api_version = \"2023-05-15\", # azure api version\n",
        "    api_key = \"\", # azure api key\n",
        "    messages = [{\"role\": \"user\", \"content\": \"good morning\"}],\n",
        "    max_tokens=10,\n",
        ")\n",
        "print(response)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gWIsjrHMv_DM",
        "outputId": "732e9daa-8dca-4bc1-bb8a-aee90ee14c8d"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{\n",
            "  \"id\": \"chatcmpl-877x4J2JUSReOuxVGE3THLjcmdrI8\",\n",
            "  \"object\": \"chat.completion\",\n",
            "  \"created\": 1696709554,\n",
            "  \"model\": \"gpt-35-turbo\",\n",
            "  \"choices\": [\n",
            "    {\n",
            "      \"index\": 0,\n",
            "      \"finish_reason\": \"length\",\n",
            "      \"message\": {\n",
            "        \"role\": \"assistant\",\n",
            "        \"content\": \"Good morning! How can I assist you today?\"\n",
            "      }\n",
            "    }\n",
            "  ],\n",
            "  \"usage\": {\n",
            "    \"completion_tokens\": 10,\n",
            "    \"prompt_tokens\": 10,\n",
            "    \"total_tokens\": 20\n",
            "  }\n",
            "}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "PR5uhvVHxe-C"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Set .env variables with Azure / LiteLLM"
      ],
      "metadata": {
        "id": "1P2hprlLxfDc"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import litellm\n",
        "import os\n",
        "\n",
        "os.environ['AZURE_API_KEY'] = \"\"\n",
        "os.environ['AZURE_API_BASE'] = \"\"\n",
        "os.environ['AZURE_API_VERSION'] = \"\"\n",
        "\n",
        "response = litellm.completion(\n",
        "    model = \"azure/chatgpt-v-2\", # model = azure/<your deployment name>\n",
        "    messages = [{\"role\": \"user\", \"content\": \"good morning\"}],\n",
        "    max_tokens=10,\n",
        ")\n",
        "print(response)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mGi9Gae1wMjK",
        "outputId": "29f2a9cf-f6ee-416b-9b24-02588d96fe59"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{\n",
            "  \"id\": \"chatcmpl-877zB0GWZl4zswopLt12yQEzEfYWy\",\n",
            "  \"object\": \"chat.completion\",\n",
            "  \"created\": 1696709685,\n",
            "  \"model\": \"gpt-35-turbo\",\n",
            "  \"choices\": [\n",
            "    {\n",
            "      \"index\": 0,\n",
            "      \"finish_reason\": \"length\",\n",
            "      \"message\": {\n",
            "        \"role\": \"assistant\",\n",
            "        \"content\": \"Good morning! How can I assist you today?\"\n",
            "      }\n",
            "    }\n",
            "  ],\n",
            "  \"usage\": {\n",
            "    \"completion_tokens\": 10,\n",
            "    \"prompt_tokens\": 10,\n",
            "    \"total_tokens\": 20\n",
            "  }\n",
            "}\n"
          ]
        }
      ]
    },
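    {
      "cell_type": "markdown",
      "source": [
        "The same call can also be made asynchronously. A minimal sketch, not part of the original notebook: litellm exposes `acompletion()`, which accepts the same arguments as `completion()`. It assumes the `AZURE_API_*` environment variables set above and the same `azure/chatgpt-v-2` deployment."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "import litellm\n",
        "\n",
        "# Sketch: async variant of the call above; acompletion() mirrors completion().\n",
        "# Notebooks allow top-level await; in a plain script, wrap this in asyncio.run().\n",
        "response = await litellm.acompletion(\n",
        "    model = \"azure/chatgpt-v-2\", # assumption: same deployment as above\n",
        "    messages = [{\"role\": \"user\", \"content\": \"good morning\"}],\n",
        "    max_tokens=10,\n",
        ")\n",
        "print(response)"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },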
    {
      "cell_type": "markdown",
      "source": [
        "## With Streaming"
      ],
      "metadata": {
        "id": "uIhyvSVNx4hX"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "response = litellm.completion(\n",
        "    model = \"azure/chatgpt-v-2\",\n",
        "    messages = [{\"role\": \"user\", \"content\": \"good morning\"}],\n",
        "    max_tokens=10,\n",
        "    stream=True\n",
        ")\n",
        "\n",
        "for chunk in response:\n",
        "    print(chunk)"
      ],
      "metadata": {
        "id": "R4KYKLOHxy9r"
      },
      "execution_count": null,
      "outputs": []
    },
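    {
      "cell_type": "markdown",
      "source": [
        "The cell above prints whole chunk objects. Below is a minimal sketch, not part of the original notebook, of assembling the streamed text. It assumes the chunks follow the OpenAI-style streaming format, where the incremental text lives in `chunk.choices[0].delta.content` and may be `None` on the final chunk."
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "response = litellm.completion(\n",
        "    model = \"azure/chatgpt-v-2\",\n",
        "    messages = [{\"role\": \"user\", \"content\": \"good morning\"}],\n",
        "    max_tokens=10,\n",
        "    stream=True\n",
        ")\n",
        "\n",
        "full_text = \"\"\n",
        "for chunk in response:\n",
        "    # Assumption: OpenAI-style chunks; delta.content is None when there is no new text.\n",
        "    delta = chunk.choices[0].delta.content\n",
        "    if delta:\n",
        "        full_text += delta\n",
        "\n",
        "print(full_text)"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },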
    {
      "cell_type": "markdown",
      "source": [
        "## With Rate Limit Handler"
      ],
      "metadata": {
        "id": "hB8jLz94ybTC"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from litellm import RateLimitManager\n",
        "\n",
        "handler = RateLimitManager(max_requests_per_minute=10, max_tokens_per_minute=200)\n",
        "\n",
        "response = await handler.acompletion(\n",
        "    model = \"azure/chatgpt-v-2\",\n",
        "    messages = [{\"role\": \"user\", \"content\": \"good morning\"}],\n",
        "    max_tokens=10,\n",
        ")\n",
        "print(response)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "CQECDwpix7Hl",
        "outputId": "18dc4041-3262-4ab7-a451-34ceaf70ca31"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{\n",
            "  \"id\": \"chatcmpl-8781gvDKwPbp44CliumABgAuIDnSf\",\n",
            "  \"object\": \"chat.completion\",\n",
            "  \"created\": 1696709840,\n",
            "  \"model\": \"gpt-35-turbo\",\n",
            "  \"choices\": [\n",
            "    {\n",
            "      \"index\": 0,\n",
            "      \"finish_reason\": \"length\",\n",
            "      \"message\": {\n",
            "        \"role\": \"assistant\",\n",
            "        \"content\": \"Good morning! How can I assist you today?\"\n",
            "      }\n",
            "    }\n",
            "  ],\n",
            "  \"usage\": {\n",
            "    \"completion_tokens\": 10,\n",
            "    \"prompt_tokens\": 10,\n",
            "    \"total_tokens\": 20\n",
            "  }\n",
            "}\n"
          ]
        }
      ]
    }
  ]
}