{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# LiteLLM - Azure OpenAI + OpenAI Calls\n",
    "This notebook covers the following for Azure OpenAI + OpenAI:\n",
    "* Completion - Quick start\n",
    "* Completion - Streaming\n",
    "* Completion - Azure, OpenAI in separate threads\n",
    "* Completion - Stress Test 10 requests in parallel\n",
    "* Completion - Azure, OpenAI in the same thread\n",
    "\n",
    "Run the install, import and configuration cells first; every section below relies on them."
   ],
   "metadata": {
    "id": "BmX0b5Ueh91v"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "iHq4d0dpfawS"
   },
   "outputs": [],
   "source": [
    "%pip install litellm"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "import os\n",
    "import threading\n",
    "from getpass import getpass\n",
    "\n",
    "import litellm\n",
    "from litellm import completion"
   ],
   "metadata": {
    "id": "mnveHO5dfcB0"
   },
   "execution_count": 2,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Configuration\n",
    "API keys are read from the environment (`OPENAI_API_KEY`, `AZURE_API_KEY`); you are prompted for any that are missing. Replace the Azure endpoint, API version and deployment name below with your own."
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "# Credentials: keep whatever is already exported and prompt only for missing values,\n",
    "# so keys are never pasted into (or saved with) the notebook.\n",
    "for key_name in (\"OPENAI_API_KEY\", \"AZURE_API_KEY\"):\n",
    "    if not os.environ.get(key_name):\n",
    "        os.environ[key_name] = getpass(f\"{key_name}: \")\n",
    "\n",
    "# azure openai configs - existing environment values take precedence over these defaults\n",
    "os.environ.setdefault(\"AZURE_API_BASE\", \"https://openai-gpt-4-test-v-1.openai.azure.com/\")\n",
    "os.environ.setdefault(\"AZURE_API_VERSION\", \"2023-05-15\")\n",
    "\n",
    "# Models and prompt shared by every example below\n",
    "OPENAI_MODEL = \"gpt-3.5-turbo\"\n",
    "AZURE_MODEL = \"azure/your-azure-deployment\"  # format: azure/<your-deployment-name>\n",
    "MESSAGES = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]"
   ],
   "metadata": {},
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Completion - Quick start"
   ],
   "metadata": {
    "id": "eo88QUdbiDIE"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# openai call\n",
    "openai_response = completion(model=OPENAI_MODEL, messages=MESSAGES)\n",
    "print(\"Openai Response\\n\")\n",
    "print(openai_response)\n",
    "\n",
    "# azure call\n",
    "azure_response = completion(model=AZURE_MODEL, messages=MESSAGES)\n",
    "print(\"Azure Response\\n\")\n",
    "print(azure_response)"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "5OSosWNCfc_2",
    "outputId": "c52344b1-2458-4695-a7eb-a9b076893348"
   },
   "execution_count": 12,
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Openai Response\n",
      "\n",
      "{\n",
      "  \"id\": \"chatcmpl-7yjVOEKCPw2KdkfIaM3Ao1tIXp8EM\",\n",
      "  \"object\": \"chat.completion\",\n",
      "  \"created\": 1694708958,\n",
      "  \"model\": \"gpt-3.5-turbo-0613\",\n",
      "  \"choices\": [\n",
      "    {\n",
      "      \"index\": 0,\n",
      "      \"message\": {\n",
      "        \"role\": \"assistant\",\n",
      "        \"content\": \"I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you?\"\n",
      "      },\n",
      "      \"finish_reason\": \"stop\"\n",
      "    }\n",
      "  ],\n",
      "  \"usage\": {\n",
      "    \"prompt_tokens\": 13,\n",
      "    \"completion_tokens\": 26,\n",
      "    \"total_tokens\": 39\n",
      "  }\n",
      "}\n",
      "Azure Response\n",
      "\n",
      "{\n",
      "  \"id\": \"chatcmpl-7yjVQ6m2R2HRtnKHRRFp6JzL4Fjez\",\n",
      "  \"object\": \"chat.completion\",\n",
      "  \"created\": 1694708960,\n",
      "  \"model\": \"gpt-35-turbo\",\n",
      "  \"choices\": [\n",
      "    {\n",
      "      \"index\": 0,\n",
      "      \"finish_reason\": \"stop\",\n",
      "      \"message\": {\n",
      "        \"role\": \"assistant\",\n",
      "        \"content\": \"Hello there! As an AI language model, I don't have feelings but I'm functioning well. How can I assist you today?\"\n",
      "      }\n",
      "    }\n",
      "  ],\n",
      "  \"usage\": {\n",
      "    \"completion_tokens\": 27,\n",
      "    \"prompt_tokens\": 14,\n",
      "    \"total_tokens\": 41\n",
      "  }\n",
      "}\n"
     ]
    }
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Completion - Streaming"
   ],
   "metadata": {
    "id": "dQMkM-diiKdE"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# openai call - stream=True yields the response chunk by chunk\n",
    "openai_stream = completion(model=OPENAI_MODEL, messages=MESSAGES, stream=True)\n",
    "print(\"OpenAI Streaming response\")\n",
    "for chunk in openai_stream:\n",
    "    print(chunk)\n",
    "\n",
    "# azure call\n",
    "azure_stream = completion(model=AZURE_MODEL, messages=MESSAGES, stream=True)\n",
    "print(\"Azure Streaming response\")\n",
    "for chunk in azure_stream:\n",
    "    print(chunk)\n"
   ],
   "metadata": {
    "id": "uVvJDVn4g1i1"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Completion - Azure, OpenAI in separate threads"
   ],
   "metadata": {
    "id": "4xrOPnt-oqwm"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "def make_completion(model, messages):\n",
    "    \"\"\"Call `completion` for `model` with `messages` and print the response.\n",
    "\n",
    "    Used as the thread target by the two threading examples below.\n",
    "    \"\"\"\n",
    "    response = completion(model=model, messages=messages)\n",
    "    print(f\"Response for {model}: {response}\")"
   ],
   "metadata": {},
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# Create threads for making the completions - one per provider\n",
    "thread1 = threading.Thread(target=make_completion, args=(OPENAI_MODEL, MESSAGES))\n",
    "thread2 = threading.Thread(target=make_completion, args=(AZURE_MODEL, MESSAGES))\n",
    "\n",
    "# Start both threads\n",
    "thread1.start()\n",
    "thread2.start()\n",
    "\n",
    "# Wait for both threads to finish\n",
    "thread1.join()\n",
    "thread2.join()\n",
    "\n",
    "print(\"Both completions are done.\")"
   ],
   "metadata": {
    "id": "V5b5taJPjvC3"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Completion - Stress Test 10 requests in parallel\n",
    "\n"
   ],
   "metadata": {
    "id": "lx8DbMBqoAoN"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "N_REQUESTS = 10\n",
    "\n",
    "# Create and start the threads, alternating OpenAI (even i) and Azure (odd i)\n",
    "threads = []\n",
    "for i in range(N_REQUESTS):\n",
    "    model = OPENAI_MODEL if i % 2 == 0 else AZURE_MODEL\n",
    "    thread = threading.Thread(target=make_completion, args=(model, MESSAGES))\n",
    "    threads.append(thread)\n",
    "    thread.start()\n",
    "\n",
    "# Wait for all threads to finish\n",
    "for thread in threads:\n",
    "    thread.join()\n",
    "\n",
    "print(\"All completions are done.\")\n"
   ],
   "metadata": {
    "id": "pHYANOlOkoDh"
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Completion - Azure, OpenAI in the same thread"
   ],
   "metadata": {
    "id": "yB2NDOO4oxrp"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "def make_completions():\n",
    "    \"\"\"Run the OpenAI call and then the Azure OpenAI call sequentially, printing each response.\"\"\"\n",
    "    # OpenAI completion\n",
    "    openai_response = completion(model=OPENAI_MODEL, messages=MESSAGES)\n",
    "    print(\"OpenAI Response:\", openai_response)\n",
    "\n",
    "    # Azure OpenAI completion\n",
    "    azure_response = completion(model=AZURE_MODEL, messages=MESSAGES)\n",
    "    print(\"Azure OpenAI Response:\", azure_response)\n",
    "\n",
    "# Call the function to make both completions in one thread\n",
    "make_completions()\n"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "HTBqwzxpnxab",
    "outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14"
   },
   "execution_count": 23,
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "OpenAI Response: {\n",
      "  \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n",
      "  \"object\": \"chat.completion\",\n",
      "  \"created\": 1694710847,\n",
      "  \"model\": \"gpt-3.5-turbo-0613\",\n",
      "  \"choices\": [\n",
      "    {\n",
      "      \"index\": 0,\n",
      "      \"message\": {\n",
      "        \"role\": \"assistant\",\n",
      "        \"content\": \"Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?\"\n",
      "      },\n",
      "      \"finish_reason\": \"stop\"\n",
      "    }\n",
      "  ],\n",
      "  \"usage\": {\n",
      "    \"prompt_tokens\": 13,\n",
      "    \"completion_tokens\": 29,\n",
      "    \"total_tokens\": 42\n",
      "  }\n",
      "}\n",
      "Azure OpenAI Response: {\n",
      "  \"id\": \"chatcmpl-7yjztAQ0gK6IMQt7cvLroMSOoXkeu\",\n",
      "  \"object\": \"chat.completion\",\n",
      "  \"created\": 1694710849,\n",
      "  \"model\": \"gpt-35-turbo\",\n",
      "  \"choices\": [\n",
      "    {\n",
      "      \"index\": 0,\n",
      "      \"finish_reason\": \"stop\",\n",
      "      \"message\": {\n",
      "        \"role\": \"assistant\",\n",
      "        \"content\": \"As an AI language model, I don't have feelings but I'm functioning properly. Thank you for asking! How can I assist you today?\"\n",
      "      }\n",
      "    }\n",
      "  ],\n",
      "  \"usage\": {\n",
      "    \"completion_tokens\": 29,\n",
      "    \"prompt_tokens\": 14,\n",
      "    \"total_tokens\": 43\n",
      "  }\n",
      "}\n"
     ]
    }
   ]
  }
 ]
}