diff --git a/cookbook/LiteLLM_Azure_and_OpenAI_example.ipynb b/cookbook/LiteLLM_Azure_and_OpenAI_example.ipynb new file mode 100644 index 000000000..9e5db982b --- /dev/null +++ b/cookbook/LiteLLM_Azure_and_OpenAI_example.ipynb @@ -0,0 +1,423 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# LiteLLM - Azure OpenAI + OpenAI Calls\n", + "This notebook covers the following for Azure OpenAI + OpenAI:\n", + "* Completion - Quick start\n", + "* Completion - Streaming\n", + "* Completion - Azure, OpenAI in separate threads\n", + "* Completion - Stress Test 10 requests in parallel\n", + "* Completion - Azure, OpenAI in the same thread" + ], + "metadata": { + "id": "BmX0b5Ueh91v" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iHq4d0dpfawS" + }, + "outputs": [], + "source": [ + "!pip install litellm" + ] + }, + { + "cell_type": "code", + "source": [ + "import os, litellm" + ], + "metadata": { + "id": "mnveHO5dfcB0" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Completion - Quick start" + ], + "metadata": { + "id": "eo88QUdbiDIE" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from litellm import completion\n", + "\n", + "# openai configs\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "\n", + "# azure openai configs\n", + "os.environ[\"AZURE_API_KEY\"] = \"\"\n", + "os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n", + "os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n", + "\n", + "\n", + "# openai call\n", + "response = completion(\n", + " model = \"gpt-3.5-turbo\",\n", + " messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n", + ")\n", + "print(\"Openai Response\\n\")\n", + "print(response)\n", + "\n", + "\n", + "\n", + "# azure call\n", + "response = completion(\n", + " model = \"azure/your-azure-deployment\",\n", + " messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n", + ")\n", + "print(\"Azure Response\\n\")\n", + "print(response)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5OSosWNCfc_2", + "outputId": "c52344b1-2458-4695-a7eb-a9b076893348" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Openai Response\n", + "\n", + "{\n", + " \"id\": \"chatcmpl-7yjVOEKCPw2KdkfIaM3Ao1tIXp8EM\",\n", + " \"object\": \"chat.completion\",\n", + " \"created\": 1694708958,\n", + " \"model\": \"gpt-3.5-turbo-0613\",\n", + " \"choices\": [\n", + " {\n", + " \"index\": 0,\n", + " \"message\": {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you?\"\n", + " },\n", + " \"finish_reason\": \"stop\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 13,\n", + " \"completion_tokens\": 26,\n", + " \"total_tokens\": 39\n", + " }\n", + "}\n", + "Azure Response\n", + "\n", + "{\n", + " \"id\": \"chatcmpl-7yjVQ6m2R2HRtnKHRRFp6JzL4Fjez\",\n", + " \"object\": \"chat.completion\",\n", + " \"created\": 1694708960,\n", + " \"model\": \"gpt-35-turbo\",\n", + " \"choices\": [\n", + " {\n", + " \"index\": 0,\n", + " \"finish_reason\": \"stop\",\n", + " \"message\": {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"Hello there! As an AI language model, I don't have feelings but I'm functioning well. How can I assist you today?\"\n", + " }\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"completion_tokens\": 27,\n", + " \"prompt_tokens\": 14,\n", + " \"total_tokens\": 41\n", + " }\n", + "}\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Completion - Streaming" + ], + "metadata": { + "id": "dQMkM-diiKdE" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from litellm import completion\n", + "\n", + "# openai configs\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "\n", + "# azure openai configs\n", + "os.environ[\"AZURE_API_KEY\"] = \"\"\n", + "os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n", + "os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n", + "\n", + "\n", + "# openai call\n", + "response = completion(\n", + " model = \"gpt-3.5-turbo\",\n", + " messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n", + " stream=True\n", + ")\n", + "print(\"OpenAI Streaming response\")\n", + "for chunk in response:\n", + " print(chunk)\n", + "\n", + "# azure call\n", + "response = completion(\n", + " model = \"azure/your-azure-deployment\",\n", + " messages = [{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n", + " stream=True\n", + ")\n", + "print(\"Azure Streaming response\")\n", + "for chunk in response:\n", + " print(chunk)\n" + ], + "metadata": { + "id": "uVvJDVn4g1i1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Completion - Azure, OpenAI in separate threads" + ], + "metadata": { + "id": "4xrOPnt-oqwm" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import threading\n", + "from litellm import completion\n", + "\n", + "# Function to make a completion call\n", + "def make_completion(model, messages):\n", + " response = completion(\n", + " model=model,\n", + " messages=messages\n", + " )\n", + "\n", + " print(f\"Response for {model}: {response}\")\n", + "\n", + "# openai configs\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "\n", + "# azure openai configs\n", + "os.environ[\"AZURE_API_KEY\"] = \"\"\n", + "os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n", + "os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n", + "\n", + "# Define the messages for the completions\n", + "messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n", + "\n", + "# Create threads for making the completions\n", + "thread1 = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\", messages))\n", + "thread2 = threading.Thread(target=make_completion, args=(\"azure/your-azure-deployment\", messages))\n", + "\n", + "# Start both threads\n", + "thread1.start()\n", + "thread2.start()\n", + "\n", + "# Wait for both threads to finish\n", + "thread1.join()\n", + "thread2.join()\n", + "\n", + "print(\"Both completions are done.\")" + ], + "metadata": { + "id": "V5b5taJPjvC3" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Completion - Stress Test 10 requests in parallel\n", + "\n" + ], + "metadata": { + "id": "lx8DbMBqoAoN" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "import threading\n", + "from litellm import completion\n", + "\n", + "# Function to make a completion call\n", + "def make_completion(model, messages):\n", + " response = completion(\n", + " model=model,\n", + " messages=messages\n", + " )\n", + "\n", + " print(f\"Response for {model}: {response}\")\n", + "\n", + "# Set your API keys\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "os.environ[\"AZURE_API_KEY\"] = \"\"\n", + "os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n", + "os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n", + "\n", + "# Define the messages for the completions\n", + "messages = [{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n", + "\n", + "# Create and start 10 threads for making completions\n", + "threads = []\n", + "for i in range(10):\n", + " thread = threading.Thread(target=make_completion, args=(\"gpt-3.5-turbo\" if i % 2 == 0 else \"azure/your-azure-deployment\", messages))\n", + " threads.append(thread)\n", + " thread.start()\n", + "\n", + "# Wait for all threads to finish\n", + "for thread in threads:\n", + " thread.join()\n", + "\n", + "print(\"All completions are done.\")\n" + ], + "metadata": { + "id": "pHYANOlOkoDh" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Completion - Azure, OpenAI in the same thread" + ], + "metadata": { + "id": "yB2NDOO4oxrp" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from litellm import completion\n", + "\n", + "# Function to make both OpenAI and Azure completions\n", + "def make_completions():\n", + " # Set your OpenAI API key\n", + " os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "\n", + " # OpenAI completion\n", + " openai_response = completion(\n", + " model=\"gpt-3.5-turbo\",\n", + " messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n", + " )\n", + "\n", + " print(\"OpenAI Response:\", openai_response)\n", + "\n", + " # Set your Azure OpenAI API key and configuration\n", + " os.environ[\"AZURE_API_KEY\"] = \"\"\n", + " os.environ[\"AZURE_API_BASE\"] = \"https://openai-gpt-4-test-v-1.openai.azure.com/\"\n", + " os.environ[\"AZURE_API_VERSION\"] = \"2023-05-15\"\n", + "\n", + " # Azure OpenAI completion\n", + " azure_response = completion(\n", + " model=\"azure/your-azure-deployment\",\n", + " messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}]\n", + " )\n", + "\n", + " print(\"Azure OpenAI Response:\", azure_response)\n", + "\n", + "# Call the function to make both completions in one thread\n", + "make_completions()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HTBqwzxpnxab", + "outputId": "f3bc0efe-e4d5-44d5-a193-97d178cfbe14" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "OpenAI Response: {\n", + " \"id\": \"chatcmpl-7yjzrDeOeVeSrQ00tApmTxEww3vBS\",\n", + " \"object\": \"chat.completion\",\n", + " \"created\": 1694710847,\n", + " \"model\": \"gpt-3.5-turbo-0613\",\n", + " \"choices\": [\n", + " {\n", + " \"index\": 0,\n", + " \"message\": {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?\"\n", + " },\n", + " \"finish_reason\": \"stop\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 13,\n", + " \"completion_tokens\": 29,\n", + " \"total_tokens\": 42\n", + " }\n", + "}\n", + "Azure OpenAI Response: {\n", + " \"id\": \"chatcmpl-7yjztAQ0gK6IMQt7cvLroMSOoXkeu\",\n", + " \"object\": \"chat.completion\",\n", + " \"created\": 1694710849,\n", + " \"model\": \"gpt-35-turbo\",\n", + " \"choices\": [\n", + " {\n", + " \"index\": 0,\n", + " \"finish_reason\": \"stop\",\n", + " \"message\": {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"As an AI language model, I don't have feelings but I'm functioning properly. Thank you for asking! How can I assist you today?\"\n", + " }\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"completion_tokens\": 29,\n", + " \"prompt_tokens\": 14,\n", + " \"total_tokens\": 43\n", + " }\n", + "}\n" + ] + } + ] + } + ] +} \ No newline at end of file