From c5ee6fe4b9579c4668f1c4a81ad49fcc861c34fa Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 27 Jul 2024 16:09:37 -0700
Subject: [PATCH] build: cookbook on migrating to litellm proxy from
 openai/azure sdk

---
 ...teLLM_Proxy_from_OpenAI_Azure_OpenAI.ipynb | 565 ++++++++++++++++++
 1 file changed, 565 insertions(+)
 create mode 100644 cookbook/Migrating_to_LiteLLM_Proxy_from_OpenAI_Azure_OpenAI.ipynb

diff --git a/cookbook/Migrating_to_LiteLLM_Proxy_from_OpenAI_Azure_OpenAI.ipynb b/cookbook/Migrating_to_LiteLLM_Proxy_from_OpenAI_Azure_OpenAI.ipynb
new file mode 100644
index 000000000..39677ed2a
--- /dev/null
+++ b/cookbook/Migrating_to_LiteLLM_Proxy_from_OpenAI_Azure_OpenAI.ipynb
@@ -0,0 +1,565 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Migrating to LiteLLM Proxy from OpenAI/Azure OpenAI\n",
+        "\n",
+        "Covers:\n",
+        "\n",
+        "* /chat/completion\n",
+        "* /embedding\n",
+        "\n",
+        "\n",
+        "These are **selected examples**. LiteLLM Proxy is **OpenAI-compatible**, so it works with any project that calls OpenAI. Just change the `base_url`, `api_key`, and `model`.\n",
+        "\n",
+        "For more examples, [go here](https://docs.litellm.ai/docs/proxy/user_keys)\n",
+        "\n",
+        "To pass provider-specific args, [go here](https://docs.litellm.ai/docs/completion/provider_specific_params#proxy-usage)\n",
+        "\n",
+        "To drop unsupported params (e.g. `frequency_penalty` for Bedrock with LibreChat), [go here](https://docs.litellm.ai/docs/completion/drop_params#openai-proxy-usage)\n"
+      ],
+      "metadata": {
+        "id": "kccfk0mHZ4Ad"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## /chat/completion\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "nmSClzCPaGH6"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### OpenAI Python SDK"
+      ],
+      "metadata": {
+        "id": "_vqcjwOVaKpO"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "x1e_Ok3KZzeP"
+      },
+      "outputs": [],
+      "source": [
+        "import openai\n",
+        "client = openai.OpenAI(\n",
+        "    api_key=\"anything\",\n",
+        "    base_url=\"http://0.0.0.0:4000\"\n",
+        ")\n",
+        "\n",
+        "# request sent to model set on litellm proxy, `litellm --model`\n",
+        "response = client.chat.completions.create(\n",
+        "    model=\"gpt-3.5-turbo\",\n",
+        "    messages=[\n",
+        "        {\n",
+        "            \"role\": \"user\",\n",
+        "            \"content\": \"this is a test request, write a short poem\"\n",
+        "        }\n",
+        "    ],\n",
+        "    extra_body={ # pass in any provider-specific param, if not supported by openai, https://docs.litellm.ai/docs/completion/input#provider-specific-params\n",
+        "        \"metadata\": { # 👈 use for logging additional params (e.g. to langfuse)\n",
+        "            \"generation_name\": \"ishaan-generation-openai-client\",\n",
+        "            \"generation_id\": \"openai-client-gen-id22\",\n",
+        "            \"trace_id\": \"openai-client-trace-id22\",\n",
+        "            \"trace_user_id\": \"openai-client-user-id2\"\n",
+        "        }\n",
+        "    }\n",
+        ")\n",
+        "\n",
+        "print(response)"
+      ]
+    },
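+    {
+      "cell_type": "markdown",
+      "source": [
+        "Streaming works through the proxy as well. The cell below is a minimal sketch, assuming the same local proxy on `http://0.0.0.0:4000` with a `gpt-3.5-turbo` model configured. Because the proxy is OpenAI-compatible, the standard `stream=True` flag works unchanged:"
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import openai\n",
+        "client = openai.OpenAI(\n",
+        "    api_key=\"anything\",\n",
+        "    base_url=\"http://0.0.0.0:4000\" # litellm proxy endpoint (assumed running locally)\n",
+        ")\n",
+        "\n",
+        "# stream=True yields chunks as they are generated\n",
+        "stream = client.chat.completions.create(\n",
+        "    model=\"gpt-3.5-turbo\",\n",
+        "    messages=[{\"role\": \"user\", \"content\": \"this is a test request, write a short poem\"}],\n",
+        "    stream=True\n",
+        ")\n",
+        "\n",
+        "for chunk in stream:\n",
+        "    # each chunk carries a delta; content can be None (e.g. on the final chunk)\n",
+        "    if chunk.choices and chunk.choices[0].delta.content is not None:\n",
+        "        print(chunk.choices[0].delta.content, end=\"\")"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    },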
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Function Calling"
+      ],
+      "metadata": {
+        "id": "AqkyKk9Scxgj"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from openai import OpenAI\n",
+        "client = OpenAI(\n",
+        "    api_key=\"sk-1234\", # [OPTIONAL] set if you set one on proxy, else set \"\"\n",
+        "    base_url=\"http://0.0.0.0:4000\",\n",
+        ")\n",
+        "\n",
+        "tools = [\n",
+        "    {\n",
+        "        \"type\": \"function\",\n",
+        "        \"function\": {\n",
+        "            \"name\": \"get_current_weather\",\n",
+        "            \"description\": \"Get the current weather in a given location\",\n",
+        "            \"parameters\": {\n",
+        "                \"type\": \"object\",\n",
+        "                \"properties\": {\n",
+        "                    \"location\": {\n",
+        "                        \"type\": \"string\",\n",
+        "                        \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
+        "                    },\n",
+        "                    \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
+        "                },\n",
+        "                \"required\": [\"location\"],\n",
+        "            },\n",
+        "        }\n",
+        "    }\n",
+        "]\n",
+        "messages = [{\"role\": \"user\", \"content\": \"What's the weather like in Boston today?\"}]\n",
+        "completion = client.chat.completions.create(\n",
+        "    model=\"gpt-4o\", # use 'model_name' from config.yaml\n",
+        "    messages=messages,\n",
+        "    tools=tools,\n",
+        "    tool_choice=\"auto\"\n",
+        ")\n",
+        "\n",
+        "print(completion)\n"
+      ],
+      "metadata": {
+        "id": "wDg10VqLczE1"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Azure OpenAI Python SDK"
+      ],
+      "metadata": {
+        "id": "YYoxLloSaNWW"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import openai\n",
+        "client = openai.AzureOpenAI(\n",
+        "    api_key=\"anything\",\n",
+        "    api_version=\"2023-07-01-preview\", # required by the AzureOpenAI client\n",
+        "    base_url=\"http://0.0.0.0:4000\"\n",
+        ")\n",
+        "\n",
+        "# request sent to model set on litellm proxy, `litellm --model`\n",
+        "response = client.chat.completions.create(\n",
+        "    model=\"gpt-3.5-turbo\",\n",
+        "    messages=[\n",
+        "        {\n",
+        "            \"role\": \"user\",\n",
+        "            \"content\": \"this is a test request, write a short poem\"\n",
+        "        }\n",
+        "    ],\n",
+        "    extra_body={ # pass in any provider-specific param, if not supported by openai, https://docs.litellm.ai/docs/completion/input#provider-specific-params\n",
+        "        \"metadata\": { # 👈 use for logging additional params (e.g. to langfuse)\n",
+        "            \"generation_name\": \"ishaan-generation-openai-client\",\n",
+        "            \"generation_id\": \"openai-client-gen-id22\",\n",
+        "            \"trace_id\": \"openai-client-trace-id22\",\n",
+        "            \"trace_user_id\": \"openai-client-user-id2\"\n",
+        "        }\n",
+        "    }\n",
+        ")\n",
+        "\n",
+        "print(response)"
+      ],
+      "metadata": {
+        "id": "yA1XcgowaSRy"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
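+    {
+      "cell_type": "markdown",
+      "source": [
+        "The async clients work the same way. A minimal sketch with the OpenAI SDK's `AsyncOpenAI` pointed at the proxy, assuming the same local setup (notebooks with IPython 7+ support top-level `await`):"
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from openai import AsyncOpenAI\n",
+        "\n",
+        "client = AsyncOpenAI(\n",
+        "    api_key=\"anything\",\n",
+        "    base_url=\"http://0.0.0.0:4000\" # litellm proxy endpoint (assumed running locally)\n",
+        ")\n",
+        "\n",
+        "# top-level await works in notebooks; in a script, wrap this in asyncio.run(...)\n",
+        "response = await client.chat.completions.create(\n",
+        "    model=\"gpt-3.5-turbo\",\n",
+        "    messages=[{\"role\": \"user\", \"content\": \"this is a test request, write a short poem\"}]\n",
+        ")\n",
+        "\n",
+        "print(response)"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    },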
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Langchain Python"
+      ],
+      "metadata": {
+        "id": "yl9qhDvnaTpL"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from langchain.chat_models import ChatOpenAI\n",
+        "from langchain.prompts.chat import (\n",
+        "    ChatPromptTemplate,\n",
+        "    HumanMessagePromptTemplate,\n",
+        "    SystemMessagePromptTemplate,\n",
+        ")\n",
+        "from langchain.schema import HumanMessage, SystemMessage\n",
+        "import os\n",
+        "\n",
+        "os.environ[\"OPENAI_API_KEY\"] = \"anything\"\n",
+        "\n",
+        "chat = ChatOpenAI(\n",
+        "    openai_api_base=\"http://0.0.0.0:4000\",\n",
+        "    model=\"gpt-3.5-turbo\",\n",
+        "    temperature=0.1,\n",
+        "    extra_body={\n",
+        "        \"metadata\": {\n",
+        "            \"generation_name\": \"ishaan-generation-langchain-client\",\n",
+        "            \"generation_id\": \"langchain-client-gen-id22\",\n",
+        "            \"trace_id\": \"langchain-client-trace-id22\",\n",
+        "            \"trace_user_id\": \"langchain-client-user-id2\"\n",
+        "        }\n",
+        "    }\n",
+        ")\n",
+        "\n",
+        "messages = [\n",
+        "    SystemMessage(\n",
+        "        content=\"You are a helpful assistant that I'm using to make a test request to.\"\n",
+        "    ),\n",
+        "    HumanMessage(\n",
+        "        content=\"test from litellm. tell me why it's amazing in 1 sentence\"\n",
+        "    ),\n",
+        "]\n",
+        "response = chat(messages)\n",
+        "\n",
+        "print(response)"
+      ],
+      "metadata": {
+        "id": "5MUZgSquaW5t"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Curl"
+      ],
+      "metadata": {
+        "id": "B9eMgnULbRaz"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "\n",
+        "\n",
+        "```\n",
+        "curl -X POST 'http://0.0.0.0:4000/chat/completions' \\\n",
+        "  -H 'Content-Type: application/json' \\\n",
+        "  -d '{\n",
+        "    \"model\": \"gpt-3.5-turbo\",\n",
+        "    \"messages\": [\n",
+        "      {\n",
+        "        \"role\": \"user\",\n",
+        "        \"content\": \"what llm are you\"\n",
+        "      }\n",
+        "    ],\n",
+        "    \"metadata\": {\n",
+        "      \"generation_name\": \"ishaan-test-generation\",\n",
+        "      \"generation_id\": \"gen-id22\",\n",
+        "      \"trace_id\": \"trace-id22\",\n",
+        "      \"trace_user_id\": \"user-id2\"\n",
+        "    }\n",
+        "}'\n",
+        "```\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "VWCCk5PFcmhS"
+      }
+    },
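+    {
+      "cell_type": "markdown",
+      "source": [
+        "If you'd rather stay in Python without any SDK, the same request can be sent with `requests` (assumed installed). A minimal sketch mirroring the curl example above:"
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import requests\n",
+        "\n",
+        "# same payload as the curl example above, posted to the local proxy\n",
+        "response = requests.post(\n",
+        "    \"http://0.0.0.0:4000/chat/completions\",\n",
+        "    headers={\"Content-Type\": \"application/json\"},\n",
+        "    json={\n",
+        "        \"model\": \"gpt-3.5-turbo\",\n",
+        "        \"messages\": [{\"role\": \"user\", \"content\": \"what llm are you\"}],\n",
+        "        \"metadata\": {\"generation_name\": \"ishaan-test-generation\"}\n",
+        "    }\n",
+        ")\n",
+        "\n",
+        "print(response.json())"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    },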
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### LlamaIndex"
+      ],
+      "metadata": {
+        "id": "drBAm2e1b6xe"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from llama_index.llms import AzureOpenAI\n",
+        "from llama_index.embeddings import AzureOpenAIEmbedding\n",
+        "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
+        "\n",
+        "llm = AzureOpenAI(\n",
+        "    engine=\"azure-gpt-3.5\", # model_name on litellm proxy\n",
+        "    temperature=0.0,\n",
+        "    azure_endpoint=\"http://0.0.0.0:4000\", # litellm proxy endpoint\n",
+        "    api_key=\"sk-1234\", # litellm proxy API Key\n",
+        "    api_version=\"2023-07-01-preview\",\n",
+        ")\n",
+        "\n",
+        "embed_model = AzureOpenAIEmbedding(\n",
+        "    deployment_name=\"azure-embedding-model\",\n",
+        "    azure_endpoint=\"http://0.0.0.0:4000\",\n",
+        "    api_key=\"sk-1234\",\n",
+        "    api_version=\"2023-07-01-preview\",\n",
+        ")\n",
+        "\n",
+        "\n",
+        "documents = SimpleDirectoryReader(\"llama_index_data\").load_data()\n",
+        "service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)\n",
+        "index = VectorStoreIndex.from_documents(documents, service_context=service_context)\n",
+        "\n",
+        "query_engine = index.as_query_engine()\n",
+        "response = query_engine.query(\"What did the author do growing up?\")\n",
+        "print(response)\n"
+      ],
+      "metadata": {
+        "id": "d0bZcv8fb9mL"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Langchain JS"
+      ],
+      "metadata": {
+        "id": "xypvNdHnb-Yy"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import { ChatOpenAI } from \"@langchain/openai\";\n",
+        "\n",
+        "\n",
+        "const model = new ChatOpenAI({\n",
+        "    modelName: \"gpt-4\",\n",
+        "    openAIApiKey: \"sk-1234\",\n",
+        "    modelKwargs: {\"metadata\": \"hello world\"} // 👈 PASS Additional params here\n",
+        "}, {\n",
+        "    basePath: \"http://0.0.0.0:4000\",\n",
+        "});\n",
+        "\n",
+        "const message = await model.invoke(\"Hi there!\");\n",
+        "\n",
+        "console.log(message);\n"
+      ],
+      "metadata": {
+        "id": "R55mK2vCcBN2"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### OpenAI JS"
+      ],
+      "metadata": {
+        "id": "nC4bLifCcCiW"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "const { OpenAI } = require('openai');\n",
+        "\n",
+        "const openai = new OpenAI({\n",
+        "    apiKey: \"sk-1234\", // set to your litellm proxy key, or \"\" if none is set\n",
+        "    baseURL: \"http://0.0.0.0:4000\"\n",
+        "});\n",
+        "\n",
+        "async function main() {\n",
+        "    const chatCompletion = await openai.chat.completions.create({\n",
+        "        messages: [{ role: 'user', content: 'Say this is a test' }],\n",
+        "        model: 'gpt-3.5-turbo',\n",
+        "    }, {\"metadata\": {\n",
+        "        \"generation_name\": \"ishaan-generation-openaijs-client\",\n",
+        "        \"generation_id\": \"openaijs-client-gen-id22\",\n",
+        "        \"trace_id\": \"openaijs-client-trace-id22\",\n",
+        "        \"trace_user_id\": \"openaijs-client-user-id2\"\n",
+        "    }});\n",
+        "\n",
+        "    console.log(chatCompletion);\n",
+        "}\n",
+        "\n",
+        "main();\n"
+      ],
+      "metadata": {
+        "id": "MICH8kIMcFpg"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Anthropic SDK"
+      ],
+      "metadata": {
+        "id": "D1Q07pEAcGTb"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "\n",
+        "from anthropic import Anthropic\n",
+        "\n",
+        "client = Anthropic(\n",
+        "    base_url=\"http://localhost:4000\", # proxy endpoint\n",
+        "    api_key=\"sk-s4xN1IiLTCytwtZFJaYQrA\", # litellm proxy virtual key\n",
+        ")\n",
+        "\n",
+        "message = client.messages.create(\n",
+        "    max_tokens=1024,\n",
+        "    messages=[\n",
+        "        {\n",
+        "            \"role\": \"user\",\n",
+        "            \"content\": \"Hello, Claude\",\n",
+        "        }\n",
+        "    ],\n",
+        "    model=\"claude-3-opus-20240229\",\n",
+        ")\n",
+        "print(message.content)"
+      ],
+      "metadata": {
+        "id": "qBjFcAvgcI3t"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## /embeddings"
+      ],
+      "metadata": {
+        "id": "dFAR4AJGcONI"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### OpenAI Python SDK"
+      ],
+      "metadata": {
+        "id": "lgNoM281cRzR"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from openai import OpenAI\n",
+        "\n",
+        "# set base_url to your proxy server\n",
+        "# set api_key to send to proxy server\n",
+        "client = OpenAI(api_key=\"\", base_url=\"http://0.0.0.0:4000\")\n",
+        "\n",
+        "response = client.embeddings.create(\n",
+        "    input=[\"hello from litellm\"],\n",
+        "    model=\"text-embedding-ada-002\"\n",
+        ")\n",
+        "\n",
+        "print(response)\n"
+      ],
+      "metadata": {
+        "id": "NY3DJhPfcQhA"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
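+    {
+      "cell_type": "markdown",
+      "source": [
+        "To sanity-check the vectors coming back through the proxy, you can compare two embeddings by cosine similarity. A quick sketch in pure Python, assuming the same client and model as the cell above:"
+      ],
+      "metadata": {}
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import math\n",
+        "from openai import OpenAI\n",
+        "\n",
+        "client = OpenAI(api_key=\"\", base_url=\"http://0.0.0.0:4000\")\n",
+        "\n",
+        "# embed two similar sentences in one call\n",
+        "response = client.embeddings.create(\n",
+        "    input=[\"hello from litellm\", \"hello from the proxy\"],\n",
+        "    model=\"text-embedding-ada-002\"\n",
+        ")\n",
+        "\n",
+        "a = response.data[0].embedding\n",
+        "b = response.data[1].embedding\n",
+        "\n",
+        "# cosine similarity = dot(a, b) / (|a| * |b|)\n",
+        "dot = sum(x * y for x, y in zip(a, b))\n",
+        "norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))\n",
+        "print(dot / norm)"
+      ],
+      "metadata": {},
+      "execution_count": null,
+      "outputs": []
+    },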
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Langchain Embeddings"
+      ],
+      "metadata": {
+        "id": "hmbg-DW6cUZs"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from langchain.embeddings import OpenAIEmbeddings\n",
+        "\n",
+        "embeddings = OpenAIEmbeddings(model=\"sagemaker-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n",
+        "\n",
+        "\n",
+        "text = \"This is a test document.\"\n",
+        "\n",
+        "query_result = embeddings.embed_query(text)\n",
+        "\n",
+        "print(\"SAGEMAKER EMBEDDINGS\")\n",
+        "print(query_result[:5])\n",
+        "\n",
+        "embeddings = OpenAIEmbeddings(model=\"bedrock-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n",
+        "\n",
+        "text = \"This is a test document.\"\n",
+        "\n",
+        "query_result = embeddings.embed_query(text)\n",
+        "\n",
+        "print(\"BEDROCK EMBEDDINGS\")\n",
+        "print(query_result[:5])\n",
+        "\n",
+        "embeddings = OpenAIEmbeddings(model=\"bedrock-titan-embeddings\", openai_api_base=\"http://0.0.0.0:4000\", openai_api_key=\"temp-key\")\n",
+        "\n",
+        "text = \"This is a test document.\"\n",
+        "\n",
+        "query_result = embeddings.embed_query(text)\n",
+        "\n",
+        "print(\"TITAN EMBEDDINGS\")\n",
+        "print(query_result[:5])"
+      ],
+      "metadata": {
+        "id": "lX2S8Nl1cWVP"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### Curl Request"
+      ],
+      "metadata": {
+        "id": "oqGbWBCQcYfd"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "\n",
+        "\n",
+        "```\n",
+        "curl -X POST 'http://0.0.0.0:4000/embeddings' \\\n",
+        "  -H 'Content-Type: application/json' \\\n",
+        "  -d '{\n",
+        "    \"model\": \"text-embedding-ada-002\",\n",
+        "    \"input\": [\"write a litellm poem\"]\n",
+        "  }'\n",
+        "```\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "7rkIMV9LcdwQ"
+      }
+    }
+  ]
+}
\ No newline at end of file