From ca0d8139eced5329fe1476371cd80f0c093a9a29 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Fri, 15 Dec 2023 07:29:35 +0530
Subject: [PATCH] (docs) proxy - deploy on GCP cloud run

---
 docs/my-website/docs/proxy/deploy.md | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/docs/my-website/docs/proxy/deploy.md b/docs/my-website/docs/proxy/deploy.md
index 5975bed0b..69e07df53 100644
--- a/docs/my-website/docs/proxy/deploy.md
+++ b/docs/my-website/docs/proxy/deploy.md
@@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 # 🐳 Docker, Deploying LiteLLM Proxy
 
 ## Dockerfile
@@ -82,6 +85,26 @@ Your LiteLLM container should be running now on the defined port e.g. `8000`.
 
 
+## Deploy on Google Cloud Run
+**Click the button** to deploy to Google Cloud Run.
+
+[![Deploy](https://deploy.cloud.run/button.svg)](https://l.linklyhq.com/l/1uHtX)
+
+### Testing your deployed proxy
+**This assumes the required API keys (e.g. `OPENAI_API_KEY`) are set as environment variables on the service**
+
+https://litellm-7yjrj3ha2q-uc.a.run.app is our example proxy; substitute the URL of your own deployed Cloud Run app
+
+```shell
+curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-3.5-turbo",
+    "messages": [{"role": "user", "content": "Say this is a test!"}],
+    "temperature": 0.7
+  }'
+```
+
 ## LiteLLM Proxy Performance
 
 LiteLLM proxy has been load tested to handle 1500 req/s.
 
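
For readers who prefer scripting over the one-click button, the same container can also be deployed with the `gcloud` CLI. The sketch below is illustrative and not part of the patch above: the service name `litellm-proxy`, the region, the port, and the env var value are placeholder assumptions, and while LiteLLM publishes a `ghcr.io/berriai/litellm` image, you should verify the tag for your setup.

```shell
# Minimal sketch: deploy the LiteLLM proxy image to Cloud Run via the gcloud CLI.
# Service name, region, port, and the API key value are placeholders; adjust
# them for your project. The image tag is assumed from LiteLLM's GHCR releases.
gcloud run deploy litellm-proxy \
  --image ghcr.io/berriai/litellm:main-latest \
  --region us-central1 \
  --port 8000 \
  --allow-unauthenticated \
  --set-env-vars "OPENAI_API_KEY=<your-openai-key>"
```

On success, `gcloud` prints the service URL; that URL takes the place of `https://litellm-7yjrj3ha2q-uc.a.run.app` in the curl test shown in the patch.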