From 87d0b72a4a7533fd03c46b4c04cf81e5e7dc6a57 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 7 Nov 2023 09:48:58 -0800
Subject: [PATCH] (docs) proxy

---
 docs/my-website/docs/simple_proxy.md | 171 +--------------------------
 1 file changed, 3 insertions(+), 168 deletions(-)

diff --git a/docs/my-website/docs/simple_proxy.md b/docs/my-website/docs/simple_proxy.md
index e4577e4ad..71701bba1 100644
--- a/docs/my-website/docs/simple_proxy.md
+++ b/docs/my-website/docs/simple_proxy.md
@@ -4,20 +4,11 @@ import TabItem from '@theme/TabItem';
 
 # 💥 Evaluate LLMs - OpenAI Proxy Server
 
-A simple, fast, and lightweight **OpenAI-compatible server** to call 100+ LLM APIs.
-
 LiteLLM Server supports:
 
-* Call [Huggingface/Bedrock/TogetherAI/etc.](#other-supported-models) in the OpenAI ChatCompletions format
-* Set custom prompt templates + model-specific configs (temperature, max_tokens, etc.)
-* Caching (In-memory + Redis)
-
-[**See Code**](https://github.com/BerriAI/litellm/tree/main/litellm_server)
-
-:::info
-We want to learn how we can make the server better! Meet the [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
-join our [discord](https://discord.gg/wuPM9dRgDw)
-:::
+* Call 100+ LLMs [Huggingface/Bedrock/TogetherAI/etc.](#other-supported-models) in the OpenAI `ChatCompletions` & `Completions` format
+* Set custom prompt templates + model-specific configs (`temperature`, `max_tokens`, etc.)
+* Caching Responses
 
 ## Quick Start
 
@@ -347,162 +338,6 @@ $ cd ./litellm/litellm_server
 $ uvicorn main:app --host 0.0.0.0 --port 8000
 ```
 
-## Setting LLM API keys
-This server allows two ways of passing API keys to litellm:
-- Environment Variables - This server by default assumes the LLM API Keys are stored in the environment variables
-- Dynamic Variables passed to `/chat/completions`
-  - Set `AUTH_STRATEGY=DYNAMIC` in the Environment
-  - Pass required auth params `api_key`, `api_base`, `api_version` with the request params
-
-#### Deploy on Google Cloud Run
-**Click the button** to deploy to Google Cloud Run
-
-[![Deploy](https://deploy.cloud.run/button.svg)](https://l.linklyhq.com/l/1uHtX)
-
-On a successful deploy your Cloud Run Shell will have this output
-
-### Testing your deployed server
-**Assuming the required keys are set as Environment Variables**
-
-https://litellm-7yjrj3ha2q-uc.a.run.app is our example server, substitute it with your deployed cloud run app
-
-```shell
-curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-     "model": "gpt-3.5-turbo",
-     "messages": [{"role": "user", "content": "Say this is a test!"}],
-     "temperature": 0.7
-   }'
-```
-
-```shell
-curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-     "model": "azure/<your-deployment-name>",
-     "messages": [{"role": "user", "content": "Say this is a test!"}],
-     "temperature": 0.7
-   }'
-```
-
-```shell
-curl https://litellm-7yjrj3ha2q-uc.a.run.app/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-     "model": "claude-2",
-     "messages": [{"role": "user", "content": "Say this is a test!"}],
-     "temperature": 0.7
-   }'
-```
-
-### Set LLM API Keys
-#### Environment Variables
-More info [here](https://cloud.google.com/run/docs/configuring/services/environment-variables#console)
-
-1. In the Google Cloud console, go to Cloud Run: [Go to Cloud Run](https://console.cloud.google.com/run)
-
-2. Click on the **litellm** service
-
-3. Click **Edit and Deploy New Revision**
-
-4. Enter your Environment Variables
-   Example: `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`
-
-#### Deploy on Render
-**Click the button** to deploy to Render
-
-[![Deploy](https://render.com/images/deploy-to-render-button.svg)](https://l.linklyhq.com/l/1uHsr)
-
-On a successful deploy https://dashboard.render.com/ should display the following
-
-#### Deploy on AWS Apprunner
-1. Fork LiteLLM https://github.com/BerriAI/litellm
-2. Navigate to App Runner on AWS Console: https://console.aws.amazon.com/apprunner/home#/services
-3. Follow the steps in the video below
-
-4. Testing your deployed endpoint
-
-   **Assuming the required keys are set as Environment Variables** Example: `OPENAI_API_KEY`
-
-   https://b2w6emmkzp.us-east-1.awsapprunner.com is our example server, substitute it with your deployed apprunner endpoint
-
-   ```shell
-   curl https://b2w6emmkzp.us-east-1.awsapprunner.com/v1/chat/completions \
-     -H "Content-Type: application/json" \
-     -d '{
-        "model": "gpt-3.5-turbo",
-        "messages": [{"role": "user", "content": "Say this is a test!"}],
-        "temperature": 0.7
-      }'
-   ```
-
-   ```shell
-   curl https://b2w6emmkzp.us-east-1.awsapprunner.com/v1/chat/completions \
-     -H "Content-Type: application/json" \
-     -d '{
-        "model": "azure/<your-deployment-name>",
-        "messages": [{"role": "user", "content": "Say this is a test!"}],
-        "temperature": 0.7
-      }'
-   ```
-
-   ```shell
-   curl https://b2w6emmkzp.us-east-1.awsapprunner.com/v1/chat/completions \
-     -H "Content-Type: application/json" \
-     -d '{
-        "model": "claude-2",
-        "messages": [{"role": "user", "content": "Say this is a test!"}],
-        "temperature": 0.7
-      }'
-   ```
-
 
 ## Advanced
 ### Caching - Completion() and Embedding() Responses
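Note: the removed "Setting LLM API keys" section above also documents a dynamic-auth mode, where `AUTH_STRATEGY=DYNAMIC` is set in the environment and auth params (`api_key`, `api_base`, `api_version`) ride along with each request. A minimal sketch of that call shape, assuming a locally running server on port 8000 (per the Quick Start context in the patch); the endpoint and key are placeholders, not values from the patch:

```shell
# Sketch only: per-request auth when the server runs with AUTH_STRATEGY=DYNAMIC.
# 0.0.0.0:8000 mirrors the uvicorn command above; sk-<your-openai-key> is a placeholder.
curl http://0.0.0.0:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
     "model": "gpt-3.5-turbo",
     "messages": [{"role": "user", "content": "Say this is a test!"}],
     "api_key": "sk-<your-openai-key>"
   }'
```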