diff --git a/cookbook/llm-ab-test-server/main.py b/cookbook/llm-ab-test-server/main.py
new file mode 100644
index 000000000..3102bedd2
--- /dev/null
+++ b/cookbook/llm-ab-test-server/main.py
@@ -0,0 +1,46 @@
+from flask import Flask, request, jsonify, abort, Response
+from flask_cors import CORS
+from litellm import completion
+import os, dotenv
+import random
+dotenv.load_dotenv()
+
+# TODO: set your keys in .env or here:
+# os.environ["OPENAI_API_KEY"] = ""  # set your openai key here or in your .env
+# see supported models, keys here:
+
+
+app = Flask(__name__)
+CORS(app)
+
+@app.route('/')
+def index():
+    return 'received!', 200
+
+# Dictionary of LLMs with their A/B test ratios; the weights should sum to 1 :)
+llm_dict = {
+    "gpt-4": 0.2,
+    "together_ai/togethercomputer/llama-2-70b-chat": 0.4,
+    "claude-2": 0.2,
+    "claude-1.2": 0.2
+}
+
+
+@app.route('/chat/completions', methods=["POST"])
+def api_completion():
+    data = request.json
+    try:
+        # pick a model according to the A/B test weights, then pass the request through
+        selected_llm = random.choices(list(llm_dict.keys()), weights=list(llm_dict.values()))[0]
+        data['model'] = selected_llm
+        response = completion(**data)
+    except Exception as e:
+        # bail out here; otherwise `response` would be undefined in the return below
+        print(f"got error: {e}")
+        abort(500, description=str(e))
+    return response, 200
+
+
+if __name__ == "__main__":
+    from waitress import serve
+    print("starting server")
+    serve(app, host="0.0.0.0", port=5000, threads=500)
diff --git a/cookbook/llm-ab-test-server/readme.md b/cookbook/llm-ab-test-server/readme.md
new file mode 100644
index 000000000..2c7afd6bd
--- /dev/null
+++ b/cookbook/llm-ab-test-server/readme.md
@@ -0,0 +1,97 @@
+# 🚅 LiteLLM - A/B Testing LLMs in Production
+
+Call all LLM APIs using the OpenAI format [Anthropic, Huggingface, Cohere, Azure OpenAI etc.]
+

+100+ Supported Models | Docs | Demo Website

+
+LiteLLM allows you to call 100+ LLMs using the same `completion()` interface.
+This template server lets you define the LLMs you want to A/B test, along with their traffic ratios:
+
+```python
+llm_dict = {
+    "gpt-4": 0.2,
+    "together_ai/togethercomputer/llama-2-70b-chat": 0.4,
+    "claude-2": 0.2,
+    "claude-1.2": 0.2
+}
+```
+
+LiteLLM simplifies I/O across all of these models; the server simply makes a `litellm.completion()` call to whichever model is selected.
+
+- Translating inputs to the provider's completion and embedding endpoints
+- Guarantees [consistent output](https://litellm.readthedocs.io/en/latest/output/) - text responses will always be available at `['choices'][0]['message']['content']`
+- Exception mapping - common exceptions across providers are mapped to the [OpenAI exception types](https://help.openai.com/en/articles/6897213-openai-library-error-types-guidance)
+
+# Usage
+
+```
+pip install litellm
+```
+
+```python
+import os
+from litellm import completion
+
+## set ENV variables
+os.environ["OPENAI_API_KEY"] = "openai key"
+os.environ["COHERE_API_KEY"] = "cohere key"
+os.environ["ANTHROPIC_API_KEY"] = "anthropic key"
+
+messages = [{"content": "Hello, how are you?", "role": "user"}]
+
+# openai call
+response = completion(model="gpt-3.5-turbo", messages=messages)
+
+# cohere call
+response = completion(model="command-nightly", messages=messages)
+
+# anthropic call
+response = completion(model="claude-2", messages=messages)
+```
+
+Stable version:
+```
+pip install litellm==0.1.424
+```
+
+# support / talk with founders
+- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
+- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
+- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
+- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
+
+# why did we build this
+- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI, and Cohere
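+
+# testing the server
+The snippet below is a minimal sketch of how a client could exercise the A/B test server; it assumes the server is running locally on the default port from `main.py` (5000) and uses the third-party `requests` library.
+
+```python
+import requests
+
+# the body mirrors the OpenAI /chat/completions request format;
+# the server picks a model from llm_dict and fills in "model" itself
+payload = {
+    "messages": [{"role": "user", "content": "Hello, how are you?"}]
+}
+
+resp = requests.post("http://localhost:5000/chat/completions", json=payload)
+result = resp.json()
+
+# LiteLLM's consistent output format: the text response is always at
+# ['choices'][0]['message']['content']
+print(result["choices"][0]["message"]["content"])
+```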