from flask import Flask, request, jsonify, abort, Response
from flask_cors import CORS
from litellm import completion
import os, dotenv
import random

dotenv.load_dotenv()

# TODO: set your keys in .env or here:
# os.environ["OPENAI_API_KEY"] = ""  # set your openai key here or in your .env
# see supported models, keys here:
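# A typical .env for the models below might look like this (an assumption,
# not part of the original file; adjust the variable names to whichever
# providers you actually use):
#
#   OPENAI_API_KEY=sk-...
#   TOGETHERAI_API_KEY=...
#   ANTHROPIC_API_KEY=...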

app = Flask(__name__)
CORS(app)


@app.route('/')
def index():
    return 'received!', 200


# Dictionary of LLMs with their A/B test ratios, should sum to 1 :)
llm_dict = {
    "gpt-4": 0.2,
    "together_ai/togethercomputer/llama-2-70b-chat": 0.4,
    "claude-2": 0.2,
    "claude-1.2": 0.2
}
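
# Minimal sanity check (not in the original file): catch a typo in the ratios
# at startup instead of silently skewing the A/B split.
assert abs(sum(llm_dict.values()) - 1.0) < 1e-9, "A/B test ratios should sum to 1"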


@app.route('/chat/completions', methods=["POST"])
def api_completion():
    data = request.json
    try:
        # pick a model at random, weighted by its A/B test ratio,
        # then unpack the request data into litellm's completion()
        selected_llm = random.choices(list(llm_dict.keys()), weights=list(llm_dict.values()))[0]
        response = completion(**data, model=selected_llm)
    except Exception as e:
        # return early here; otherwise `response` is undefined below
        print(f"got error: {e}")
        return jsonify({"error": str(e)}), 500
    return response, 200
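
# Example request against this endpoint (assumes the server is running locally
# on port 5000 and the relevant provider keys are set in .env):
#
#   curl -X POST http://localhost:5000/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Hey, how are you?"}]}'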


if __name__ == "__main__":
    from waitress import serve
    print("starting server")
    serve(app, host="0.0.0.0", port=5000, threads=500)