forked from phoenix/litellm-mirror
moving proxy server to the top of repo
This commit is contained in:
parent
8543d89418
commit
8ef47524bf
10 changed files with 0 additions and 0 deletions
86  proxy-server/main.py  Normal file
@@ -0,0 +1,86 @@
from flask import Flask, request, jsonify, abort, Response
from flask_cors import CORS
import traceback
import litellm

from litellm import completion
import openai
from utils import handle_error, get_cache, add_cache
import os, dotenv
import logging
import json

dotenv.load_dotenv()

# TODO: set your keys in .env or here:
# os.environ["OPENAI_API_KEY"] = ""  # set your openai key here
# see supported models / keys here: https://litellm.readthedocs.io/en/latest/supported/

######### LOGGING ###################
# log your data to slack, supabase
litellm.success_callback = ["slack", "supabase"]  # set .env SLACK_API_TOKEN, SLACK_API_SECRET, SLACK_API_CHANNEL, SUPABASE

######### ERROR MONITORING ##########
# log errors to slack, sentry, supabase
litellm.failure_callback = ["slack", "sentry", "supabase"]  # .env SENTRY_API_URL

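# note: dotenv.load_dotenv() above pulls these values from a local .env file; per
# the comments in this file that means OPENAI_API_KEY (or another provider key),
# SLACK_API_TOKEN, SLACK_API_SECRET, SLACK_API_CHANNEL and SENTRY_API_URL, plus
# whatever Supabase credentials the "supabase" callback expects (their exact
# variable names are not spelled out here).
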
app = Flask(__name__)
CORS(app)


@app.route('/')
def index():
    return 'received!', 200


def data_generator(response):
    # format each streamed chunk as a server-sent event ("data: {...}" line)
    for chunk in response:
        yield f"data: {json.dumps(chunk)}\n\n"


@app.route('/chat/completions', methods=["POST"])
def api_completion():
    data = request.json
    if data.get('stream') == "True":
        data['stream'] = True  # convert the string sent over HTTP to a boolean
    try:
        # pass the request body into litellm's completion(), unpacking it as kwargs
        response = completion(**data)
        if 'stream' in data and data['stream'] == True:  # use data_generator to stream responses back as server-sent events
            return Response(data_generator(response), mimetype='text/event-stream')
    except Exception as e:
        # call handle_error function
        print(f"got error: {e}")
        return handle_error(data)
    return response, 200  # non-streaming responses


@app.route('/get_models', methods=["POST"])
def get_models():
    try:
        return litellm.model_list
    except Exception as e:
        traceback.print_exc()
        response = {"error": str(e)}
    return response, 200


if __name__ == "__main__":
    from waitress import serve
    serve(app, host="0.0.0.0", port=5000, threads=500)


############### Advanced ##########################

############ Caching ###################################
# make a new endpoint with caching
# this cache is built using ChromaDB
# it exposes two functions: add_cache() and get_cache()
@app.route('/chat/completions_with_cache', methods=["POST"])
def api_completion_with_cache():
    data = request.json
    try:
        cache_response = get_cache(data['messages'])
        if cache_response is not None:
            return cache_response
        # pass the request body into litellm's completion(), unpacking it as kwargs
        response = completion(**data)

        # add to cache
        add_cache(data['messages'], response)  # assumes utils.add_cache takes (messages, response)
    except Exception as e:
        # call handle_error function
        return handle_error(data)
    return response, 200
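For reference, a minimal client sketch for the routes above, assuming the proxy is running locally on port 5000. The model name and messages are placeholder values, and the non-streaming branch assumes litellm's completion() result serializes to JSON.

# hypothetical client for the proxy above (assumes it runs on localhost:5000)
import json
import requests

BASE_URL = "http://localhost:5000"

# non-streaming request: the proxy returns litellm's completion() result
payload = {
    "model": "gpt-3.5-turbo",  # placeholder; any model litellm supports
    "messages": [{"role": "user", "content": "Hello, who are you?"}],
}
resp = requests.post(f"{BASE_URL}/chat/completions", json=payload)
print(resp.json())

# streaming request: the proxy expects the string "True", which it converts to a
# boolean before calling completion(); chunks come back as server-sent events
stream_payload = {**payload, "stream": "True"}
with requests.post(f"{BASE_URL}/chat/completions", json=stream_payload, stream=True) as r:
    for line in r.iter_lines():
        if line and line.startswith(b"data: "):
            chunk = json.loads(line[len(b"data: "):])
            print(chunk)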
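The /get_models route can be exercised the same way; note it is registered for POST, not GET. A small sketch, assuming the same local server:

# hypothetical check of the /get_models route
import requests

models = requests.post("http://localhost:5000/get_models")
print(models.json())  # litellm.model_list, assuming the installed Flask version serializes a bare list return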
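And a sketch for the cached route: sending the same messages twice should be answered from the ChromaDB-backed cache on the second call, assuming utils.get_cache()/add_cache() behave as the comments in main.py describe.

# hypothetical demo of the ChromaDB-backed cache route
import time
import requests

payload = {
    "model": "gpt-3.5-turbo",  # placeholder model name
    "messages": [{"role": "user", "content": "What is a proxy server?"}],
}

for attempt in range(2):
    start = time.time()
    resp = requests.post("http://localhost:5000/chat/completions_with_cache", json=payload)
    print(f"attempt {attempt + 1}: {time.time() - start:.2f}s")
    print(resp.json())  # the second attempt should be served from the cache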