diff --git a/openai-proxy/.env.template b/openai_proxy/.env.template
similarity index 100%
rename from openai-proxy/.env.template
rename to openai_proxy/.env.template
diff --git a/openai-proxy/Dockerfile b/openai_proxy/Dockerfile
similarity index 100%
rename from openai-proxy/Dockerfile
rename to openai_proxy/Dockerfile
diff --git a/openai-proxy/README.md b/openai_proxy/README.md
similarity index 100%
rename from openai-proxy/README.md
rename to openai_proxy/README.md
diff --git a/openai_proxy/__init__.py b/openai_proxy/__init__.py
new file mode 100644
index 000000000..4c57a5065
--- /dev/null
+++ b/openai_proxy/__init__.py
@@ -0,0 +1,2 @@
+from .main import *
+from .utils import *
\ No newline at end of file
diff --git a/openai_proxy/config b/openai_proxy/config
new file mode 100644
index 000000000..e69de29bb
diff --git a/openai-proxy/main.py b/openai_proxy/main.py
similarity index 63%
rename from openai-proxy/main.py
rename to openai_proxy/main.py
index e801110e5..1cf15bca0 100644
--- a/openai-proxy/main.py
+++ b/openai_proxy/main.py
@@ -5,7 +5,8 @@ from fastapi.responses import StreamingResponse, FileResponse
 from fastapi.middleware.cors import CORSMiddleware
 import json
 import os
-from utils import set_callbacks
+from typing import Optional
+from openai_proxy.utils import set_callbacks, load_router_config
 import dotenv
 dotenv.load_dotenv() # load env variables
@@ -20,7 +21,11 @@ app.add_middleware(
     allow_methods=["*"],
     allow_headers=["*"],
 )
+#### GLOBAL VARIABLES ####
+llm_router: Optional[litellm.Router] = None
+
 set_callbacks() # sets litellm callbacks for logging if they exist in the environment
+llm_router = load_router_config(router=llm_router)
 #### API ENDPOINTS ####
 @router.post("/v1/models")
 @router.get("/models") # if project requires model list
@@ -101,6 +106,48 @@ async def chat_completion(request: Request):
         return {"error": error_msg}
         # raise HTTPException(status_code=500, detail=error_msg)
 
+@router.post("/router/completions")
+async def router_completion(request: Request):
+    global llm_router
+    try:
+        data = await request.json()
+        if "model_list" in data: # deployments may be passed inline in the request body
+            llm_router = litellm.Router(model_list=data.pop("model_list"))
+        if llm_router is None:
+            raise Exception("Save a model list via config.yaml (e.g. `docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .`) or pass model_list=[..] in the request body")
+
+        # openai.ChatCompletion.create replacement; forward the openai-style payload
+        response = await llm_router.acompletion(**data)
+
+        if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
+            return StreamingResponse(data_generator(response), media_type='text/event-stream')
+        return response
+    except Exception as e:
+        error_traceback = traceback.format_exc()
+        error_msg = f"{str(e)}\n\n{error_traceback}"
+        return {"error": error_msg}
+
+@router.post("/router/embedding")
+async def router_embedding(request: Request):
+    global llm_router
+    try:
+        if llm_router is None:
+            raise Exception("Save a model list via config.yaml (e.g. `docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .`)")
+
+        data = await request.json()
+        # openai.Embedding.create replacement; embeddings take `input`, not `messages`
+        response = await llm_router.aembedding(model=data["model"], input=data["input"])
+
+        return response
+    except Exception as e:
+        error_traceback = traceback.format_exc()
+        error_msg = f"{str(e)}\n\n{error_traceback}"
+        return {"error": error_msg}
+
 @router.get("/")
 async def home(request: Request):
     return "LiteLLM: RUNNING"
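Reviewer note: a minimal, hypothetical client for the new /router/completions endpoint. The host/port (localhost:8000) and the single OpenAI deployment in model_list below are illustrative assumptions, not part of this diff.

# Hypothetical sketch: call /router/completions with an inline model_list
# (the alternative to baking a config.yaml into the Docker image).
import os
import requests

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    "model_list": [{  # one deployment; add more to load-balance across them
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    }],
}

response = requests.post("http://localhost:8000/router/completions", json=payload)
print(response.json()["choices"][0]["message"]["content"])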
diff --git a/openai-proxy/openapi.json b/openai_proxy/openapi.json
similarity index 100%
rename from openai-proxy/openapi.json
rename to openai_proxy/openapi.json
diff --git a/openai-proxy/requirements.txt b/openai_proxy/requirements.txt
similarity index 100%
rename from openai-proxy/requirements.txt
rename to openai_proxy/requirements.txt
diff --git a/openai-proxy/tests/test_bedrock.py b/openai_proxy/tests/test_bedrock.py
similarity index 100%
rename from openai-proxy/tests/test_bedrock.py
rename to openai_proxy/tests/test_bedrock.py
diff --git a/openai-proxy/tests/test_caching.py b/openai_proxy/tests/test_caching.py
similarity index 100%
rename from openai-proxy/tests/test_caching.py
rename to openai_proxy/tests/test_caching.py
diff --git a/openai-proxy/tests/test_openai.py b/openai_proxy/tests/test_openai.py
similarity index 100%
rename from openai-proxy/tests/test_openai.py
rename to openai_proxy/tests/test_openai.py
diff --git a/openai-proxy/tests/test_openrouter.py b/openai_proxy/tests/test_openrouter.py
similarity index 100%
rename from openai-proxy/tests/test_openrouter.py
rename to openai_proxy/tests/test_openrouter.py
diff --git a/openai_proxy/tests/test_router.py b/openai_proxy/tests/test_router.py
new file mode 100644
index 000000000..13977145a
--- /dev/null
+++ b/openai_proxy/tests/test_router.py
@@ -0,0 +1,59 @@
+#### What this tests ####
+# This tests the /router/completions endpoint, passing a model_list in the request body
+
+import sys, os
+import traceback, asyncio
+import pytest
+from fastapi.testclient import TestClient
+from fastapi import Request
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+from openai_proxy import app
+
+
+def test_router_completion():
+    client = TestClient(app)
+    data = {
+        "model": "gpt-3.5-turbo",
+        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
+        "model_list": [{ # list of model deployments
+            "model_name": "gpt-3.5-turbo", # openai model name
+            "litellm_params": { # params for litellm completion/embedding call
+                "model": "azure/chatgpt-v-2",
+                "api_key": os.getenv("AZURE_API_KEY"),
+                "api_version": os.getenv("AZURE_API_VERSION"),
+                "api_base": os.getenv("AZURE_API_BASE")
+            },
+            "tpm": 240000,
+            "rpm": 1800
+        }, {
+            "model_name": "gpt-3.5-turbo", # openai model name
+            "litellm_params": { # params for litellm completion/embedding call
+                "model": "azure/chatgpt-functioncalling",
+                "api_key": os.getenv("AZURE_API_KEY"),
+                "api_version": os.getenv("AZURE_API_VERSION"),
+                "api_base": os.getenv("AZURE_API_BASE")
+            },
+            "tpm": 240000,
+            "rpm": 1800
+        }, {
+            "model_name": "gpt-3.5-turbo", # openai model name
+            "litellm_params": { # params for litellm completion/embedding call
+                "model": "gpt-3.5-turbo",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+            },
+            "tpm": 1000000,
+            "rpm": 9000
+        }]
+    }
+
+    response = client.post("/router/completions", json=data)
+    print(f"response: {response.text}")
+    assert response.status_code == 200
+
+    response_data = response.json()
+    # Perform assertions on the response data
+    assert isinstance(response_data['choices'][0]['message']['content'], str)
+
+test_router_completion()
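Reviewer note: the new /router/embedding endpoint is not covered by this test. A hypothetical client sketch follows (same assumed localhost:8000 host; it requires a model_list already loaded from config.yaml, since this endpoint accepts none inline).

# Hypothetical sketch: call /router/embedding on a proxy whose router was
# initialized from config.yaml at startup.
import requests

payload = {"model": "gpt-3.5-turbo", "input": ["Hey, how's it going?"]}
response = requests.post("http://localhost:8000/router/embedding", json=payload)
print(response.json())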
client.post("/router/completions", json=data) + print(f"response: {response.text}") + assert response.status_code == 200 + + response_data = response.json() + # Perform assertions on the response data + assert isinstance(response_data['choices'][0]['message']['content'], str) + +test_router_completion() diff --git a/openai-proxy/utils.py b/openai_proxy/utils.py similarity index 54% rename from openai-proxy/utils.py rename to openai_proxy/utils.py index c0f8612f3..3cac94c2c 100644 --- a/openai-proxy/utils.py +++ b/openai_proxy/utils.py @@ -1,5 +1,7 @@ import os, litellm +import yaml import dotenv +from typing import Optional dotenv.load_dotenv() # load env variables def set_callbacks(): @@ -21,5 +23,25 @@ def set_callbacks(): litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD")) +def load_router_config(router: Optional[litellm.Router]): + config = {} + config_file = 'config.yaml' + if os.path.exists(config_file): + with open(config_file, 'r') as file: + config = yaml.safe_load(file) + else: + print(f"Config file '{config_file}' not found.") + + ## MODEL LIST + model_list = config.get('model_list', None) + if model_list: + router = litellm.Router(model_list=model_list) + ## ENVIRONMENT VARIABLES + environment_variables = config.get('environment_variables', None) + if environment_variables: + for key, value in environment_variables.items(): + os.environ[key] = value + + return router diff --git a/router_config_template.yaml b/router_config_template.yaml new file mode 100644 index 000000000..0fb951ba7 --- /dev/null +++ b/router_config_template.yaml @@ -0,0 +1,28 @@ +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: azure/chatgpt-v-2 + api_key: your_azure_api_key + api_version: your_azure_api_version + api_base: your_azure_api_base + tpm: 240000 # REPLACE with your azure deployment tpm + rpm: 1800 # REPLACE with your azure deployment rpm + - model_name: gpt-3.5-turbo + litellm_params: + model: azure/chatgpt-functioncalling + api_key: your_azure_api_key + api_version: your_azure_api_version + api_base: your_azure_api_base + tpm: 240000 + rpm: 1800 + - model_name: gpt-3.5-turbo + litellm_params: + model: gpt-3.5-turbo + api_key: your_openai_api_key + tpm: 1000000 # REPLACE with your openai tpm + rpm: 9000 # REPLACE with your openai rpm + +environment_variables: + REDIS_HOST: your_redis_host + REDIS_PASSWORD: your_redis_password + REDIS_PORT: your_redis_port \ No newline at end of file
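Reviewer note: a sketch of what load_router_config() in openai_proxy/utils.py does with this template at startup, assuming the filled-in copy is saved as config.yaml in the proxy's working directory.

# Mirrors load_router_config(): read config.yaml, build one litellm.Router
# over all deployments (litellm picks among entries sharing a model_name),
# then export environment_variables into the process environment.
import os
import yaml
import litellm

with open("config.yaml") as f:
    config = yaml.safe_load(f)

router = litellm.Router(model_list=config["model_list"])

# e.g. the REDIS_* keys above, later read by set_callbacks() for caching
for key, value in config.get("environment_variables", {}).items():
    os.environ[key] = value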