(feat) add cost tracking to proxy server

ishaan-jaff 2023-10-09 14:51:37 -07:00
parent a9f7a80e3d
commit 262f874621
2 changed files with 26 additions and 0 deletions


@@ -22,6 +22,7 @@ from fastapi import FastAPI, Request
from fastapi.routing import APIRouter
from fastapi.responses import StreamingResponse, FileResponse
import json
import logging
app = FastAPI()
router = APIRouter()
@@ -205,11 +206,32 @@ async def chat_completion(request: Request):
        final_prompt_value=os.getenv("MODEL_POST_PROMPT", "")
    )
    response = litellm.completion(**data)
    # track the cost of this response, using litellm.completion_cost
    await track_cost(response)
    if 'stream' in data and data['stream'] == True:  # stream chunks back via data_generator
        return StreamingResponse(data_generator(response), media_type='text/event-stream')
    print_verbose(f"response: {response}")
    return response
async def track_cost(response):
    # append the cost of each completion to cost.log; cost tracking is
    # best-effort and must never break the request path
    try:
        logging.basicConfig(
            filename='cost.log',
            level=logging.INFO,
            format='%(asctime)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        response_cost = litellm.completion_cost(completion_response=response)
        logging.info(f"Model {response.model} Cost: {response_cost:.8f}")
    except Exception:
        # swallow errors (e.g. models with no pricing data) rather than
        # failing the user's request
        pass
@router.get("/ollama_logs")
async def retrieve_server_log(request: Request):
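
For reference, a minimal standalone sketch of what the new track_cost hook does, outside the proxy: the model name and prompt below are placeholders, and the cost.log filename simply mirrors the hard-coded path in the commit.

    import logging
    import litellm

    logging.basicConfig(
        filename='cost.log',
        level=logging.INFO,
        format='%(asctime)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # placeholder call; any litellm-supported model behaves the same way
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
    )

    # same pricing lookup the proxy's track_cost hook performs
    cost = litellm.completion_cost(completion_response=response)
    logging.info(f"Model {response.model} Cost: {cost:.8f}")

Each request then appends a timestamped line to cost.log in the proxy's working directory, which is the file the /ollama_logs route below can serve back.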