diff --git a/litellm/proxy/cost.log b/litellm/proxy/cost.log
new file mode 100644
index 0000000000..566d7dbbb1
--- /dev/null
+++ b/litellm/proxy/cost.log
@@ -0,0 +1,4 @@
+2023-10-09 14:46:28 - Model gpt-3.5-turbo-0613 Cost: 6.1e-05
+2023-10-09 14:46:29 - Model gpt-3.5-turbo Cost: 0.0
+2023-10-09 14:48:18 - Model gpt-3.5-turbo-0613 Cost: 0.00004700
+2023-10-09 14:48:18 - Model gpt-3.5-turbo Cost: 0.00000000
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index e0a8ae68fe..ca1ee6f237 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -22,6 +22,7 @@ from fastapi import FastAPI, Request
 from fastapi.routing import APIRouter
 from fastapi.responses import StreamingResponse, FileResponse
 import json
+import logging

 app = FastAPI()
 router = APIRouter()
@@ -205,11 +206,32 @@ async def chat_completion(request: Request):
         final_prompt_value=os.getenv("MODEL_POST_PROMPT", "")
     )
     response = litellm.completion(**data)
+
+    # track cost of this response, using litellm.completion_cost
+    await track_cost(response)
     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
         return StreamingResponse(data_generator(response), media_type='text/event-stream')
     print_verbose(f"response: {response}")
     return response

+async def track_cost(response):
+    try:
+        logging.basicConfig(
+            filename='cost.log',
+            level=logging.INFO,
+            format='%(asctime)s - %(message)s',
+            datefmt='%Y-%m-%d %H:%M:%S'
+        )
+        import datetime
+
+        response_cost = litellm.completion_cost(completion_response=response)
+
+        logging.info(f"Model {response.model} Cost: {response_cost:.8f}")
+
+    except:
+        pass
+
+
 @router.get("/ollama_logs")
 async def retrieve_server_log(request: Request):
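
For context, here is a minimal standalone sketch of the cost lookup that the new `track_cost` helper performs, outside the proxy. It is not part of the diff; it assumes `litellm` is installed and `OPENAI_API_KEY` is set, and the model name and message are illustrative only.

```python
# Minimal sketch (not part of the diff): reproduce the cost calculation that
# track_cost performs, using litellm.completion_cost on a completion response.
import litellm

# Illustrative call; assumes OPENAI_API_KEY is set in the environment.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
)

# completion_cost() estimates the dollar cost from the response's model name
# and token usage, returning a float (e.g. 6.1e-05, as seen in cost.log above).
response_cost = litellm.completion_cost(completion_response=response)
print(f"Model {response.model} Cost: {response_cost:.8f}")
```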