Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
(feat) add cost tracking to proxy server
parent a9f7a80e3d
commit 262f874621
2 changed files with 26 additions and 0 deletions
litellm/proxy/cost.log (new file, +4)

@@ -0,0 +1,4 @@
+2023-10-09 14:46:28 - Model gpt-3.5-turbo-0613 Cost: 6.1e-05
+2023-10-09 14:46:29 - Model gpt-3.5-turbo Cost: 0.0
+2023-10-09 14:48:18 - Model gpt-3.5-turbo-0613 Cost: 0.00004700
+2023-10-09 14:48:18 - Model gpt-3.5-turbo Cost: 0.00000000
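Each log line carries a timestamp, the model name, and the cost in USD as computed by litellm.completion_cost. As a hypothetical illustration (not part of this commit), the format is regular enough to aggregate per-model spend with a few lines of Python:

    import re
    from collections import defaultdict

    # Each cost.log line looks like:
    # "2023-10-09 14:48:18 - Model gpt-3.5-turbo-0613 Cost: 0.00004700"
    LINE_RE = re.compile(r"Model (?P<model>\S+) Cost: (?P<cost>[0-9.eE+-]+)")

    totals = defaultdict(float)
    with open("cost.log") as f:
        for line in f:
            m = LINE_RE.search(line)
            if m:
                totals[m.group("model")] += float(m.group("cost"))

    for model, total in sorted(totals.items()):
        print(f"{model}: ${total:.8f}")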
litellm/proxy/proxy_server.py (+22)

@@ -22,6 +22,7 @@ from fastapi import FastAPI, Request
 from fastapi.routing import APIRouter
 from fastapi.responses import StreamingResponse, FileResponse
 import json
+import logging

 app = FastAPI()
 router = APIRouter()
@@ -205,11 +206,32 @@ async def chat_completion(request: Request):
         final_prompt_value=os.getenv("MODEL_POST_PROMPT", "")
     )
     response = litellm.completion(**data)
+
+    # track cost of this response, using litellm.completion_cost
+    await track_cost(response)
     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
         return StreamingResponse(data_generator(response), media_type='text/event-stream')
     print_verbose(f"response: {response}")
     return response
+
+async def track_cost(response):
+    try:
+        logging.basicConfig(
+            filename='cost.log',
+            level=logging.INFO,
+            format='%(asctime)s - %(message)s',
+            datefmt='%Y-%m-%d %H:%M:%S'
+        )
+        import datetime
+
+        response_cost = litellm.completion_cost(completion_response=response)
+
+        logging.info(f"Model {response.model} Cost: {response_cost:.8f}")
+
+    except:
+        pass
+

 @router.get("/ollama_logs")
 async def retrieve_server_log(request: Request):
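The cost calculation above can be reproduced outside the proxy with the same two calls the diff uses. A minimal sketch, assuming litellm is installed and an OpenAI API key is configured (the model and prompt are illustrative):

    import litellm

    # Make a completion, then compute its cost the same way track_cost() does.
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    cost = litellm.completion_cost(completion_response=response)
    print(f"Model {response.model} Cost: {cost:.8f}")

completion_cost derives the price from the token usage on the response object and the model's per-token pricing, which is what lets track_cost stay provider-agnostic.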