litellm-mirror/litellm/proxy/utils.py

import litellm
from litellm import ModelResponse
from proxy_server import llm_model_list
from typing import Optional

def track_cost_callback(
    kwargs,                                       # kwargs to completion
    completion_response: ModelResponse,           # response from completion
    start_time = None,
    end_time = None,                              # start/end time for completion
):
    """Compute and print the cost of a finished completion call.

    This is a best-effort callback: any failure while computing the cost is
    reported on stdout and never propagated to the caller.

    Args:
        kwargs: The keyword arguments the completion was invoked with. The
            keys read here are "model", "messages", "stream" and, for
            streamed calls, "complete_streaming_response".
        completion_response: The response object of a non-streaming call.
            Ignored when "complete_streaming_response" is present in kwargs.
        start_time: Unused by this function.
        end_time: Unused by this function.

    Returns:
        None. The computed cost is only printed.
    """
    try:
        # init logging config
        print("in custom callback tracking cost", llm_model_list)
        model_name = kwargs.get("model") or ""
        if "azure" in model_name:
            # for azure cost tracking, we check the provided model list in the config.yaml
            # we need to map azure/chatgpt-deployment to -> azure/gpt-3.5-turbo
            # TODO: mapping not implemented yet; the deployment name is used as-is.
            pass
        # check if it has collected an entire stream response
        if "complete_streaming_response" in kwargs:
            # for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost
            completion_response = kwargs["complete_streaming_response"]
            input_text = kwargs["messages"]
            # Guard against a missing (None) content so token counting gets text.
            output_text = completion_response["choices"][0]["message"]["content"] or ""
            response_cost = litellm.completion_cost(
                model=kwargs["model"],
                messages=input_text,
                completion=output_text,
            )
            print("streaming response_cost", response_cost)
        # for non streaming responses
        elif not kwargs.get("stream", False):
            # A missing "stream" key is treated as a non-streaming call.
            # Only the response object is passed: the prompt text is not a
            # completion and must not be supplied as `completion`.
            # NOTE(review): litellm is expected to read token usage from the
            # response object itself — confirm against litellm.completion_cost.
            response_cost = litellm.completion_cost(
                completion_response=completion_response
            )
            print("regular response_cost", response_cost)
    except Exception as e:
        # Cost tracking must never break the request path, but failures
        # should be visible instead of silently discarded.
        print(f"Error in track_cost_callback: {e}")

def update_prisma_database(token, response_cost):
    """Add ``response_cost`` to the cost stored for ``token``.

    Best-effort: every failure (including a missing Prisma client module) is
    printed and swallowed so the caller is never interrupted.

    Args:
        token: Value matched against the "token" column of
            ``LiteLLM_VerificationToken``.
        response_cost: Amount to add to the row's current "cost".

    Returns:
        None.
    """
    try:
        # Import your Prisma client
        from your_prisma_module import prisma

        # Fetch the existing row for the given token
        record = prisma.LiteLLM_VerificationToken.find_unique(
            where={
                "token": token
            }
        )
        if record is None:
            # Without a row there is nothing to update; say so explicitly
            # instead of failing on attribute access of None.
            print(f"Error updating Prisma database: no row found for token {token}")
            return

        # A row whose cost was never set (None) starts from zero.
        existing_cost = record.cost or 0

        # Calculate the new cost by adding the existing cost and response_cost
        new_cost = existing_cost + response_cost

        # Update the cost column for the given token
        # NOTE(review): read-then-write is not atomic; concurrent callbacks
        # for the same token could overwrite each other's update.
        prisma.LiteLLM_VerificationToken.update(
            where={
                "token": token
            },
            data={
                "cost": new_cost
            }
        )
        print(f"Prisma database updated for token {token}. New cost: {new_cost}")

    except Exception as e:
        print(f"Error updating Prisma database: {e}")