(fix) SpendLogs Table

2024-01-26 13:23:51 -08:00 · 2024-01-26 13:23:51 -08:00 · 55b95e87dd
commit 55b95e87dd
parent 12f569ad60
5 changed files with 25 additions and 13 deletions
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -346,9 +346,9 @@ class LiteLLM_SpendLogs(LiteLLMBase):
    model: Optional[str] = ""
    call_type: str
    spend: Optional[float] = 0.0
-    total_tokens: Optional[float] = 0.0
-    prompt_tokens: Optional[float] = 0.0
-    completion_tokens: Optional[float] = 0.0
+    total_tokens: Optional[int] = 0
+    prompt_tokens: Optional[int] = 0
+    completion_tokens: Optional[int] = 0
    startTime: Union[str, datetime, None]
    endTime: Union[str, datetime, None]
    user: Optional[str] = ""
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -11,6 +11,12 @@ model_list:
      output_cost_per_token: 0.00003
      max_tokens: 4096
      base_model: gpt-3.5-turbo
+  - model_name: gpt-4
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
+      api_version: "2023-05-15"
+      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
  - model_name: gpt-vision
    litellm_params:
      model: azure/gpt-4-vision
@@ -61,7 +67,7 @@ model_list:
 litellm_settings:
  fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
  success_callback: ['langfuse']
-  max_budget: 0.025       # global budget for proxy 
+  max_budget: 10      # global budget for proxy 
  budget_duration: 30d    # global budget duration, will reset after 30d
  # cache: True     
  # setting callback class
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -50,9 +50,9 @@ model LiteLLM_SpendLogs {
  call_type           String
  api_key             String  @default ("")
  spend               Float    @default(0.0)
-  total_tokens        Float    @default(0.0)
-  prompt_tokens       Float    @default(0.0)
-  completion_tokens   Float    @default(0.0)
+  total_tokens        Int     @default(0)
+  prompt_tokens       Int     @default(0)
+  completion_tokens   Int     @default(0)
  startTime           DateTime // Assuming start_time is a DateTime field
  endTime             DateTime // Assuming end_time is a DateTime field
  model               String   @default("")
--- a/schema.prisma
+++ b/schema.prisma
@@ -53,9 +53,9 @@ model LiteLLM_SpendLogs {
  call_type           String
  api_key             String  @default ("")
  spend               Float    @default(0.0)
-  total_tokens        Float    @default(0.0)
-  prompt_tokens       Float    @default(0.0)
-  completion_tokens   Float    @default(0.0)
+  total_tokens        Int     @default(0)
+  prompt_tokens       Int     @default(0)
+  completion_tokens   Int     @default(0)
  startTime           DateTime // Assuming start_time is a DateTime field
  endTime             DateTime // Assuming end_time is a DateTime field
  model               String   @default("")
--- a/tests/test_keys.py
+++ b/tests/test_keys.py
@@ -281,14 +281,20 @@ async def test_key_info_spend_values():
        await asyncio.sleep(5)
        spend_logs = await get_spend_logs(session=session, request_id=response["id"])
        print(f"spend_logs: {spend_logs}")
-        usage = spend_logs[0]["usage"]
+        completion_tokens = spend_logs[0]["completion_tokens"]
+        prompt_tokens = spend_logs[0]["prompt_tokens"]
+        print(f"prompt_tokens: {prompt_tokens}; completion_tokens: {completion_tokens}")
+
+        litellm.set_verbose = True
        prompt_cost, completion_cost = litellm.cost_per_token(
            model="gpt-35-turbo",
-            prompt_tokens=usage["prompt_tokens"],
-            completion_tokens=usage["completion_tokens"],
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
            custom_llm_provider="azure",
        )
+        print("prompt_cost: ", prompt_cost, "completion_cost: ", completion_cost)
        response_cost = prompt_cost + completion_cost
+        print(f"response_cost: {response_cost}")
        await asyncio.sleep(5)  # allow db log to be updated
        key_info = await get_key_info(session=session, get_key=key, call_key=key)
        print(