diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index a3d1b4815..9a5acc440 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -346,9 +346,9 @@ class LiteLLM_SpendLogs(LiteLLMBase):
     model: Optional[str] = ""
     call_type: str
     spend: Optional[float] = 0.0
-    total_tokens: Optional[float] = 0.0
-    prompt_tokens: Optional[float] = 0.0
-    completion_tokens: Optional[float] = 0.0
+    total_tokens: Optional[int] = 0
+    prompt_tokens: Optional[int] = 0
+    completion_tokens: Optional[int] = 0
     startTime: Union[str, datetime, None]
     endTime: Union[str, datetime, None]
     user: Optional[str] = ""
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 7cb2714f4..aa950c035 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -11,6 +11,12 @@ model_list:
       output_cost_per_token: 0.00003
       max_tokens: 4096
       base_model: gpt-3.5-turbo
+  - model_name: gpt-4
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
+      api_version: "2023-05-15"
+      api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
   - model_name: gpt-vision
     litellm_params:
       model: azure/gpt-4-vision
@@ -61,7 +67,7 @@ model_list:
 litellm_settings:
   fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
   success_callback: ['langfuse']
-  max_budget: 0.025 # global budget for proxy
+  max_budget: 10 # global budget for proxy
   budget_duration: 30d # global budget duration, will reset after 30d
   # cache: True
   # setting callback class
diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 2d8b0e662..2eb633209 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -50,9 +50,9 @@ model LiteLLM_SpendLogs {
   call_type         String
   api_key           String   @default ("")
   spend             Float    @default(0.0)
-  total_tokens      Float    @default(0.0)
-  prompt_tokens     Float    @default(0.0)
-  completion_tokens Float    @default(0.0)
+  total_tokens      Int      @default(0)
+  prompt_tokens     Int      @default(0)
+  completion_tokens Int      @default(0)
   startTime         DateTime // Assuming start_time is a DateTime field
   endTime           DateTime // Assuming end_time is a DateTime field
   model             String   @default("")
diff --git a/schema.prisma b/schema.prisma
index 103186aae..0882c650c 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -53,9 +53,9 @@ model LiteLLM_SpendLogs {
   call_type         String
   api_key           String   @default ("")
   spend             Float    @default(0.0)
-  total_tokens      Float    @default(0.0)
-  prompt_tokens     Float    @default(0.0)
-  completion_tokens Float    @default(0.0)
+  total_tokens      Int      @default(0)
+  prompt_tokens     Int      @default(0)
+  completion_tokens Int      @default(0)
   startTime         DateTime // Assuming start_time is a DateTime field
   endTime           DateTime // Assuming end_time is a DateTime field
   model             String   @default("")
diff --git a/tests/test_keys.py b/tests/test_keys.py
index 348be63af..a296ef13e 100644
--- a/tests/test_keys.py
+++ b/tests/test_keys.py
@@ -281,14 +281,20 @@ async def test_key_info_spend_values():
         await asyncio.sleep(5)
         spend_logs = await get_spend_logs(session=session, request_id=response["id"])
         print(f"spend_logs: {spend_logs}")
-        usage = spend_logs[0]["usage"]
+        completion_tokens = spend_logs[0]["completion_tokens"]
+        prompt_tokens = spend_logs[0]["prompt_tokens"]
+        print(f"prompt_tokens: {prompt_tokens}; completion_tokens: {completion_tokens}")
+
+        litellm.set_verbose = True
         prompt_cost, completion_cost = litellm.cost_per_token(
             model="gpt-35-turbo",
-            prompt_tokens=usage["prompt_tokens"],
-            completion_tokens=usage["completion_tokens"],
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
             custom_llm_provider="azure",
         )
+        print("prompt_cost: ", prompt_cost, "completion_cost: ", completion_cost)
         response_cost = prompt_cost + completion_cost
+        print(f"response_cost: {response_cost}")
         await asyncio.sleep(5)  # allow db log to be updated
         key_info = await get_key_info(session=session, get_key=key, call_key=key)
         print(