forked from phoenix/litellm-mirror
feat(utils.py): support cost tracking for openai/azure image gen models
This commit is contained in:
parent
1661526d97
commit
ef0171e063
5 changed files with 125 additions and 8 deletions
|
@ -150,6 +150,7 @@ jobs:
|
||||||
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
|
-e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
|
||||||
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
|
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
|
||||||
-e AWS_REGION_NAME=$AWS_REGION_NAME \
|
-e AWS_REGION_NAME=$AWS_REGION_NAME \
|
||||||
|
-e OPENAI_API_KEY=$OPENAI_API_KEY \
|
||||||
--name my-app \
|
--name my-app \
|
||||||
-v $(pwd)/proxy_server_config.yaml:/app/config.yaml \
|
-v $(pwd)/proxy_server_config.yaml:/app/config.yaml \
|
||||||
my-app:latest \
|
my-app:latest \
|
||||||
|
|
|
@ -1079,7 +1079,7 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time):
|
||||||
metadata = (
|
metadata = (
|
||||||
litellm_params.get("metadata", {}) or {}
|
litellm_params.get("metadata", {}) or {}
|
||||||
) # if litellm_params['metadata'] == None
|
) # if litellm_params['metadata'] == None
|
||||||
call_type = kwargs.get("call_type", "litellm.completion")
|
call_type = kwargs.get("call_type")
|
||||||
cache_hit = kwargs.get("cache_hit", False)
|
cache_hit = kwargs.get("cache_hit", False)
|
||||||
usage = response_obj["usage"]
|
usage = response_obj["usage"]
|
||||||
if type(usage) == litellm.Usage:
|
if type(usage) == litellm.Usage:
|
||||||
|
@ -1118,6 +1118,7 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time):
|
||||||
"completion_tokens": usage.get("completion_tokens", 0),
|
"completion_tokens": usage.get("completion_tokens", 0),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
verbose_proxy_logger.debug(f"SpendTable: created payload - payload: {payload}\n\n")
|
||||||
json_fields = [
|
json_fields = [
|
||||||
field
|
field
|
||||||
for field, field_type in LiteLLM_SpendLogs.__annotations__.items()
|
for field, field_type in LiteLLM_SpendLogs.__annotations__.items()
|
||||||
|
|
|
@ -804,6 +804,7 @@ class Logging:
|
||||||
"stream": self.stream,
|
"stream": self.stream,
|
||||||
"user": user,
|
"user": user,
|
||||||
"call_type": str(self.call_type),
|
"call_type": str(self.call_type),
|
||||||
|
"litellm_call_id": self.litellm_call_id,
|
||||||
**self.optional_params,
|
**self.optional_params,
|
||||||
**additional_params,
|
**additional_params,
|
||||||
}
|
}
|
||||||
|
@ -1056,6 +1057,7 @@ class Logging:
|
||||||
and (
|
and (
|
||||||
isinstance(result, ModelResponse)
|
isinstance(result, ModelResponse)
|
||||||
or isinstance(result, EmbeddingResponse)
|
or isinstance(result, EmbeddingResponse)
|
||||||
|
or isinstance(result, ImageResponse)
|
||||||
)
|
)
|
||||||
and self.stream != True
|
and self.stream != True
|
||||||
): # handle streaming separately
|
): # handle streaming separately
|
||||||
|
@ -1063,11 +1065,24 @@ class Logging:
|
||||||
if self.model_call_details.get("cache_hit", False) == True:
|
if self.model_call_details.get("cache_hit", False) == True:
|
||||||
self.model_call_details["response_cost"] = 0.0
|
self.model_call_details["response_cost"] = 0.0
|
||||||
else:
|
else:
|
||||||
self.model_call_details[
|
result._hidden_params["optional_params"] = self.optional_params
|
||||||
"response_cost"
|
if (
|
||||||
] = litellm.completion_cost(
|
self.call_type == CallTypes.aimage_generation.value
|
||||||
completion_response=result,
|
or self.call_type == CallTypes.image_generation.value
|
||||||
)
|
):
|
||||||
|
self.model_call_details[
|
||||||
|
"response_cost"
|
||||||
|
] = litellm.completion_cost(
|
||||||
|
completion_response=result,
|
||||||
|
model=self.model,
|
||||||
|
call_type=self.call_type,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.model_call_details[
|
||||||
|
"response_cost"
|
||||||
|
] = litellm.completion_cost(
|
||||||
|
completion_response=result, call_type=self.call_type
|
||||||
|
)
|
||||||
verbose_logger.debug(
|
verbose_logger.debug(
|
||||||
f"Model={self.model}; cost={self.model_call_details['response_cost']}"
|
f"Model={self.model}; cost={self.model_call_details['response_cost']}"
|
||||||
)
|
)
|
||||||
|
@ -3174,6 +3189,16 @@ def completion_cost(
|
||||||
messages: List = [],
|
messages: List = [],
|
||||||
completion="",
|
completion="",
|
||||||
total_time=0.0, # used for replicate, sagemaker
|
total_time=0.0, # used for replicate, sagemaker
|
||||||
|
call_type: Literal[
|
||||||
|
"completion",
|
||||||
|
"acompletion",
|
||||||
|
"embedding",
|
||||||
|
"aembedding",
|
||||||
|
"atext_completion",
|
||||||
|
"text_completion",
|
||||||
|
"image_generation",
|
||||||
|
"aimage_generation",
|
||||||
|
] = "completion",
|
||||||
### REGION ###
|
### REGION ###
|
||||||
custom_llm_provider=None,
|
custom_llm_provider=None,
|
||||||
region_name=None, # used for bedrock pricing
|
region_name=None, # used for bedrock pricing
|
||||||
|
@ -3232,6 +3257,19 @@ def completion_cost(
|
||||||
region_name = completion_response._hidden_params.get(
|
region_name = completion_response._hidden_params.get(
|
||||||
"region_name", region_name
|
"region_name", region_name
|
||||||
)
|
)
|
||||||
|
size = completion_response._hidden_params.get(
|
||||||
|
"optional_params", {}
|
||||||
|
).get(
|
||||||
|
"size", "1024-x-1024"
|
||||||
|
) # openai default
|
||||||
|
quality = completion_response._hidden_params.get(
|
||||||
|
"optional_params", {}
|
||||||
|
).get(
|
||||||
|
"quality", "standard"
|
||||||
|
) # openai default
|
||||||
|
n = completion_response._hidden_params.get("optional_params", {}).get(
|
||||||
|
"n", 1
|
||||||
|
) # openai default
|
||||||
else:
|
else:
|
||||||
if len(messages) > 0:
|
if len(messages) > 0:
|
||||||
prompt_tokens = token_counter(model=model, messages=messages)
|
prompt_tokens = token_counter(model=model, messages=messages)
|
||||||
|
@ -3243,7 +3281,10 @@ def completion_cost(
|
||||||
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
f"Model is None and does not exist in passed completion_response. Passed completion_response={completion_response}, model={model}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if size is not None and n is not None:
|
if (
|
||||||
|
call_type == CallTypes.image_generation.value
|
||||||
|
or call_type == CallTypes.aimage_generation.value
|
||||||
|
):
|
||||||
### IMAGE GENERATION COST CALCULATION ###
|
### IMAGE GENERATION COST CALCULATION ###
|
||||||
image_gen_model_name = f"{size}/{model}"
|
image_gen_model_name = f"{size}/{model}"
|
||||||
image_gen_model_name_with_quality = image_gen_model_name
|
image_gen_model_name_with_quality = image_gen_model_name
|
||||||
|
|
|
@ -42,6 +42,9 @@ model_list:
|
||||||
api_version: 2023-06-01-preview
|
api_version: 2023-06-01-preview
|
||||||
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
|
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
|
||||||
api_key: os.environ/AZURE_API_KEY
|
api_key: os.environ/AZURE_API_KEY
|
||||||
|
- model_name: openai-dall-e-3
|
||||||
|
litellm_params:
|
||||||
|
model: dall-e-3
|
||||||
|
|
||||||
litellm_settings:
|
litellm_settings:
|
||||||
drop_params: True
|
drop_params: True
|
||||||
|
|
|
@ -14,7 +14,11 @@ import litellm
|
||||||
|
|
||||||
|
|
||||||
async def generate_key(
|
async def generate_key(
|
||||||
session, i, budget=None, budget_duration=None, models=["azure-models", "gpt-4"]
|
session,
|
||||||
|
i,
|
||||||
|
budget=None,
|
||||||
|
budget_duration=None,
|
||||||
|
models=["azure-models", "gpt-4", "dall-e-3"],
|
||||||
):
|
):
|
||||||
url = "http://0.0.0.0:4000/key/generate"
|
url = "http://0.0.0.0:4000/key/generate"
|
||||||
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
|
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
|
||||||
|
@ -129,6 +133,39 @@ async def chat_completion(session, key, model="gpt-4"):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
async def image_generation(session, key, model="dall-e-3"):
    """
    Send an image generation request to the locally running proxy.

    Makes up to 3 attempts:
    - a response with a non-200 status raises immediately (no retry)
    - any other failure while sending/reading the request is printed and retried

    Args:
        session: HTTP client session; must expose `post(url, headers=..., json=...)`
            usable as an async context manager (e.g. aiohttp.ClientSession).
        key: proxy API key, sent as the Bearer token.
        model: model name placed in the request body.

    Returns:
        The parsed JSON body of the first 200 response, or None if every
        attempt failed with a non-status error.

    Raises:
        Exception: if the proxy answers with a status code other than 200.
    """
    url = "http://0.0.0.0:4000/v1/images/generations"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    data = {
        "model": model,
        "prompt": "A cute baby sea otter",
    }
    max_attempts = 3

    for attempt in range(1, max_attempts + 1):
        try:
            async with session.post(url, headers=headers, json=data) as response:
                status = response.status
                response_text = await response.text()

                print(response_text)
                print()

                if status == 200:
                    return await response.json()
        except Exception as e:
            # Best-effort retry: report the failure instead of swallowing it
            # silently, then try again.
            print(
                f"image_generation attempt {attempt}/{max_attempts} failed: {e!r}"
            )
            continue

        # Reached only when the request completed with a non-200 status. Raising
        # outside the `try` keeps this error from being caught by the retry
        # handler, so no matching on the exception message is needed.
        raise Exception(
            f"Request did not return a 200 status code: {status}. Response: {response_text}"
        )

    # All attempts failed with non-status errors.
    return None
|
||||||
|
|
||||||
|
|
||||||
async def chat_completion_streaming(session, key, model="gpt-4"):
|
async def chat_completion_streaming(session, key, model="gpt-4"):
|
||||||
client = AsyncOpenAI(api_key=key, base_url="http://0.0.0.0:4000")
|
client = AsyncOpenAI(api_key=key, base_url="http://0.0.0.0:4000")
|
||||||
messages = [
|
messages = [
|
||||||
|
@ -357,6 +394,40 @@ async def test_key_info_spend_values_streaming():
|
||||||
assert rounded_response_cost == rounded_key_info_spend
|
assert rounded_response_cost == rounded_key_info_spend
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_key_info_spend_values_image_generation():
    """
    Test to ensure spend is tracked for image generation calls
    - create key
    - make image gen call
    - assert the spend recorded on the key is greater than zero
    """

    async def retry_request(func, *args, _max_attempts=5, **kwargs):
        # Retry only on aiohttp ClientOSError; any other exception propagates.
        for attempt in range(_max_attempts):
            try:
                return await func(*args, **kwargs)
            except aiohttp.client_exceptions.ClientOSError:
                if attempt + 1 == _max_attempts:
                    raise  # re-raise the last ClientOSError if all attempts failed
                print(f"Attempt {attempt+1} failed, retrying...")

    async with aiohttp.ClientSession(
        timeout=aiohttp.ClientTimeout(total=600)
    ) as session:
        ## Test Spend Update ##
        # image generation
        key_gen = await generate_key(session=session, i=0)
        key = key_gen["key"]
        response = await image_generation(session=session, key=key)
        # image_generation returns None when all of its retries fail; surface
        # that here rather than as a confusing "spend == 0" failure below.
        assert response is not None, "image generation call returned no response"
        # NOTE(review): presumably gives the proxy time to record spend before
        # it is read back - confirm against the proxy's spend-logging behavior.
        await asyncio.sleep(5)
        key_info = await retry_request(
            get_key_info, session=session, get_key=key, call_key=key
        )
        spend = key_info["info"]["spend"]
        assert spend > 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_key_with_budgets():
|
async def test_key_with_budgets():
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue