Merge pull request #1690 from BerriAI/litellm_custom_pricing_fix

fix(main.py): register both model name and model name with provider
Krish Dholakia 2024-01-30 13:56:38 -08:00 committed by GitHub
commit 8ef7e9ad20
5 changed files with 28 additions and 30 deletions


@@ -592,26 +592,37 @@ def completion(
     ### REGISTER CUSTOM MODEL PRICING -- IF GIVEN ###
     if input_cost_per_token is not None and output_cost_per_token is not None:
+        print_verbose(f"Registering model={model} in model cost map")
         litellm.register_model(
             {
+                f"{custom_llm_provider}/{model}": {
+                    "input_cost_per_token": input_cost_per_token,
+                    "output_cost_per_token": output_cost_per_token,
+                    "litellm_provider": custom_llm_provider,
+                },
                 model: {
                     "input_cost_per_token": input_cost_per_token,
                     "output_cost_per_token": output_cost_per_token,
                     "litellm_provider": custom_llm_provider,
-                }
+                },
             }
         )
-    if (
+    elif (
         input_cost_per_second is not None
     ):  # time based pricing just needs cost in place
         output_cost_per_second = output_cost_per_second or 0.0
         litellm.register_model(
             {
+                f"{custom_llm_provider}/{model}": {
+                    "input_cost_per_second": input_cost_per_second,
+                    "output_cost_per_second": output_cost_per_second,
+                    "litellm_provider": custom_llm_provider,
+                },
                 model: {
                     "input_cost_per_second": input_cost_per_second,
                     "output_cost_per_second": output_cost_per_second,
                     "litellm_provider": custom_llm_provider,
-                }
+                },
             }
         )
     ### BUILD CUSTOM PROMPT TEMPLATE -- IF GIVEN ###
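
The hunk above is the core of the fix: the custom price is now registered under both the provider-prefixed name and the bare model name, so later cost lookups succeed regardless of which form the caller uses. A minimal sketch of what that registration amounts to, with a hypothetical model name and illustrative prices (neither is from this PR):

import litellm

# Illustrative prices for a hypothetical fine-tuned model: $2 / 1M input tokens, $4 / 1M output tokens.
litellm.register_model(
    {
        "openai/my-finetuned-gpt": {
            "input_cost_per_token": 2e-06,
            "output_cost_per_token": 4e-06,
            "litellm_provider": "openai",
        },
        "my-finetuned-gpt": {
            "input_cost_per_token": 2e-06,
            "output_cost_per_token": 4e-06,
            "litellm_provider": "openai",
        },
    }
)

With both keys present, a lookup by either "my-finetuned-gpt" or "openai/my-finetuned-gpt" resolves to the same entry in the model cost map.
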
@@ -3270,6 +3281,7 @@ async def ahealth_check(
 ## Set verbose to true -> ```litellm.set_verbose = True```
 def print_verbose(print_statement):
     try:
+        verbose_logger.debug(print_statement)
         if litellm.set_verbose:
             print(print_statement) # noqa
     except:


@@ -17,7 +17,12 @@ class MaxParallelRequestsHandler(CustomLogger):
         pass

     def print_verbose(self, print_statement):
-        verbose_proxy_logger.debug(print_statement)
+        try:
+            verbose_proxy_logger.debug(print_statement)
+            if litellm.set_verbose:
+                print(print_statement) # noqa
+        except:
+            pass

     async def async_pre_call_hook(
         self,

@@ -21,6 +21,7 @@ from datetime import datetime, timedelta
 def print_verbose(print_statement):
+    verbose_proxy_logger.debug(print_statement)
     if litellm.set_verbose:
         print(f"LiteLLM Proxy: {print_statement}") # noqa
@@ -96,6 +97,7 @@ class ProxyLogging:
         2. /embeddings
         3. /image/generation
         """
+        print_verbose(f"Inside Proxy Logging Pre-call hook!")
         ### ALERTING ###
         asyncio.create_task(self.response_taking_too_long(request_data=data))
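
In the pre-call hook above, asyncio.create_task schedules the slow-response alert without awaiting it, so the request keeps moving while the alert coroutine runs in the background. A minimal sketch of that fire-and-forget pattern with a hypothetical alert coroutine (not the ProxyLogging implementation):

import asyncio

async def response_taking_too_long(request_data: dict, threshold_s: float = 0.1):
    # Hypothetical alert: wait for the threshold, then report that the request is still pending.
    await asyncio.sleep(threshold_s)
    print(f"ALERT: {request_data.get('model')} still running after {threshold_s}s")

async def handle_request(data: dict):
    # Fire-and-forget: scheduling the alert does not block the request path.
    asyncio.create_task(response_taking_too_long(request_data=data))
    await asyncio.sleep(0.2)  # stand-in for the actual LLM call outlasting the threshold
    return {"ok": True}

asyncio.run(handle_request({"model": "azure-model"}))
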
@@ -1035,7 +1037,7 @@ async def send_email(sender_name, sender_email, receiver_email, subject, html):
     print_verbose(f"SMTP Connection Init")
     # Establish a secure connection with the SMTP server
     with smtplib.SMTP(smtp_host, smtp_port) as server:
-        if os.getenv("SMTP_TLS", 'True') != "False":
+        if os.getenv("SMTP_TLS", "True") != "False":
            server.starttls()

        # Login to your email account
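
Besides normalizing the quotes, the changed line pins down the intended behaviour: STARTTLS is used unless the SMTP_TLS environment variable is explicitly set to the string "False". A small sketch of that gate in isolation (the host and port defaults are placeholders):

import os
import smtplib

smtp_host = os.getenv("SMTP_HOST", "localhost")  # placeholder default
smtp_port = int(os.getenv("SMTP_PORT", "587"))   # placeholder default

with smtplib.SMTP(smtp_host, smtp_port) as server:
    # TLS stays on by default; only SMTP_TLS="False" disables it.
    if os.getenv("SMTP_TLS", "True") != "False":
        server.starttls()
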


@@ -456,6 +456,7 @@ async def test_streaming_router_call():
 @pytest.mark.asyncio
 async def test_streaming_router_tpm_limit():
+    litellm.set_verbose = True
     model_list = [
         {
             "model_name": "azure-model",
@@ -520,7 +521,7 @@ async def test_streaming_router_tpm_limit():
     )
     async for chunk in response:
         continue
-    await asyncio.sleep(1) # success is done in a separate thread
+    await asyncio.sleep(5) # success is done in a separate thread
     try:
         await parallel_request_handler.async_pre_call_hook(
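
The sleep is raised from 1s to 5s because the TPM bookkeeping happens in a success callback that runs on a separate thread, and the assertion that follows can only pass once that thread has recorded the usage. As an alternative to guessing a fixed delay, here is a generic, hypothetical sketch (not LiteLLM's test helpers) that polls for the background result with a deadline:

import asyncio
import threading
import time

usage = {"total_tokens": 0}

def success_callback():
    # Hypothetical stand-in for work done on a separate thread after a request finishes.
    time.sleep(2)
    usage["total_tokens"] = 50

async def wait_for_usage(timeout_s: float = 10.0):
    # Poll until the background thread has recorded usage, instead of a fixed sleep.
    deadline = time.monotonic() + timeout_s
    while usage["total_tokens"] == 0:
        if time.monotonic() > deadline:
            raise TimeoutError("success callback never recorded usage")
        await asyncio.sleep(0.1)

async def main():
    threading.Thread(target=success_callback).start()
    await wait_for_usage()
    print("recorded tokens:", usage["total_tokens"])

asyncio.run(main())
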


@@ -2929,32 +2929,10 @@ def cost_per_token(
         model_with_provider_and_region in model_cost_ref
     ): # use region based pricing, if it's available
         model_with_provider = model_with_provider_and_region
+    if model_with_provider in model_cost_ref:
+        model = model_with_provider
     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
-    if model_with_provider in model_cost_ref:
-        print_verbose(
-            f"Success: model={model_with_provider} in model_cost_map - {model_cost_ref[model_with_provider]}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('input_cost_per_token', None)} for prompt_tokens={prompt_tokens}"
-        )
-        prompt_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens
-        )
-        print_verbose(
-            f"calculated prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('output_cost_per_token', None)} for completion_tokens={completion_tokens}"
-        )
-        completion_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["output_cost_per_token"]
-            * completion_tokens
-        )
-        print_verbose(
-            f"calculated completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
-        )
-        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     if model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
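
Whichever key wins the lookup, the removed branch above shows the arithmetic being preserved: cost is the per-token price multiplied by the token count, computed separately for prompt and completion. A worked sketch with hypothetical prices:

# Hypothetical prices: $0.50 per 1M input tokens, $1.50 per 1M output tokens.
input_cost_per_token = 0.5 / 1_000_000
output_cost_per_token = 1.5 / 1_000_000

prompt_tokens = 1_200
completion_tokens = 300

prompt_tokens_cost_usd_dollar = input_cost_per_token * prompt_tokens           # 0.0006
completion_tokens_cost_usd_dollar = output_cost_per_token * completion_tokens  # 0.00045

total = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar      # 0.00105
print(total)
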