Merge pull request #1690 from BerriAI/litellm_custom_pricing_fix
fix(main.py): register both model name and model name with provider
Commit 8ef7e9ad20

5 changed files with 28 additions and 30 deletions
@@ -592,26 +592,37 @@ def completion(
         ### REGISTER CUSTOM MODEL PRICING -- IF GIVEN ###
         if input_cost_per_token is not None and output_cost_per_token is not None:
             print_verbose(f"Registering model={model} in model cost map")
             litellm.register_model(
                 {
                     f"{custom_llm_provider}/{model}": {
                         "input_cost_per_token": input_cost_per_token,
                         "output_cost_per_token": output_cost_per_token,
                         "litellm_provider": custom_llm_provider,
                     },
+                    model: {
+                        "input_cost_per_token": input_cost_per_token,
+                        "output_cost_per_token": output_cost_per_token,
+                        "litellm_provider": custom_llm_provider,
+                    },
                 }
             )
-        if (
+        elif (
             input_cost_per_second is not None
         ):  # time based pricing just needs cost in place
             output_cost_per_second = output_cost_per_second or 0.0
             litellm.register_model(
                 {
                     f"{custom_llm_provider}/{model}": {
                         "input_cost_per_second": input_cost_per_second,
                         "output_cost_per_second": output_cost_per_second,
                         "litellm_provider": custom_llm_provider,
                     },
+                    model: {
+                        "input_cost_per_second": input_cost_per_second,
+                        "output_cost_per_second": output_cost_per_second,
+                        "litellm_provider": custom_llm_provider,
+                    },
                 }
             )
         ### BUILD CUSTOM PROMPT TEMPLATE -- IF GIVEN ###
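For context, registering the same pricing under both the `{provider}/{model}` key and the bare `model` key is what the commit title refers to: later cost lookups succeed whichever spelling the caller uses. A minimal hand-rolled sketch of that registration (the model name and prices are invented; `litellm.register_model` and the `litellm.model_cost` map are litellm's public surface):

    import litellm

    # Register invented pricing under both spellings, mirroring completion() above.
    litellm.register_model(
        {
            "openai/my-fine-tune": {
                "input_cost_per_token": 2e-06,
                "output_cost_per_token": 4e-06,
                "litellm_provider": "openai",
            },
            "my-fine-tune": {
                "input_cost_per_token": 2e-06,
                "output_cost_per_token": 4e-06,
                "litellm_provider": "openai",
            },
        }
    )

    # Both spellings now resolve in the cost map.
    assert "openai/my-fine-tune" in litellm.model_cost
    assert "my-fine-tune" in litellm.model_cost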
@@ -3270,6 +3281,7 @@ async def ahealth_check(
 ## Set verbose to true -> ```litellm.set_verbose = True```
 def print_verbose(print_statement):
     try:
+        verbose_logger.debug(print_statement)
         if litellm.set_verbose:
             print(print_statement)  # noqa
     except:
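One practical effect of the added `verbose_logger.debug(...)` call: debug output can be captured by standard `logging` handlers rather than only echoed via `print`. A short sketch, assuming (an assumption, not shown in this diff) that `verbose_logger` is a stdlib logging logger:

    import logging

    import litellm

    # Assumption: litellm's verbose_logger is a standard logging.Logger, so a
    # root handler at DEBUG level will receive print_verbose output.
    logging.basicConfig(level=logging.DEBUG)
    litellm.set_verbose = True  # additionally enables the print() path above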
@@ -17,7 +17,12 @@ class MaxParallelRequestsHandler(CustomLogger):
         pass

     def print_verbose(self, print_statement):
         try:
             verbose_proxy_logger.debug(print_statement)
             if litellm.set_verbose:
                 print(print_statement)  # noqa
         except:
             pass

     async def async_pre_call_hook(
         self,
@@ -21,6 +21,7 @@ from datetime import datetime, timedelta


 def print_verbose(print_statement):
+    verbose_proxy_logger.debug(print_statement)
     if litellm.set_verbose:
         print(f"LiteLLM Proxy: {print_statement}")  # noqa

@@ -96,6 +97,7 @@ class ProxyLogging:
         2. /embeddings
         3. /image/generation
         """
+        print_verbose(f"Inside Proxy Logging Pre-call hook!")
         ### ALERTING ###
         asyncio.create_task(self.response_taking_too_long(request_data=data))
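The alerting line is a fire-and-forget pattern: `asyncio.create_task` schedules the slow-response watchdog and returns immediately, so the pre-call hook never blocks the request. A self-contained sketch of the idea (the function bodies and the one-second threshold are illustrative, not litellm's):

    import asyncio

    async def response_taking_too_long(request_data: dict) -> None:
        # Hypothetical watchdog: fires if we are still waiting after the threshold.
        await asyncio.sleep(1.0)
        print("still waiting on:", request_data.get("model"))

    async def pre_call_hook(request_data: dict) -> None:
        # Schedule the watchdog without awaiting it; the hook returns at once.
        asyncio.create_task(response_taking_too_long(request_data))

    async def main() -> None:
        await pre_call_hook({"model": "azure-model"})
        await asyncio.sleep(1.5)  # keep the loop alive so the watchdog can fire

    asyncio.run(main())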
@@ -1035,7 +1037,7 @@ async def send_email(sender_name, sender_email, receiver_email, subject, html):
     print_verbose(f"SMTP Connection Init")
     # Establish a secure connection with the SMTP server
     with smtplib.SMTP(smtp_host, smtp_port) as server:
-        if os.getenv("SMTP_TLS", 'True') != "False":
+        if os.getenv("SMTP_TLS", "True") != "False":
             server.starttls()

         # Login to your email account
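The change itself is only quote-style normalization, but the surrounding pattern is worth spelling out: STARTTLS stays on unless the SMTP_TLS environment variable is set to the exact string "False". A standalone sketch of that opt-out toggle (the host/port defaults are hypothetical, and a reachable SMTP server is needed to actually run it):

    import os
    import smtplib

    smtp_host = os.getenv("SMTP_HOST", "localhost")  # hypothetical default
    smtp_port = int(os.getenv("SMTP_PORT", "587"))   # hypothetical default

    with smtplib.SMTP(smtp_host, smtp_port) as server:
        # Opt-out flag: anything other than the exact string "False" keeps TLS on.
        if os.getenv("SMTP_TLS", "True") != "False":
            server.starttls()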
@@ -456,6 +456,7 @@ async def test_streaming_router_call():

 @pytest.mark.asyncio
 async def test_streaming_router_tpm_limit():
+    litellm.set_verbose = True
     model_list = [
         {
             "model_name": "azure-model",
@@ -520,7 +521,7 @@ async def test_streaming_router_tpm_limit():
     )
     async for chunk in response:
         continue
-    await asyncio.sleep(1)  # success is done in a separate thread
+    await asyncio.sleep(5)  # success is done in a separate thread

     try:
         await parallel_request_handler.async_pre_call_hook(
@@ -2929,32 +2929,10 @@ def cost_per_token(
         model_with_provider_and_region in model_cost_ref
     ):  # use region based pricing, if it's available
         model_with_provider = model_with_provider_and_region
+    if model_with_provider in model_cost_ref:
+        model = model_with_provider
     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
-    if model_with_provider in model_cost_ref:
-        print_verbose(
-            f"Success: model={model_with_provider} in model_cost_map - {model_cost_ref[model_with_provider]}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('input_cost_per_token', None)} for prompt_tokens={prompt_tokens}"
-        )
-        prompt_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens
-        )
-        print_verbose(
-            f"calculated prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('output_cost_per_token', None)} for completion_tokens={completion_tokens}"
-        )
-        completion_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["output_cost_per_token"]
-            * completion_tokens
-        )
-        print_verbose(
-            f"calculated completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
-        )
-        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     if model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
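Tying the two ends together: once completion() has registered pricing under both names, this consolidated lookup prices the call whichever spelling reaches it, instead of duplicating the cost math in a separate branch. A hedged usage sketch with the same invented model and prices as earlier (`litellm.cost_per_token` returns a (prompt_cost, completion_cost) tuple in USD):

    import litellm

    # Invented pricing, as in the earlier sketch.
    litellm.register_model(
        {
            "my-fine-tune": {
                "input_cost_per_token": 2e-06,
                "output_cost_per_token": 4e-06,
                "litellm_provider": "openai",
            }
        }
    )

    prompt_cost, completion_cost = litellm.cost_per_token(
        model="my-fine-tune", prompt_tokens=100, completion_tokens=20
    )
    print(prompt_cost, completion_cost)  # 0.0002 8e-05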