diff --git a/litellm/main.py b/litellm/main.py
index 851a89cb3..bf4132863 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -592,26 +592,37 @@ def completion(
 
         ### REGISTER CUSTOM MODEL PRICING -- IF GIVEN ###
         if input_cost_per_token is not None and output_cost_per_token is not None:
+            print_verbose(f"Registering model={model} in model cost map")
             litellm.register_model(
                 {
+                    f"{custom_llm_provider}/{model}": {
+                        "input_cost_per_token": input_cost_per_token,
+                        "output_cost_per_token": output_cost_per_token,
+                        "litellm_provider": custom_llm_provider,
+                    },
                     model: {
                         "input_cost_per_token": input_cost_per_token,
                         "output_cost_per_token": output_cost_per_token,
                         "litellm_provider": custom_llm_provider,
-                    }
+                    },
                 }
             )
-        if (
+        elif (
            input_cost_per_second is not None
         ):  # time based pricing just needs cost in place
             output_cost_per_second = output_cost_per_second or 0.0
             litellm.register_model(
                 {
+                    f"{custom_llm_provider}/{model}": {
+                        "input_cost_per_second": input_cost_per_second,
+                        "output_cost_per_second": output_cost_per_second,
+                        "litellm_provider": custom_llm_provider,
+                    },
                     model: {
                         "input_cost_per_second": input_cost_per_second,
                         "output_cost_per_second": output_cost_per_second,
                         "litellm_provider": custom_llm_provider,
-                    }
+                    },
                 }
             )
         ### BUILD CUSTOM PROMPT TEMPLATE -- IF GIVEN ###
@@ -3270,6 +3281,7 @@ async def ahealth_check(
 ## Set verbose to true -> ```litellm.set_verbose = True```
 def print_verbose(print_statement):
     try:
+        verbose_logger.debug(print_statement)
         if litellm.set_verbose:
             print(print_statement)  # noqa
     except:
diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py
index 30877daf3..5c1893ea5 100644
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@@ -17,7 +17,12 @@ class MaxParallelRequestsHandler(CustomLogger):
         pass
 
     def print_verbose(self, print_statement):
-        verbose_proxy_logger.debug(print_statement)
+        try:
+            verbose_proxy_logger.debug(print_statement)
+            if litellm.set_verbose:
+                print(print_statement)  # noqa
+        except:
+            pass
 
     async def async_pre_call_hook(
         self,
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 9e46db796..3ec45203f 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -21,6 +21,7 @@ from datetime import datetime, timedelta
 
 
 def print_verbose(print_statement):
+    verbose_proxy_logger.debug(print_statement)
     if litellm.set_verbose:
         print(f"LiteLLM Proxy: {print_statement}")  # noqa
 
@@ -96,6 +97,7 @@ class ProxyLogging:
         2. /embeddings
         3. /image/generation
         """
+        print_verbose(f"Inside Proxy Logging Pre-call hook!")
         ### ALERTING ###
         asyncio.create_task(self.response_taking_too_long(request_data=data))
 
@@ -1035,7 +1037,7 @@ async def send_email(sender_name, sender_email, receiver_email, subject, html):
         print_verbose(f"SMTP Connection Init")
         # Establish a secure connection with the SMTP server
         with smtplib.SMTP(smtp_host, smtp_port) as server:
-            if os.getenv("SMTP_TLS", 'True') != "False":
+            if os.getenv("SMTP_TLS", "True") != "False":
                 server.starttls()
 
             # Login to your email account
diff --git a/litellm/tests/test_parallel_request_limiter.py b/litellm/tests/test_parallel_request_limiter.py
index 384044c9c..dee909eaa 100644
--- a/litellm/tests/test_parallel_request_limiter.py
+++ b/litellm/tests/test_parallel_request_limiter.py
@@ -456,6 +456,7 @@ async def test_streaming_router_call():
 
 @pytest.mark.asyncio
 async def test_streaming_router_tpm_limit():
+    litellm.set_verbose = True
     model_list = [
         {
             "model_name": "azure-model",
@@ -520,7 +521,7 @@ async def test_streaming_router_tpm_limit():
     )
     async for chunk in response:
         continue
-    await asyncio.sleep(1)  # success is done in a separate thread
+    await asyncio.sleep(5)  # success is done in a separate thread
 
     try:
         await parallel_request_handler.async_pre_call_hook(
diff --git a/litellm/utils.py b/litellm/utils.py
index bbc4e651c..3aaf53514 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2929,32 +2929,10 @@ def cost_per_token(
         model_with_provider_and_region in model_cost_ref
     ):  # use region based pricing, if it's available
         model_with_provider = model_with_provider_and_region
+    if model_with_provider in model_cost_ref:
+        model = model_with_provider  # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
-    if model_with_provider in model_cost_ref:
-        print_verbose(
-            f"Success: model={model_with_provider} in model_cost_map - {model_cost_ref[model_with_provider]}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('input_cost_per_token', None)} for prompt_tokens={prompt_tokens}"
-        )
-        prompt_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens
-        )
-        print_verbose(
-            f"calculated prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('output_cost_per_token', None)} for completion_tokens={completion_tokens}"
-        )
-        completion_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["output_cost_per_token"]
-            * completion_tokens
-        )
-        print_verbose(
-            f"calculated completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
-        )
-        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     if model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
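
Usage note (not part of the diff): the main.py hunk registers custom pricing under both the provider-prefixed key and the bare model name, so cost lookups in `cost_per_token` succeed for either form. A minimal sketch of the equivalent `litellm.register_model` call; the provider, model name, and per-token costs here are hypothetical placeholders, not values from this change:

```python
import litellm

# Register the same custom pricing entry twice, keyed by "{provider}/{model}"
# and by the bare model name, mirroring what completion() now does.
litellm.register_model(
    {
        "openai/my-custom-model": {
            "input_cost_per_token": 1e-06,
            "output_cost_per_token": 2e-06,
            "litellm_provider": "openai",
        },
        "my-custom-model": {
            "input_cost_per_token": 1e-06,
            "output_cost_per_token": 2e-06,
            "litellm_provider": "openai",
        },
    }
)
```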