Merge pull request #1690 from BerriAI/litellm_custom_pricing_fix

fix(main.py): register both model name and model name with provider
Krish Dholakia 2024-01-30 13:56:38 -08:00 committed by GitHub
commit 8ef7e9ad20
5 changed files with 28 additions and 30 deletions


@@ -592,26 +592,37 @@ def completion(
     ### REGISTER CUSTOM MODEL PRICING -- IF GIVEN ###
     if input_cost_per_token is not None and output_cost_per_token is not None:
+        print_verbose(f"Registering model={model} in model cost map")
         litellm.register_model(
             {
+                f"{custom_llm_provider}/{model}": {
+                    "input_cost_per_token": input_cost_per_token,
+                    "output_cost_per_token": output_cost_per_token,
+                    "litellm_provider": custom_llm_provider,
+                },
                 model: {
                     "input_cost_per_token": input_cost_per_token,
                     "output_cost_per_token": output_cost_per_token,
                     "litellm_provider": custom_llm_provider,
-                }
+                },
             }
         )
-    if (
+    elif (
         input_cost_per_second is not None
     ):  # time based pricing just needs cost in place
         output_cost_per_second = output_cost_per_second or 0.0
         litellm.register_model(
             {
+                f"{custom_llm_provider}/{model}": {
+                    "input_cost_per_second": input_cost_per_second,
+                    "output_cost_per_second": output_cost_per_second,
+                    "litellm_provider": custom_llm_provider,
+                },
                 model: {
                     "input_cost_per_second": input_cost_per_second,
                     "output_cost_per_second": output_cost_per_second,
                     "litellm_provider": custom_llm_provider,
-                }
+                },
             }
         )
     ### BUILD CUSTOM PROMPT TEMPLATE -- IF GIVEN ###
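
The hunk above is the core of the fix: the custom price is now registered under both the provider-prefixed name and the bare model name, so later cost lookups succeed regardless of which form the caller uses. A minimal sketch of what that registration amounts to, with a hypothetical model name and illustrative prices (neither is from this PR):

import litellm

# Illustrative prices for a hypothetical fine-tuned model: $2 / 1M input tokens, $4 / 1M output tokens.
litellm.register_model(
    {
        "openai/my-finetuned-gpt": {
            "input_cost_per_token": 2e-06,
            "output_cost_per_token": 4e-06,
            "litellm_provider": "openai",
        },
        "my-finetuned-gpt": {
            "input_cost_per_token": 2e-06,
            "output_cost_per_token": 4e-06,
            "litellm_provider": "openai",
        },
    }
)

With both keys present, a lookup by either "my-finetuned-gpt" or "openai/my-finetuned-gpt" resolves to the same entry in the model cost map.
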
@@ -3270,6 +3281,7 @@ async def ahealth_check(
 ## Set verbose to true -> ```litellm.set_verbose = True```
 def print_verbose(print_statement):
     try:
+        verbose_logger.debug(print_statement)
         if litellm.set_verbose:
             print(print_statement) # noqa
     except:


@@ -17,7 +17,12 @@ class MaxParallelRequestsHandler(CustomLogger):
         pass

     def print_verbose(self, print_statement):
-        verbose_proxy_logger.debug(print_statement)
+        try:
+            verbose_proxy_logger.debug(print_statement)
+            if litellm.set_verbose:
+                print(print_statement) # noqa
+        except:
+            pass

     async def async_pre_call_hook(
         self,

@@ -21,6 +21,7 @@ from datetime import datetime, timedelta
 def print_verbose(print_statement):
+    verbose_proxy_logger.debug(print_statement)
     if litellm.set_verbose:
         print(f"LiteLLM Proxy: {print_statement}") # noqa
@@ -96,6 +97,7 @@ class ProxyLogging:
         2. /embeddings
         3. /image/generation
         """
+        print_verbose(f"Inside Proxy Logging Pre-call hook!")
         ### ALERTING ###
         asyncio.create_task(self.response_taking_too_long(request_data=data))
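
In the pre-call hook above, asyncio.create_task schedules the slow-response alert without awaiting it, so the request keeps moving while the alert coroutine runs in the background. A minimal sketch of that fire-and-forget pattern with a hypothetical alert coroutine (not the ProxyLogging implementation):

import asyncio

async def response_taking_too_long(request_data: dict, threshold_s: float = 0.1):
    # Hypothetical alert: wait for the threshold, then report that the request is still pending.
    await asyncio.sleep(threshold_s)
    print(f"ALERT: {request_data.get('model')} still running after {threshold_s}s")

async def handle_request(data: dict):
    # Fire-and-forget: scheduling the alert does not block the request path.
    asyncio.create_task(response_taking_too_long(request_data=data))
    await asyncio.sleep(0.2)  # stand-in for the actual LLM call outlasting the threshold
    return {"ok": True}

asyncio.run(handle_request({"model": "azure-model"}))
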
@@ -1035,7 +1037,7 @@ async def send_email(sender_name, sender_email, receiver_email, subject, html):
     print_verbose(f"SMTP Connection Init")
     # Establish a secure connection with the SMTP server
     with smtplib.SMTP(smtp_host, smtp_port) as server:
-        if os.getenv("SMTP_TLS", 'True') != "False":
+        if os.getenv("SMTP_TLS", "True") != "False":
            server.starttls()

        # Login to your email account
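
Besides normalizing the quotes, the changed line pins down the intended behaviour: STARTTLS is used unless the SMTP_TLS environment variable is explicitly set to the string "False". A small sketch of that gate in isolation (the host and port defaults are placeholders):

import os
import smtplib

smtp_host = os.getenv("SMTP_HOST", "localhost")  # placeholder default
smtp_port = int(os.getenv("SMTP_PORT", "587"))   # placeholder default

with smtplib.SMTP(smtp_host, smtp_port) as server:
    # TLS stays on by default; only SMTP_TLS="False" disables it.
    if os.getenv("SMTP_TLS", "True") != "False":
        server.starttls()
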


@@ -456,6 +456,7 @@ async def test_streaming_router_call():
 @pytest.mark.asyncio
 async def test_streaming_router_tpm_limit():
+    litellm.set_verbose = True
     model_list = [
         {
             "model_name": "azure-model",
@@ -520,7 +521,7 @@ async def test_streaming_router_tpm_limit():
     )
     async for chunk in response:
         continue
-    await asyncio.sleep(1) # success is done in a separate thread
+    await asyncio.sleep(5) # success is done in a separate thread
     try:
         await parallel_request_handler.async_pre_call_hook(
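
The sleep is raised from 1s to 5s because the TPM bookkeeping happens in a success callback that runs on a separate thread, and the assertion that follows can only pass once that thread has recorded the usage. As an alternative to guessing a fixed delay, here is a generic, hypothetical sketch (not LiteLLM's test helpers) that polls for the background result with a deadline:

import asyncio
import threading
import time

usage = {"total_tokens": 0}

def success_callback():
    # Hypothetical stand-in for work done on a separate thread after a request finishes.
    time.sleep(2)
    usage["total_tokens"] = 50

async def wait_for_usage(timeout_s: float = 10.0):
    # Poll until the background thread has recorded usage, instead of a fixed sleep.
    deadline = time.monotonic() + timeout_s
    while usage["total_tokens"] == 0:
        if time.monotonic() > deadline:
            raise TimeoutError("success callback never recorded usage")
        await asyncio.sleep(0.1)

async def main():
    threading.Thread(target=success_callback).start()
    await wait_for_usage()
    print("recorded tokens:", usage["total_tokens"])

asyncio.run(main())
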


@@ -2929,32 +2929,10 @@ def cost_per_token(
         model_with_provider_and_region in model_cost_ref
     ): # use region based pricing, if it's available
         model_with_provider = model_with_provider_and_region
+    if model_with_provider in model_cost_ref:
+        model = model_with_provider
     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
-    if model_with_provider in model_cost_ref:
-        print_verbose(
-            f"Success: model={model_with_provider} in model_cost_map - {model_cost_ref[model_with_provider]}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('input_cost_per_token', None)} for prompt_tokens={prompt_tokens}"
-        )
-        prompt_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens
-        )
-        print_verbose(
-            f"calculated prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('output_cost_per_token', None)} for completion_tokens={completion_tokens}"
-        )
-        completion_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["output_cost_per_token"]
-            * completion_tokens
-        )
-        print_verbose(
-            f"calculated completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
-        )
-        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     if model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
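
Whichever key wins the lookup, the removed branch above shows the arithmetic being preserved: cost is the per-token price multiplied by the token count, computed separately for prompt and completion. A worked sketch with hypothetical prices:

# Hypothetical prices: $0.50 per 1M input tokens, $1.50 per 1M output tokens.
input_cost_per_token = 0.5 / 1_000_000
output_cost_per_token = 1.5 / 1_000_000

prompt_tokens = 1_200
completion_tokens = 300

prompt_tokens_cost_usd_dollar = input_cost_per_token * prompt_tokens           # 0.0006
completion_tokens_cost_usd_dollar = output_cost_per_token * completion_tokens  # 0.00045

total = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar      # 0.00105
print(total)
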