diff --git a/docs/my-website/docs/proxy/custom_pricing.md b/docs/my-website/docs/proxy/custom_pricing.md
index 16d634dee4..26d7ffb1f5 100644
--- a/docs/my-website/docs/proxy/custom_pricing.md
+++ b/docs/my-website/docs/proxy/custom_pricing.md
@@ -26,10 +26,12 @@ model_list:
   - model_name: sagemaker-completion-model
     litellm_params:
       model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
+    model_info:
       input_cost_per_second: 0.000420
   - model_name: sagemaker-embedding-model
     litellm_params:
       model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
+    model_info:
       input_cost_per_second: 0.000420
 ```
 
@@ -55,11 +57,33 @@ model_list:
       api_key: os.environ/AZURE_API_KEY
       api_base: os.environ/AZURE_API_BASE
       api_version: os.envrion/AZURE_API_VERSION
+    model_info:
       input_cost_per_token: 0.000421 # 👈 ONLY to track cost per token
       output_cost_per_token: 0.000520 # 👈 ONLY to track cost per token
 ```
 
-### Debugging
+## Override Model Cost Map
+
+You can override [our model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) with your own custom pricing for a mapped model.
+
+Just add a `model_info` key to your model in the config, and override the desired keys.
+
+Example: Override Anthropic's model cost map for the `prod/claude-3-5-sonnet-20241022` model.
+
+```yaml
+model_list:
+  - model_name: "prod/claude-3-5-sonnet-20241022"
+    litellm_params:
+      model: "anthropic/claude-3-5-sonnet-20241022"
+      api_key: os.environ/ANTHROPIC_PROD_API_KEY
+    model_info:
+      input_cost_per_token: 0.000006
+      output_cost_per_token: 0.00003
+      cache_creation_input_token_cost: 0.0000075
+      cache_read_input_token_cost: 0.0000006
+```
+
+## Debugging
 
 If you're custom pricing is not being used or you're seeing errors, please check the following:
 
diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 4ef9d5430e..9376defc00 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -403,6 +403,7 @@ def _select_model_name_for_cost_calc(
     base_model: Optional[str] = None,
     custom_pricing: Optional[bool] = None,
     custom_llm_provider: Optional[str] = None,
+    router_model_id: Optional[str] = None,
 ) -> Optional[str]:
     """
     1. If custom pricing is true, return received model name
@@ -417,12 +418,6 @@
         model=model, custom_llm_provider=custom_llm_provider
     )
 
-    if custom_pricing is True:
-        return_model = model
-
-    if base_model is not None:
-        return_model = base_model
-
     completion_response_model: Optional[str] = None
     if completion_response is not None:
         if isinstance(completion_response, BaseModel):
@@ -430,6 +425,16 @@
         elif isinstance(completion_response, dict):
             completion_response_model = completion_response.get("model", None)
     hidden_params: Optional[dict] = getattr(completion_response, "_hidden_params", None)
+
+    if custom_pricing is True:
+        if router_model_id is not None and router_model_id in litellm.model_cost:
+            return_model = router_model_id
+        else:
+            return_model = model
+
+    if base_model is not None:
+        return_model = base_model
+
     if completion_response_model is None and hidden_params is not None:
         if (
             hidden_params.get("model", None) is not None
@@ -559,6 +564,7 @@ def completion_cost(  # noqa: PLR0915
     base_model: Optional[str] = None,
     standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
     litellm_model_name: Optional[str] = None,
+    router_model_id: Optional[str] = None,
 ) -> float:
     """
     Calculate the cost of a given completion call fot GPT-3.5-turbo, llama2, any litellm supported llm.
@@ -617,12 +623,12 @@ def completion_cost(  # noqa: PLR0915
         custom_llm_provider=custom_llm_provider,
         custom_pricing=custom_pricing,
         base_model=base_model,
+        router_model_id=router_model_id,
     )
 
     potential_model_names = [selected_model]
     if model is not None:
         potential_model_names.append(model)
-
     for idx, model in enumerate(potential_model_names):
         try:
             verbose_logger.info(
@@ -943,6 +949,7 @@ def response_cost_calculator(
     prompt: str = "",
     standard_built_in_tools_params: Optional[StandardBuiltInToolsParams] = None,
     litellm_model_name: Optional[str] = None,
+    router_model_id: Optional[str] = None,
 ) -> float:
     """
     Returns
@@ -973,6 +980,8 @@
             base_model=base_model,
             prompt=prompt,
             standard_built_in_tools_params=standard_built_in_tools_params,
+            litellm_model_name=litellm_model_name,
+            router_model_id=router_model_id,
         )
         return response_cost
     except Exception as e:
diff --git a/litellm/litellm_core_utils/get_supported_openai_params.py b/litellm/litellm_core_utils/get_supported_openai_params.py
index ca08df1858..bcf9fdb961 100644
--- a/litellm/litellm_core_utils/get_supported_openai_params.py
+++ b/litellm/litellm_core_utils/get_supported_openai_params.py
@@ -1,7 +1,6 @@
 from typing import Literal, Optional
 
 import litellm
-from litellm._logging import verbose_logger
 from litellm.exceptions import BadRequestError
 from litellm.types.utils import LlmProviders, LlmProvidersSet
 
@@ -43,9 +42,6 @@ def get_supported_openai_params(  # noqa: PLR0915
         provider_config = None
 
     if provider_config and request_type == "chat_completion":
-        verbose_logger.info(
-            f"using provider_config: {provider_config} for checking supported params"
-        )
         return provider_config.get_supported_openai_params(model=model)
 
     if custom_llm_provider == "bedrock":
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 1d919155ce..f00b81619a 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -622,7 +622,6 @@ class Logging(LiteLLMLoggingBaseClass):
                 ] = RawRequestTypedDict(
                     error=str(e),
                 )
-                traceback.print_exc()
                 _metadata[
                     "raw_request"
                 ] = "Unable to Log \
@@ -906,6 +905,7 @@ class Logging(LiteLLMLoggingBaseClass):
         ],
         cache_hit: Optional[bool] = None,
         litellm_model_name: Optional[str] = None,
+        router_model_id: Optional[str] = None,
     ) -> Optional[float]:
         """
         Calculate response cost using result + logging object variables.
@@ -944,6 +944,7 @@ class Logging(LiteLLMLoggingBaseClass):
                 "custom_pricing": custom_pricing,
                 "prompt": prompt,
                 "standard_built_in_tools_params": self.standard_built_in_tools_params,
+                "router_model_id": router_model_id,
             }
         except Exception as e:  # error creating kwargs for cost calculation
             debug_info = StandardLoggingModelCostFailureDebugInformation(
diff --git a/litellm/litellm_core_utils/llm_response_utils/response_metadata.py b/litellm/litellm_core_utils/llm_response_utils/response_metadata.py
index 84c80174f9..614b5573cc 100644
--- a/litellm/litellm_core_utils/llm_response_utils/response_metadata.py
+++ b/litellm/litellm_core_utils/llm_response_utils/response_metadata.py
@@ -36,11 +36,16 @@ class ResponseMetadata:
         self, logging_obj: LiteLLMLoggingObject, model: Optional[str], kwargs: dict
     ) -> None:
         """Set hidden parameters on the response"""
+
+        ## ADD OTHER HIDDEN PARAMS
+        model_id = kwargs.get("model_info", {}).get("id", None)
         new_params = {
             "litellm_call_id": getattr(logging_obj, "litellm_call_id", None),
-            "model_id": kwargs.get("model_info", {}).get("id", None),
             "api_base": get_api_base(model=model or "", optional_params=kwargs),
-            "response_cost": logging_obj._response_cost_calculator(result=self.result),
+            "model_id": model_id,
+            "response_cost": logging_obj._response_cost_calculator(
+                result=self.result, litellm_model_name=model, router_model_id=model_id
+            ),
             "additional_headers": process_response_headers(
                 self._get_value_from_hidden_params("additional_headers") or {}
             ),
diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py
index 1495c05685..d4d757de2c 100644
--- a/litellm/litellm_core_utils/prompt_templates/factory.py
+++ b/litellm/litellm_core_utils/prompt_templates/factory.py
@@ -1,7 +1,6 @@
 import copy
 import json
 import re
-import traceback
 import uuid
 import xml.etree.ElementTree as ET
 from enum import Enum
@@ -748,7 +747,6 @@ def convert_to_anthropic_image_obj(
             data=base64_data,
         )
     except Exception as e:
-        traceback.print_exc()
         if "Error: Unable to fetch image from URL" in str(e):
             raise e
         raise Exception(
diff --git a/litellm/proxy/caching_routes.py b/litellm/proxy/caching_routes.py
index 3a22ce2fa8..f25c273ae9 100644
--- a/litellm/proxy/caching_routes.py
+++ b/litellm/proxy/caching_routes.py
@@ -100,7 +100,6 @@ async def cache_ping():
     except Exception as e:
         import traceback
 
-        traceback.print_exc()
         error_message = {
             "message": f"Service Unhealthy ({str(e)})",
             "litellm_cache_params": safe_dumps(litellm_cache_params),
diff --git a/litellm/proxy/management_endpoints/organization_endpoints.py b/litellm/proxy/management_endpoints/organization_endpoints.py
index 37de12a9d2..f0b0b645d2 100644
--- a/litellm/proxy/management_endpoints/organization_endpoints.py
+++ b/litellm/proxy/management_endpoints/organization_endpoints.py
@@ -816,9 +816,6 @@ async def add_member_to_organization(
         return user_object, organization_membership
 
     except Exception as e:
-        import traceback
-
-        traceback.print_exc()
         raise ValueError(
             f"Error adding member={member} to organization={organization_id}: {e}"
         )
diff --git a/litellm/router.py b/litellm/router.py
index 456e8641e0..a4dca0dedd 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -116,6 +116,7 @@ from litellm.types.router import (
     AllowedFailsPolicy,
     AssistantsTypedDict,
     CredentialLiteLLMParams,
+    CustomPricingLiteLLMParams,
     CustomRoutingStrategyBase,
     Deployment,
     DeploymentTypedDict,
@@ -132,6 +133,7 @@ from litellm.types.router import (
 )
 from litellm.types.services import ServiceTypes
 from litellm.types.utils import GenericBudgetConfigType
+from litellm.types.utils import ModelInfo
 from litellm.types.utils import ModelInfo as ModelMapInfo
 from litellm.types.utils import StandardLoggingPayload
 from litellm.utils import (
@@ -3324,7 +3326,6 @@ class Router:
             return response
 
         except Exception as new_exception:
-            traceback.print_exc()
             parent_otel_span = _get_parent_otel_span_from_kwargs(kwargs)
             verbose_router_logger.error(
                 "litellm.router.py::async_function_with_fallbacks() - Error occurred while trying to do fallbacks - {}\n{}\n\nDebug Information:\nCooldown Deployments={}".format(
@@ -4301,7 +4302,20 @@ class Router:
                 model_info=_model_info,
             )
 
+            for field in CustomPricingLiteLLMParams.model_fields.keys():
+                if deployment.litellm_params.get(field) is not None:
+                    _model_info[field] = deployment.litellm_params[field]
+            ## REGISTER MODEL INFO IN LITELLM MODEL COST MAP
+            model_id = deployment.model_info.id
+            if model_id is not None:
+                litellm.register_model(
+                    model_cost={
+                        model_id: _model_info,
+                    }
+                )
+
+            ## OLD MODEL REGISTRATION ## Kept to prevent breaking changes
             _model_name = deployment.litellm_params.model
             if deployment.litellm_params.custom_llm_provider is not None:
                 _model_name = (
@@ -4802,6 +4816,42 @@ class Router:
         model_name = model_info["model_name"]
         return self.get_model_list(model_name=model_name)
 
+    def get_deployment_model_info(
+        self, model_id: str, model_name: str
+    ) -> Optional[ModelInfo]:
+        """
+        For a given model id, return the model info
+
+        1. Check if model_id is in model info
+        2. If not, check if litellm model name is in model info
+        3. If not, return None
+        """
+        from litellm.utils import _update_dictionary
+
+        model_info: Optional[ModelInfo] = None
+        litellm_model_name_model_info: Optional[ModelInfo] = None
+
+        try:
+            model_info = litellm.get_model_info(model=model_id)
+        except Exception:
+            pass
+
+        try:
+            litellm_model_name_model_info = litellm.get_model_info(model=model_name)
+        except Exception:
+            pass
+
+        if model_info is not None and litellm_model_name_model_info is not None:
+            model_info = cast(
+                ModelInfo,
+                _update_dictionary(
+                    cast(dict, litellm_model_name_model_info).copy(),
+                    cast(dict, model_info),
+                ),
+            )
+
+        return model_info
+
     def _set_model_group_info(  # noqa: PLR0915
         self, model_group: str, user_facing_model_group_name: str
     ) -> Optional[ModelGroupInfo]:
@@ -4860,9 +4910,16 @@ class Router:
 
             # get model info
             try:
-                model_info = litellm.get_model_info(model=litellm_params.model)
+                model_id = model.get("model_info", {}).get("id", None)
+                if model_id is not None:
+                    model_info = self.get_deployment_model_info(
+                        model_id=model_id, model_name=litellm_params.model
+                    )
+                else:
+                    model_info = None
             except Exception:
                 model_info = None
+
             # get llm provider
             litellm_model, llm_provider = "", ""
             try:
diff --git a/litellm/types/router.py b/litellm/types/router.py
index 5609c3f67f..745d7640e2 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -162,7 +162,15 @@ class CredentialLiteLLMParams(BaseModel):
     watsonx_region_name: Optional[str] = None
 
 
-class GenericLiteLLMParams(CredentialLiteLLMParams):
+class CustomPricingLiteLLMParams(BaseModel):
+    ## CUSTOM PRICING ##
+    input_cost_per_token: Optional[float] = None
+    output_cost_per_token: Optional[float] = None
+    input_cost_per_second: Optional[float] = None
+    output_cost_per_second: Optional[float] = None
+
+
+class GenericLiteLLMParams(CredentialLiteLLMParams, CustomPricingLiteLLMParams):
     """
     LiteLLM Params without 'model' arg (used across completion / assistants api)
     """
@@ -184,12 +192,6 @@ class GenericLiteLLMParams(CredentialLiteLLMParams):
     ## LOGGING PARAMS ##
     litellm_trace_id: Optional[str] = None
 
-    ## CUSTOM PRICING ##
-    input_cost_per_token: Optional[float] = None
-    output_cost_per_token: Optional[float] = None
-    input_cost_per_second: Optional[float] = None
-    output_cost_per_second: Optional[float] = None
-
     max_file_size_mb: Optional[float] = None
 
     # Deployment budgets
diff --git a/litellm/utils.py b/litellm/utils.py
index 9ebe0f4b09..25d2f2105e 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2245,7 +2245,8 @@ def supports_embedding_image_input(
 
 ####### HELPER FUNCTIONS ################
 def _update_dictionary(existing_dict: Dict, new_dict: dict) -> dict:
     for k, v in new_dict.items():
-        existing_dict[k] = v
+        if v is not None:
+            existing_dict[k] = v
     return existing_dict
 
diff --git a/tests/code_coverage_tests/router_code_coverage.py b/tests/code_coverage_tests/router_code_coverage.py
index 5ed00203cc..8b4622d9b8 100644
--- a/tests/code_coverage_tests/router_code_coverage.py
+++ b/tests/code_coverage_tests/router_code_coverage.py
@@ -31,7 +31,7 @@ def get_all_functions_called_in_tests(base_dir):
     specifically in files containing the word 'router'.
     """
     called_functions = set()
-    test_dirs = ["local_testing", "router_unit_tests"]
+    test_dirs = ["local_testing", "router_unit_tests", "litellm"]
 
     for test_dir in test_dirs:
         dir_path = os.path.join(base_dir, test_dir)
diff --git a/tests/litellm/test_cost_calculator.py b/tests/litellm/test_cost_calculator.py
index daf57a2f8a..4621dd722a 100644
--- a/tests/litellm/test_cost_calculator.py
+++ b/tests/litellm/test_cost_calculator.py
@@ -151,3 +151,63 @@ def test_handle_realtime_stream_cost_calculation():
         litellm_model_name="gpt-3.5-turbo",
     )
     assert cost == 0.0  # No usage, no cost
+
+
+def test_custom_pricing_with_router_model_id():
+    from litellm import Router
+
+    router = Router(
+        model_list=[
+            {
+                "model_name": "prod/claude-3-5-sonnet-20240620",
+                "litellm_params": {
+                    "model": "anthropic/claude-3-5-sonnet-20240620",
+                    "api_key": "test_api_key",
+                },
+                "model_info": {
+                    "id": "my-unique-model-id",
+                    "input_cost_per_token": 0.000006,
+                    "output_cost_per_token": 0.00003,
+                    "cache_creation_input_token_cost": 0.0000075,
+                    "cache_read_input_token_cost": 0.0000006,
+                },
+            },
+            {
+                "model_name": "claude-3-5-sonnet-20240620",
+                "litellm_params": {
+                    "model": "anthropic/claude-3-5-sonnet-20240620",
+                    "api_key": "test_api_key",
+                },
+                "model_info": {
+                    "input_cost_per_token": 100,
+                    "output_cost_per_token": 200,
+                },
+            },
+        ]
+    )
+
+    result = router.completion(
+        model="claude-3-5-sonnet-20240620",
+        messages=[{"role": "user", "content": "Hello, world!"}],
+        mock_response=True,
+    )
+
+    result_2 = router.completion(
+        model="prod/claude-3-5-sonnet-20240620",
+        messages=[{"role": "user", "content": "Hello, world!"}],
+        mock_response=True,
+    )
+
+    assert (
+        result._hidden_params["response_cost"]
+        > result_2._hidden_params["response_cost"]
+    )
+
+    model_info = router.get_deployment_model_info(
+        model_id="my-unique-model-id", model_name="anthropic/claude-3-5-sonnet-20240620"
+    )
+    assert model_info is not None
+    assert model_info["input_cost_per_token"] == 0.000006
+    assert model_info["output_cost_per_token"] == 0.00003
+    assert model_info["cache_creation_input_token_cost"] == 0.0000075
+    assert model_info["cache_read_input_token_cost"] == 0.0000006
diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py
index 1cb6269cb3..af89c38789 100644
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@@ -2954,9 +2954,6 @@ def test_cost_calculator_with_custom_pricing():
 @pytest.mark.asyncio
 async def test_cost_calculator_with_custom_pricing_router(model_item, custom_pricing):
     from litellm import Router
-
-    litellm._turn_on_debug()
-
     if custom_pricing == "litellm_params":
         model_item["litellm_params"]["input_cost_per_token"] = 0.0000008
         model_item["litellm_params"]["output_cost_per_token"] = 0.0000032
diff --git a/tests/local_testing/test_get_model_info.py b/tests/local_testing/test_get_model_info.py
index a8c19fde5a..eae1b7ef65 100644
--- a/tests/local_testing/test_get_model_info.py
+++ b/tests/local_testing/test_get_model_info.py
@@ -314,12 +314,14 @@ def test_get_model_info_custom_model_router():
                     "input_cost_per_token": 1,
                     "output_cost_per_token": 1,
                     "model": "openai/meta-llama/Meta-Llama-3-8B-Instruct",
-                    "model_id": "c20d603e-1166-4e0f-aa65-ed9c476ad4ca",
                 },
+                "model_info": {
+                    "id": "c20d603e-1166-4e0f-aa65-ed9c476ad4ca",
+                }
             }
         ]
     )
 
-    info = get_model_info("openai/meta-llama/Meta-Llama-3-8B-Instruct")
+    info = get_model_info("c20d603e-1166-4e0f-aa65-ed9c476ad4ca")
     print("info", info)
     assert info is not None
diff --git a/tests/local_testing/test_router_utils.py b/tests/local_testing/test_router_utils.py
index bb748d27af..067aaf032a 100644
--- a/tests/local_testing/test_router_utils.py
+++ b/tests/local_testing/test_router_utils.py
@@ -451,3 +451,11 @@ def test_router_get_deployment_credentials():
     credentials = router.get_deployment_credentials(model_id="1")
     assert credentials is not None
     assert credentials["api_key"] == "123"
+
+
+def test_router_get_deployment_model_info():
+    router = Router(
+        model_list=[{"model_name": "gemini/*", "litellm_params": {"model": "gemini/*"}, "model_info": {"id": "1"}}]
+    )
+    model_info = router.get_deployment_model_info(model_id="1", model_name="gemini/gemini-1.5-flash")
+    assert model_info is not None