diff --git a/litellm/__init__.py b/litellm/__init__.py
index b5c84bab4..4c89c441f 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -1,4 +1,4 @@
-import threading
+import threading, requests
 from typing import Callable, List, Optional, Dict
 from litellm.caching import Cache
@@ -35,94 +35,18 @@ caching = False # deprecated son
 caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
 cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
-model_cost = {
-    "babbage-002": {
-        "max_tokens": 16384,
-        "input_cost_per_token": 0.0000004,
-        "output_cost_per_token": 0.0000004,
-    },
-    "davinci-002": {
-        "max_tokens": 16384,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-35-turbo": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },  # azure model name
-    "gpt-3.5-turbo-0613": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo-0301": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo-16k": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },
-    "gpt-35-turbo-16k": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },  # azure model name
-    "gpt-3.5-turbo-16k-0613": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },
-    "gpt-4": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.00006,
-    },
-    "gpt-4-0613": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.00006,
-    },
-    "gpt-4-32k": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.00006,
-        "output_cost_per_token": 0.00012,
-    },
-    "claude-instant-1": {
-        "max_tokens": 100000,
-        "input_cost_per_token": 0.00000163,
-        "output_cost_per_token": 0.00000551,
-    },
-    "claude-2": {
-        "max_tokens": 100000,
-        "input_cost_per_token": 0.00001102,
-        "output_cost_per_token": 0.00003268,
-    },
-    "text-bison-001": {
-        "max_tokens": 8192,
-        "input_cost_per_token": 0.000004,
-        "output_cost_per_token": 0.000004,
-    },
-    "chat-bison-001": {
-        "max_tokens": 4096,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000002,
-    },
-    "command-nightly": {
-        "max_tokens": 4096,
-        "input_cost_per_token": 0.000015,
-        "output_cost_per_token": 0.000015,
-    },
-}
-
+def get_model_cost_map():
+    url = "https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json"
+
+    try:
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an exception if request is unsuccessful
+        content = response.json()
+        return content
+    except requests.exceptions.RequestException as e:
+        print("Error occurred:", e)
+        return None
+model_cost = get_model_cost_map()

 ####### THREAD-SPECIFIC DATA ###################
 class MyLocal(threading.local):
@@ -298,7 +222,8 @@ from .utils import (
     Logging,
     acreate,
     get_model_list,
-    completion_with_split_tests
+    completion_with_split_tests,
+    get_max_tokens
 )
 from .main import *  # type: ignore
 from .integrations import *
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index c2b76dce9..e68c4da03 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 5be8c1fc5..4e3285a90 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 2826ca27f..309ff5b2e 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/exceptions.py b/litellm/exceptions.py
index 26a6e8b9f..ccf6e2d76 100644
--- a/litellm/exceptions.py
+++ b/litellm/exceptions.py
@@ -9,10 +9,11 @@ from openai.error import (


 class AuthenticationError(AuthenticationError):  # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 401
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs
@@ -41,20 +42,22 @@ class ContextWindowExceededError(InvalidRequestError):  # type: ignore


 class RateLimitError(RateLimitError):  # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 429
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs


 class ServiceUnavailableError(ServiceUnavailableError):  # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 500
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index d8619abd6..dc89d160e 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -22,7 +22,7 @@ import pytest

 litellm.vertex_project = "pathrise-convert-1606954137718"
 litellm.vertex_location = "us-central1"
-litellm.failure_callback = ["sentry"]
+# litellm.failure_callback = ["sentry"]

 #### What this tests ####
 # This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
diff --git a/litellm/tests/test_get_model_cost_map.py b/litellm/tests/test_get_model_cost_map.py
new file mode 100644
index 000000000..e8d751af8
--- /dev/null
+++ b/litellm/tests/test_get_model_cost_map.py
@@ -0,0 +1,10 @@
+import sys, os
+import traceback
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import time
+from litellm import get_max_tokens
+
+print(get_max_tokens("gpt-3.5-turbo"))
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 2ca755b94..d611c05ed 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -846,6 +846,13 @@ def get_optional_params(  # use the openai defaults
     return optional_params


+def get_max_tokens(model: str):
+    try:
+        return litellm.model_cost[model]
+    except:
+        raise Exception("This model isn't mapped yet. Add it here - https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json")
+
+
 def load_test_model(
     model: str,
     custom_llm_provider: str = "",
@@ -1458,6 +1465,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"AnthropicException - {original_exception.message}",
                     llm_provider="anthropic",
+                    model=model
                 )
             elif original_exception.status_code == 400:
                 exception_mapping_worked = True
@@ -1478,6 +1486,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"AnthropicException - {original_exception.message}",
                     llm_provider="anthropic",
+                    model=model
                 )
             elif (
                 "Could not resolve authentication method. Expected either api_key or auth_token to be set."
@@ -1487,6 +1496,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"AnthropicException - {original_exception.message}",
                     llm_provider="anthropic",
+                    model=model
                 )
         elif "replicate" in model:
             if "Incorrect authentication token" in error_str:
@@ -1494,6 +1504,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"ReplicateException - {error_str}",
                     llm_provider="replicate",
+                    model=model
                 )
             elif "input is too long" in error_str:
                 exception_mapping_worked = True
@@ -1514,6 +1525,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"ReplicateException - {error_str}",
                     llm_provider="replicate",
+                    model=model
                 )
             elif (
                 exception_type == "ReplicateError"
@@ -1521,6 +1533,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise ServiceUnavailableError(
                     message=f"ReplicateException - {error_str}",
                     llm_provider="replicate",
+                    model=model
                 )
         elif model in litellm.cohere_models:  # Cohere
             if (
@@ -1531,6 +1544,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"CohereException - {original_exception.message}",
                     llm_provider="cohere",
+                    model=model
                 )
             elif "too many tokens" in error_str:
                 exception_mapping_worked = True
@@ -1546,6 +1560,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"CohereException - {original_exception.message}",
                     llm_provider="cohere",
+                    model=model
                 )
         elif custom_llm_provider == "huggingface":
             if "length limit exceeded" in error_str:
@@ -1561,6 +1576,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"HuggingfaceException - {original_exception.message}",
                     llm_provider="huggingface",
+                    model=model
                 )
             elif original_exception.status_code == 400:
                 exception_mapping_worked = True
@@ -1574,6 +1590,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"HuggingfaceException - {original_exception.message}",
                     llm_provider="huggingface",
+                    model=model
                 )
         elif custom_llm_provider == "ai21":
             if hasattr(original_exception, "message"):
@@ -1590,6 +1607,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"AI21Exception - {original_exception.message}",
                     llm_provider="ai21",
+                    model=model
                 )
             if original_exception.status_code == 422:
                 exception_mapping_worked = True
@@ -1617,7 +1635,8 @@ def exception_type(model, original_exception, custom_llm_provider):
                 exception_mapping_worked = True
                 raise AuthenticationError(
                     message=f"TogetherAIException - {error_response['error']}",
-                    llm_provider="together_ai"
+                    llm_provider="together_ai",
+                    model=model
                 )
             elif "error" in error_response and "INVALID_ARGUMENT" in error_response["error"]:
                 exception_mapping_worked = True
@@ -1638,6 +1657,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"TogetherAIException - {original_exception.message}",
                     llm_provider="together_ai",
+                    model=model
                 )
         raise original_exception  # base case - return the original exception
     else:
diff --git a/pyproject.toml b/pyproject.toml
index 7a2d0b33b..cf6b93ce8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.525"
+version = "0.1.526"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
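
Not part of the patch: a minimal usage sketch of the dynamic cost map and the new get_max_tokens helper introduced above. The model name, the printed fields, and the None guard are illustrative assumptions; the entry schema is assumed to match the old in-repo model_cost dict (max_tokens, input_cost_per_token, output_cost_per_token).

# Illustrative only -- assumes litellm >= 0.1.526 is installed and the remote
# max_tokens.json is reachable at import time.
import litellm
from litellm import get_max_tokens

# model_cost is now fetched over HTTP when litellm is imported; per the diff it
# can be None if the request failed, so guard before indexing it directly.
if litellm.model_cost is not None:
    print(len(litellm.model_cost), "models in the cost map")

# get_max_tokens returns the full cost entry for a mapped model and raises an
# Exception pointing at the community JSON for unmapped models.
info = get_max_tokens("gpt-3.5-turbo")  # example model name, assumed to be mapped
print(info["max_tokens"])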