diff --git a/litellm/__init__.py b/litellm/__init__.py
index 38697c7519..7ed52d7cd3 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -25,6 +25,6 @@ open_ai_embedding_models = [
     'text-embedding-ada-002'
 ]
 
-from .utils import client, logging # Import all the symbols from main.py
+from .utils import client, logging, exception_type # Import all the symbols from main.py
 from .main import * # Import all the symbols from main.py
 
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index 73352d569b..3e9ac33f0f 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 11eb85d841..1490fcfea0 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 9a0883ee25..71c54c1834 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index b5a6b1c8af..ec2de634f9 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -6,7 +6,7 @@ import traceback
 import dotenv
 import traceback
 import litellm
-from litellm import client, logging
+from litellm import client, logging, exception_type
 from litellm import success_callback, failure_callback
 import random
 ####### ENVIRONMENT VARIABLES ###################
@@ -120,7 +120,7 @@ def completion(
         elif "replicate" in model:
             # replicate defaults to os.environ.get("REPLICATE_API_TOKEN")
             # checking in case user set it to REPLICATE_API_KEY instead
-            if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
+            if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"):
                 replicate_api_token = os.environ.get("REPLICATE_API_KEY")
                 os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
             prompt = " ".join([message["content"] for message in messages])
@@ -207,7 +207,7 @@ def completion(
                         "finish_reason": "stop",
                         "index": 0,
                         "message": {
-                            "content": response[0],
+                            "content": response[0].text,
                             "role": "assistant"
                         }
                     }
@@ -246,8 +246,10 @@ def completion(
             raise ValueError(f"No valid completion model args passed in - {args}")
         return response
     except Exception as e:
-        logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn)
-        raise e
+        # log the original exception
+        logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn, exception=e)
+        ## Map to OpenAI Exception
+        raise exception_type(model=model, original_exception=e)
 
 
 ### EMBEDDING ENDPOINTS ####################
diff --git a/litellm/tests/__pycache__/test_bad_params.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_bad_params.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..1e3c4bc7d3
Binary files /dev/null and b/litellm/tests/__pycache__/test_bad_params.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/__pycache__/test_client.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_client.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..3bc5a08bbf
Binary files /dev/null and b/litellm/tests/__pycache__/test_client.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/__pycache__/test_completion.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_completion.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..2baa7bc5f0
Binary files /dev/null and b/litellm/tests/__pycache__/test_completion.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/__pycache__/test_exceptions.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_exceptions.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..0e69bc88de
Binary files /dev/null and b/litellm/tests/__pycache__/test_exceptions.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/__pycache__/test_logging.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_logging.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..9f71ef3a11
Binary files /dev/null and b/litellm/tests/__pycache__/test_logging.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/__pycache__/test_model_fallback.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_model_fallback.cpython-311-pytest-7.4.0.pyc
new file mode 100644
index 0000000000..864247d098
Binary files /dev/null and b/litellm/tests/__pycache__/test_model_fallback.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/test_bad_params.py b/litellm/tests/test_bad_params.py
index dd1e8d5093..53872e5fad 100644
--- a/litellm/tests/test_bad_params.py
+++ b/litellm/tests/test_bad_params.py
@@ -26,7 +26,7 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
 
 user_message = "Hello, how are you?"
 messages = [{ "content": user_message,"role": "user"}]
-model_val = "krrish is a model"
+model_val = None
 
 
 def test_completion_with_empty_model():
@@ -35,4 +35,4 @@ def test_completion_with_empty_model():
         response = completion(model=model_val, messages=messages)
     except Exception as e:
         print(f"error occurred: {e}")
-        pass
+        pass
\ No newline at end of file
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index adb55a45e3..b9bbbebe4b 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -1,5 +1,8 @@
 import sys, os
 import traceback
+from dotenv import load_dotenv
+load_dotenv()
+import os
 sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
 import pytest
 import litellm
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
new file mode 100644
index 0000000000..38be0e2c15
--- /dev/null
+++ b/litellm/tests/test_exceptions.py
@@ -0,0 +1,129 @@
+from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError
+import os
+import sys
+import traceback
+sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+import litellm
+from litellm import embedding, completion
+from concurrent.futures import ThreadPoolExecutor
+#### What this tests ####
+# This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
+
+
+# 5 providers -> OpenAI, Azure, Anthropic, Cohere, Replicate
+
+# 3 main types of exceptions -> - Rate Limit Errors, Context Window Errors, Auth errors (incorrect/rotated key, etc.)
+
+# Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered
+
+models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly", "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"]
+
+# Test 1: Rate Limit Errors
+def test_model(model):
+    try:
+        sample_text = "how does a court case get to the Supreme Court?" * 50000
+        messages = [{ "content": sample_text,"role": "user"}]
+        azure = False
+        if model == "chatgpt-test":
+            azure = True
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure)
+    except RateLimitError:
+        return True
+    except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        return True
+    except Exception as e:
+        print(f"Uncaught Exception {model}: {type(e).__name__} - {e}")
+        pass
+    return False
+
+# Repeat each model 500 times
+extended_models = [model for model in models for _ in range(250)]
+
+def worker(model):
+    return test_model(model)
+
+# Create a dictionary to store the results
+counts = {True: 0, False: 0}
+
+# Use Thread Pool Executor
+with ThreadPoolExecutor(max_workers=500) as executor:
+    # Use map to start the operation in thread pool
+    results = executor.map(worker, extended_models)
+
+    # Iterate over results and count True/False
+    for result in results:
+        counts[result] += 1
+
+accuracy_score = counts[True]/(counts[True] + counts[False])
+print(f"accuracy_score: {accuracy_score}")
+
+# Test 2: Context Window Errors
+print("Testing Context Window Errors")
+def test_model(model): # pass extremely long input
+    sample_text = "how does a court case get to the Supreme Court?" * 100000
+    messages = [{ "content": sample_text,"role": "user"}]
+    try:
+        azure = False
+        if model == "chatgpt-test":
+            azure = True
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure)
+    except InvalidRequestError:
+        return True
+    except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        return True
+    except Exception as e:
+        print(f"Error Type: {type(e).__name__}")
+        print(f"Uncaught Exception - {e}")
+        pass
+    return False
+
+## TEST SCORE
+true_val = 0
+for model in models:
+    if test_model(model=model) == True:
+        true_val += 1
+accuracy_score = true_val/len(models)
+print(f"CTX WINDOW accuracy_score: {accuracy_score}")
+
+# Test 3: InvalidAuth Errors
+def logger_fn(model_call_object: dict):
+    print(f"model call details: {model_call_object}")
+
+
+def test_model(model): # set the model key to an invalid key, depending on the model
+    messages = [{ "content": "Hello, how are you?","role": "user"}]
+    try:
+        azure = False
+        if model == "gpt-3.5-turbo":
+            os.environ["OPENAI_API_KEY"] = "bad-key"
+        elif model == "chatgpt-test":
+            os.environ["AZURE_API_KEY"] = "bad-key"
+            azure = True
+        elif model == "claude-instant-1":
+            os.environ["ANTHROPIC_API_KEY"] = "bad-key"
+        elif model == "command-nightly":
+            os.environ["COHERE_API_KEY"] = "bad-key"
+        elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
+            os.environ["REPLICATE_API_KEY"] = "bad-key"
+            os.environ["REPLICATE_API_TOKEN"] = "bad-key"
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure, logger_fn=logger_fn)
+        print(f"response: {response}")
+    except AuthenticationError as e:
+        return True
+    except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        return True
+    except Exception as e:
+        print(f"Uncaught Exception - {e}")
+        pass
+    return False
+
+## TEST SCORE
+true_val = 0
+for model in models:
+    if test_model(model=model) == True:
+        true_val += 1
+accuracy_score = true_val/len(models)
+print(f"INVALID AUTH accuracy_score: {accuracy_score}")
\ No newline at end of file
diff --git a/litellm/tests/test_logging.py b/litellm/tests/test_logging.py
index 21e4a879c6..dbacf8b472 100644
--- a/litellm/tests/test_logging.py
+++ b/litellm/tests/test_logging.py
@@ -7,7 +7,9 @@ sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the
 import litellm
 from litellm import embedding, completion
 
-litellm.set_verbose = True
+litellm.set_verbose = False
+
+score = 0
 
 def logger_fn(model_call_object: dict):
     print(f"model call details: {model_call_object}")
@@ -18,6 +20,7 @@ messages = [{ "content": user_message,"role": "user"}]
 # test on openai completion call
 try:
     response = completion(model="gpt-3.5-turbo", messages=messages)
+    score +=1
 except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
@@ -25,6 +28,7 @@ except:
 # test on non-openai completion call
 try:
     response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn)
+    score +=1
 except:
     print(f"error occurred: {traceback.format_exc()}")
     pass
@@ -32,20 +36,23 @@ except:
 # test on openai embedding call
 try:
     response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn)
-    print(f"response: {str(response)[:50]}")
+    score +=1
 except:
     traceback.print_exc()
 
 # test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model
 try:
     response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn)
-    print(f"response: {str(response)[:50]}")
 except:
+    score +=1 # expect this to fail
     traceback.print_exc()
 
 # test on good azure openai embedding call
 try:
     response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn)
-    print(f"response: {str(response)[:50]}")
+    score +=1
 except:
     traceback.print_exc()
+
+
+print(f"Score: {score}, Overall score: {score/5}")
\ No newline at end of file
diff --git a/litellm/tests/test_model_fallback.py b/litellm/tests/test_model_fallback.py
index b389e9f6ac..69dc1f68dd 100644
--- a/litellm/tests/test_model_fallback.py
+++ b/litellm/tests/test_model_fallback.py
@@ -12,7 +12,7 @@ litellm.failure_callback = ["slack", "sentry", "posthog"]
 
 litellm.set_verbose = True
 
-model_fallback_list = ["replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1", "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1", "chatgpt-test"]
+model_fallback_list = ["claude-instant-1", "gpt-3.5-turbo", "chatgpt-test"]
 
 user_message = "Hello, how are you?"
 messages = [{ "content": user_message,"role": "user"}]
@@ -21,6 +21,5 @@ for model in model_fallback_list:
     try:
         response = embedding(model="text-embedding-ada-002", input=[user_message])
         response = completion(model=model, messages=messages)
-        print(response)
     except Exception as e:
         print(f"error occurred: {traceback.format_exc()}")
diff --git a/litellm/utils.py b/litellm/utils.py
index 593b754f03..c7eaa96d2b 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -9,6 +9,7 @@ import litellm
 import os
 import openai
 import random
+from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
 sentry_sdk_instance = None
@@ -29,12 +30,15 @@ def print_verbose(print_statement):
 
 ####### LOGGING ###################
 #Logging function -> log the exact model details + what's being sent | Non-Blocking
-def logging(model, input, azure=False, additional_args={}, logger_fn=None):
+def logging(model, input, azure=False, additional_args={}, logger_fn=None, exception=None):
     try:
         model_call_details = {}
         model_call_details["model"] = model
         model_call_details["input"] = input
         model_call_details["azure"] = azure
+        # log exception details
+        if exception:
+            model_call_details["original_exception"] = exception
         # log additional call details -> api key, etc.
         if azure == True or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_embedding_models:
             model_call_details["api_type"] = openai.api_type
@@ -222,3 +226,42 @@ def handle_success(*args, **kwargs):
         success_handler(args, kwargs)
     pass
 
+
+def exception_type(model, original_exception):
+    if isinstance(original_exception, OpenAIError):
+        # Handle the OpenAIError
+        raise original_exception
+    elif model:
+        error_str = str(original_exception)
+        if isinstance(original_exception, BaseException):
+            exception_type = type(original_exception).__name__
+        else:
+            exception_type = ""
+        if "claude" in model: #one of the anthropics
+            print_verbose(f"status_code: {original_exception.status_code}")
+            if original_exception.status_code == 401:
+                raise AuthenticationError(f"AnthropicException - {original_exception.message}")
+            elif original_exception.status_code == 400:
+                raise InvalidRequestError(f"AnthropicException - {original_exception.message}", f"{model}")
+            elif original_exception.status_code == 429:
+                raise RateLimitError(f"AnthropicException - {original_exception.message}")
+        elif "replicate" in model:
+            if "Incorrect authentication token" in error_str:
+                raise AuthenticationError(f"ReplicateException - {error_str}")
+            elif exception_type == "ModelError":
+                raise InvalidRequestError(f"ReplicateException - {error_str}", f"{model}")
+            elif "Request was throttled" in error_str:
+                raise RateLimitError(f"ReplicateException - {error_str}")
+            elif exception_type == "ReplicateError": ## ReplicateError implies an error on Replicate server side, not user side
+                raise ServiceUnavailableError(f"ReplicateException - {error_str}")
+        elif model == "command-nightly": #Cohere
+            if "invalid api token" in error_str or "No API key provided." in error_str:
+                raise AuthenticationError(f"CohereException - {error_str}")
+            elif "too many tokens" in error_str:
+                raise InvalidRequestError(f"CohereException - {error_str}", f"{model}")
+            elif "CohereConnectionError" in exception_type: # cohere seems to fire these errors when we load test it (1k+ messages / min)
+                raise RateLimitError(f"CohereException - {original_exception.message}")
+        raise original_exception # base case - return the original exception
+    else:
+        raise original_exception
+    
\ No newline at end of file
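Usage sketch (illustrative, not part of the patch): with this change, completion() re-raises recognized Anthropic, Replicate, and Cohere failures as the corresponding openai.error types, so a caller can handle every provider with one set of except clauses. The snippet assumes the pre-1.0 openai package (which provides openai.error), that the relevant provider API keys are set in the environment, and that errors the mapper does not recognize are re-raised unchanged (the base case in exception_type).

    from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, OpenAIError
    from litellm import completion

    messages = [{"content": "Hello, how are you?", "role": "user"}]

    try:
        response = completion(model="claude-instant-1", messages=messages)
        print(response)
    except AuthenticationError:
        print("bad or rotated provider key")            # e.g. Anthropic 401 -> AuthenticationError
    except InvalidRequestError:
        print("invalid request, e.g. prompt too long")  # e.g. Anthropic 400 -> InvalidRequestError
    except RateLimitError:
        print("rate limited - back off and retry")      # e.g. Anthropic 429 -> RateLimitError
    except OpenAIError as e:
        print(f"other mapped provider error: {e}")      # e.g. Replicate server error -> ServiceUnavailableError
    except Exception as e:
        print(f"unmapped provider error: {e}")          # unrecognized errors are re-raised as-is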