diff --git a/litellm/__init__.py b/litellm/__init__.py
index ef2a5b3cf..349f427b9 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -286,10 +286,11 @@ from .utils import (
 )
 from .main import *  # type: ignore
 from .integrations import *
-from openai.error import (
+from .exceptions import (
     AuthenticationError,
     InvalidRequestError,
     RateLimitError,
     ServiceUnavailableError,
     OpenAIError,
+    ContextWindowExceededError
 )
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index f49d4c5b4..a37c35ddb 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 51744654f..0fdab360c 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index b2edcbec9..b8c0d351b 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/exceptions.py b/litellm/exceptions.py
index 7b48a343d..26a6e8b9f 100644
--- a/litellm/exceptions.py
+++ b/litellm/exceptions.py
@@ -28,6 +28,17 @@ class InvalidRequestError(InvalidRequestError):  # type: ignore
             self.message, f"{self.model}"
         )  # Call the base class constructor with the parameters it needs
 
+# Subclass of InvalidRequestError - gives more granularity for handling context window exceeded errors
+class ContextWindowExceededError(InvalidRequestError):  # type: ignore
+    def __init__(self, message, model, llm_provider):
+        self.status_code = 400
+        self.message = message
+        self.model = model
+        self.llm_provider = llm_provider
+        super().__init__(
+            self.message, self.model, self.llm_provider
+        )  # Call the base class constructor with the parameters it needs
+
 
 class RateLimitError(RateLimitError):  # type: ignore
     def __init__(self, message, llm_provider):
diff --git a/litellm/main.py b/litellm/main.py
index 6cdce68f3..2a68e0469 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1,4 +1,4 @@
-import os, openai, sys
+import os, openai, sys, json
 from typing import Any
 from functools import partial
 import dotenv, traceback, random, asyncio, time, contextvars
@@ -539,6 +539,7 @@ def completion(
             return response
         response = model_response
     elif custom_llm_provider == "together_ai" or ("togethercomputer" in model):
+        custom_llm_provider = "together_ai"
         import requests
 
         TOGETHER_AI_TOKEN = (
@@ -594,10 +595,10 @@ def completion(
         )
         # make this safe for reading, if output does not exist raise an error
         json_response = res.json()
-        if "output" not in json_response:
-            raise Exception(
-                f"liteLLM: Error Making TogetherAI request, JSON Response {json_response}"
-            )
+        if "error" in json_response:
+            raise Exception(json.dumps(json_response))
+        elif "error" in json_response["output"]:
+            raise Exception(json.dumps(json_response["output"]))
         completion_response = json_response["output"]["choices"][0]["text"]
         prompt_tokens = len(encoding.encode(prompt))
         completion_tokens = len(encoding.encode(completion_response))
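With the exception class exported and the TogetherAI error plumbing above in place, callers can catch the new error directly. A minimal sketch of the intended usage - the model name and oversized prompt here are illustrative, not part of this patch:

    from litellm import completion, ContextWindowExceededError

    messages = [{"role": "user", "content": "how does a court case get to the Supreme Court?" * 5000}]
    try:
        response = completion(model="togethercomputer/CodeLlama-34b-Python", messages=messages)
    except ContextWindowExceededError as e:
        # ContextWindowExceededError subclasses InvalidRequestError (status_code 400),
        # so existing InvalidRequestError handlers keep working unchanged.
        print(e.llm_provider, e.model, e.message)

Because the class extends InvalidRequestError, the change is backwards compatible: code that already catches InvalidRequestError will also catch the new, more specific error.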
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index 6620eb2ae..3367bfa16 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -12,6 +12,7 @@ from litellm import (
     completion,
     AuthenticationError,
     InvalidRequestError,
+    ContextWindowExceededError,
     RateLimitError,
     ServiceUnavailableError,
     OpenAIError,
@@ -32,11 +33,12 @@ litellm.failure_callback = ["sentry"]
 # Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered
 
 # models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
-test_model = "claude-instant-1"
-models = ["claude-instant-1"]
+test_model = "togethercomputer/CodeLlama-34b-Python"
+models = ["togethercomputer/CodeLlama-34b-Python"]
 
 
 def logging_fn(model_call_dict):
+    return
     if "model" in model_call_dict:
         print(f"model_call_dict: {model_call_dict['model']}")
     else:
@@ -49,15 +51,16 @@ def test_context_window(model):
     sample_text = "how does a court case get to the Supreme Court?" * 5000
     messages = [{"content": sample_text, "role": "user"}]
     try:
-        model = "chatgpt-test"
         print(f"model: {model}")
         response = completion(
             model=model,
             messages=messages,
-            custom_llm_provider="azure",
             logger_fn=logging_fn,
         )
         print(f"response: {response}")
+    except ContextWindowExceededError as e:
+        print(f"ContextWindowExceededError: {e.llm_provider}")
+        return
     except InvalidRequestError as e:
         print(f"InvalidRequestError: {e.llm_provider}")
         return
@@ -95,6 +98,9 @@ def invalid_auth(model):  # set the model key to an invalid key, depending on th
     elif model == "command-nightly":
         temporary_key = os.environ["COHERE_API_KEY"]
         os.environ["COHERE_API_KEY"] = "bad-key"
+    elif "togethercomputer" in model:
+        temporary_key = os.environ["TOGETHERAI_API_KEY"]
+        os.environ["TOGETHERAI_API_KEY"] = "84060c79880fc49df126d3e87b53f8a463ff6e1c6d27fe64207cde25cdfcd1f24a"
     elif (
         model
         == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
@@ -132,46 +138,48 @@ def invalid_auth(model):  # set the model key to an invalid key, depending on th
         == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
     ):
         os.environ["REPLICATE_API_KEY"] = temporary_key
+    elif ("togethercomputer" in model):
+        os.environ["TOGETHERAI_API_KEY"] = temporary_key
     return
 
 
 invalid_auth(test_model)
 
-# # Test 3: Rate Limit Errors
-# def test_model(model):
-#     try:
-#         sample_text = "how does a court case get to the Supreme Court?" * 50000
-#         messages = [{ "content": sample_text,"role": "user"}]
-#         custom_llm_provider = None
-#         if model == "chatgpt-test":
-#             custom_llm_provider = "azure"
-#         print(f"model: {model}")
-#         response = completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider)
-#     except RateLimitError:
-#         return True
-#     except OpenAIError:  # is at least an openai error -> in case of random model errors - e.g. overloaded server
-#         return True
-#     except Exception as e:
-#         print(f"Uncaught Exception {model}: {type(e).__name__} - {e}")
-#         pass
-#     return False
+# Test 3: Rate Limit Errors
+def test_model(model):
+    try:
+        sample_text = "how does a court case get to the Supreme Court?" * 50000
+        messages = [{ "content": sample_text,"role": "user"}]
+        custom_llm_provider = None
+        if model == "chatgpt-test":
+            custom_llm_provider = "azure"
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider)
+    except RateLimitError:
+        return True
+    except OpenAIError:  # is at least an openai error -> in case of random model errors - e.g. overloaded server
+        return True
+    except Exception as e:
+        print(f"Uncaught Exception {model}: {type(e).__name__} - {e}")
+        pass
+    return False
 
-# # Repeat each model 500 times
-# extended_models = [model for model in models for _ in range(250)]
+# Repeat each model 250 times
+extended_models = [model for model in models for _ in range(250)]
 
-# def worker(model):
-#     return test_model(model)
+def worker(model):
+    return test_model(model)
 
-# # Create a dictionary to store the results
-# counts = {True: 0, False: 0}
+# Create a dictionary to store the results
+counts = {True: 0, False: 0}
 
-# # Use Thread Pool Executor
-# with ThreadPoolExecutor(max_workers=500) as executor:
-#     # Use map to start the operation in thread pool
-#     results = executor.map(worker, extended_models)
+# Use Thread Pool Executor
+with ThreadPoolExecutor(max_workers=500) as executor:
+    # Use map to start the operation in thread pool
+    results = executor.map(worker, extended_models)
 
-#     # Iterate over results and count True/False
-#     for result in results:
-#         counts[result] += 1
+    # Iterate over results and count True/False
+    for result in results:
+        counts[result] += 1
 
-# accuracy_score = counts[True]/(counts[True] + counts[False])
-# print(f"accuracy_score: {accuracy_score}")
+accuracy_score = counts[True]/(counts[True] + counts[False])
+print(f"accuracy_score: {accuracy_score}")
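The updated context-window test relies on the provider-specific mapping added to utils.py below. A sketch of the behavior that mapping is meant to produce, assuming error_str inside exception_type is the stringified exception raised in main.py; the payload text is illustrative, shaped after the substring the branch checks for:

    import json
    from litellm import ContextWindowExceededError
    from litellm.utils import exception_type

    payload = {"error": "`inputs` tokens + `max_new_tokens` must be <= 2048"}
    try:
        exception_type(
            model="togethercomputer/CodeLlama-34b-Python",
            original_exception=Exception(json.dumps(payload)),
            custom_llm_provider="together_ai",
        )
    except ContextWindowExceededError as e:
        print(e.status_code, e.llm_provider)  # 400 together_ai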
diff --git a/litellm/utils.py b/litellm/utils.py
index 7ceff8196..777c9f3b6 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -25,6 +25,7 @@ from .exceptions import (
     RateLimitError,
     ServiceUnavailableError,
     OpenAIError,
+    ContextWindowExceededError
 )
 from typing import List, Dict, Union, Optional
 from .caching import Cache
@@ -1445,6 +1446,37 @@ def exception_type(model, original_exception, custom_llm_provider):
                         message=f"HuggingfaceException - {original_exception.message}",
                         llm_provider="huggingface",
                     )
+            elif custom_llm_provider == "together_ai":
+                error_response = json.loads(error_str)
+                if "error" in error_response and "`inputs` tokens + `max_new_tokens` must be <=" in error_response["error"]:
+                    exception_mapping_worked = True
+                    raise ContextWindowExceededError(
+                        message=error_response["error"],
+                        model=model,
+                        llm_provider="together_ai"
+                    )
+                elif "error" in error_response and "invalid private key" in error_response["error"]:
+                    exception_mapping_worked = True
+                    raise AuthenticationError(
+                        message=error_response["error"],
+                        llm_provider="together_ai"
+                    )
+                elif "error" in error_response and "INVALID_ARGUMENT" in error_response["error"]:
+                    exception_mapping_worked = True
+                    raise InvalidRequestError(
+                        message=error_response["error"],
+                        model=model,
+                        llm_provider="together_ai"
+                    )
+                elif "error_type" in error_response and error_response["error_type"] == "validation":
+                    exception_mapping_worked = True
+                    raise InvalidRequestError(
+                        message=error_response["error"],
+                        model=model,
+                        llm_provider="together_ai"
+                    )
+                print(f"error: {error_response}")
+                print(f"e: {original_exception}")
             raise original_exception  # base case - return the original exception
         else:
             raise original_exception
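One practical payoff of the finer-grained class, sketched below under the assumption that a caller wants to degrade gracefully on context overflow while letting other invalid-request errors fail fast (safe_completion is a hypothetical helper, not part of this patch):

    from litellm import completion, ContextWindowExceededError

    def safe_completion(model, messages):
        try:
            return completion(model=model, messages=messages)
        except ContextWindowExceededError:
            # Prompt exceeded the model's context window: retry with only
            # the most recent message instead of failing outright.
            return completion(model=model, messages=messages[-1:])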