diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index c0ed4c412b..3358392261 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index fece25c518..200ddc9d0b 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index 5e755e34c3..5b5d928b20 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -4,13 +4,20 @@ import requests
 from litellm import logging
 import time
 from typing import Callable
+
 class AnthropicConstants(Enum):
     HUMAN_PROMPT = "\n\nHuman:"
     AI_PROMPT = "\n\nAssistant:"

+class AnthropicError(Exception):
+    def __init__(self, status_code, message):
+        self.status_code = status_code
+        self.message = message
+
 class AnthropicLLM:
-    def __init__(self, default_max_tokens_to_sample, api_key=None):
+    def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
+        self.encoding = encoding
         self.default_max_tokens_to_sample = default_max_tokens_to_sample
         self.completion_url = "https://api.anthropic.com/v1/complete"
         self.validate_environment(api_key=api_key)
@@ -33,9 +40,6 @@ class AnthropicLLM:
             raise ValueError("Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params")
         pass

-    def _stream(self): # logic for handling streaming with the LLM API
-        pass
-
     def completion(self, model: str, messages: list, model_response: dict, print_verbose: Callable, optional_params=None, litellm_params=None, logger_fn=None): # logic for parsing in - calling - parsing out model completion calls
         model = model
         prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
@@ -73,12 +77,13 @@ class AnthropicLLM:
             completion_response = response.json()
             print(f"completion_response: {completion_response}")
             if "error" in completion_response:
-                raise Exception(completion_response["error"])
+                raise AnthropicError(message=completion_response["error"], status_code=response.status_code)
             else:
                 model_response["choices"][0]["message"]["content"] = completion_response["completion"]
+            ## CALCULATING USAGE
-            prompt_tokens = 0
-            completion_tokens = 0
+            prompt_tokens = len(self.encoding.encode(prompt)) ##[TODO] use the anthropic tokenizer here
+            completion_tokens = len(self.encoding.encode(model_response["choices"][0]["message"]["content"])) ##[TODO] use the anthropic tokenizer here

             model_response["created"] = time.time()
@@ -91,7 +96,4 @@ class AnthropicLLM:
         return model_response

     def embedding(): # logic for parsing in - calling - parsing out model embedding calls
-        pass
-
-    def stream(): # logic for how to parse in-out model completion streams
-        pass
\ No newline at end of file
+        pass
\ No newline at end of file
diff --git a/litellm/llms/base.py b/litellm/llms/base.py
new file mode 100644
index 0000000000..368df96245
--- /dev/null
+++ b/litellm/llms/base.py
@@ -0,0 +1,11 @@
+## This is a template base class to be used for adding new LLM providers via API calls
+
+class BaseLLM():
+    def validate_environment(): # set up the environment required to run the model
+        pass
+
+    def completion(): # logic for parsing in - calling - parsing out model completion calls
+        pass
+
+    def embedding(): # logic for parsing in - calling - parsing out model embedding calls
+        pass
\ No newline at end of file
diff --git a/litellm/main.py b/litellm/main.py
index bead3e1d86..9ee8beaefb 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -208,7 +208,7 @@ def completion(
         response = model_response
     elif model in litellm.anthropic_models:
         anthropic_key = api_key if api_key is not None else litellm.anthropic_key
-        anthropic_client = AnthropicLLM(default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key)
+        anthropic_client = AnthropicLLM(encoding=encoding, default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key)
         model_response = anthropic_client.completion(model=model, messages=messages, model_response=model_response, print_verbose=print_verbose, optional_params=optional_params, litellm_params=litellm_params, logger_fn=logger_fn)
         if 'stream' in optional_params and optional_params['stream'] == True: # don't try to access stream object,
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index 16f7ae65d4..e224fdf7a6 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -21,7 +21,8 @@ litellm.failure_callback = ["sentry"]
 # Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered

 # models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
-models = ["command-nightly"]
+test_model = "claude-instant-1"
+models = ["claude-instant-1"]

 def logging_fn(model_call_dict):
     if "model" in model_call_dict:
         print(f"model_call_dict: {model_call_dict['model']}")
@@ -35,7 +36,7 @@ def test_context_window(model):
     sample_text = "how does a court case get to the Supreme Court?" * 5000
     messages = [{"content": sample_text, "role": "user"}]
     try:
-        azure = model == "chatgpt-test"
+        model = "chatgpt-test"
         print(f"model: {model}")
         response = completion(model=model, messages=messages, custom_llm_provider="azure", logger_fn=logging_fn)
         print(f"response: {response}")
@@ -51,7 +52,7 @@ def test_context_window(model):
         print(f"Uncaught Exception - {e}")
         pytest.fail(f"Error occurred: {e}")
     return
-test_context_window("command-nightly")
+test_context_window(test_model)

 # Test 2: InvalidAuth Errors
 @pytest.mark.parametrize("model", models)
@@ -101,7 +102,7 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
     elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
         os.environ["REPLICATE_API_KEY"] = temporary_key
     return
-invalid_auth("command-nightly")
+invalid_auth(test_model)
 # # Test 3: Rate Limit Errors
 # def test_model(model):
 #     try:
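
Reviewer note (not part of the diff): the sketch below illustrates the two behaviors this change introduces in AnthropicLLM.completion -- wrapping provider error payloads in AnthropicError so the HTTP status code travels with the message, and approximating prompt/completion token counts with the encoding object now injected from litellm/main.py. It is a minimal, self-contained approximation, assuming a tiktoken cl100k_base encoding as a stand-in for Anthropic's own tokenizer (the diff itself marks the tokenizer choice as a TODO); the strings used are made up for illustration.

```python
# Illustrative sketch only -- mirrors the pattern added in this diff, not litellm's exact code.
import tiktoken


class AnthropicError(Exception):
    """Carries the HTTP status code alongside the provider's error message."""
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        super().__init__(message)


# Token accounting as done in AnthropicLLM.completion(): count tokens with the
# encoding passed in from main.py (here tiktoken's cl100k_base as a placeholder).
encoding = tiktoken.get_encoding("cl100k_base")
prompt = "\n\nHuman: how does a court case get to the Supreme Court?\n\nAssistant:"
completion_text = "A case usually reaches the Supreme Court on appeal ..."  # hypothetical output

prompt_tokens = len(encoding.encode(prompt))
completion_tokens = len(encoding.encode(completion_text))
print(prompt_tokens, completion_tokens, prompt_tokens + completion_tokens)

# Error path: an error payload from the API is surfaced with its status code,
# so callers can branch on e.status_code instead of parsing a bare Exception.
try:
    raise AnthropicError(status_code=401, message="invalid x-api-key")  # simulated failure
except AnthropicError as e:
    print(f"Anthropic call failed ({e.status_code}): {e.message}")
```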