diff --git a/dist/litellm-1.3.3.dev3-py3-none-any.whl b/dist/litellm-1.3.3.dev3-py3-none-any.whl
new file mode 100644
index 0000000000..9b5e80b660
Binary files /dev/null and b/dist/litellm-1.3.3.dev3-py3-none-any.whl differ
diff --git a/dist/litellm-1.3.3.dev3.tar.gz b/dist/litellm-1.3.3.dev3.tar.gz
new file mode 100644
index 0000000000..aac943fa37
Binary files /dev/null and b/dist/litellm-1.3.3.dev3.tar.gz differ
diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index 738eb9024d..6b1d50ff8b 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -114,7 +114,7 @@ def completion(
     logging_obj.pre_call(
         input=prompt,
         api_key=api_key,
-        additional_args={"complete_input_dict": data},
+        additional_args={"complete_input_dict": data, "api_base": api_base},
     )
 
     ## COMPLETION CALL
diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index da71bdd66b..2aac70d6e3 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -230,6 +230,30 @@ def hf_chat_template(model: str, messages: list):
         raise Exception("Error rendering template")
 
 # Anthropic template
+def claude_2_1_pt(messages: list): # format - https://docs.anthropic.com/claude/docs/how-to-use-system-prompts
+    class AnthropicConstants(Enum):
+        HUMAN_PROMPT = "\n\nHuman: "
+        AI_PROMPT = "\n\nAssistant: "
+
+    prompt = ""
+    for idx, message in enumerate(messages): # needs to start with `\n\nHuman: ` and end with `\n\nAssistant: `
+        if message["role"] == "user":
+            prompt += (
+                f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
+            )
+        elif message["role"] == "system":
+            prompt += (
+                f"{message['content']}"
+            )
+        else:
+            prompt += (
+                f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
+            )
+        if idx == 0 and message["role"] == "assistant": # ensure the prompt always starts with `\n\nHuman: `
+            prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}" + prompt
+    prompt += f"{AnthropicConstants.AI_PROMPT.value}"
+    return prompt
+
 def anthropic_pt(messages: list): # format - https://docs.anthropic.com/claude/reference/complete_post
     class AnthropicConstants(Enum):
         HUMAN_PROMPT = "\n\nHuman: "
@@ -302,7 +326,10 @@ def prompt_factory(model: str, messages: list, custom_llm_provider: Optional[str
     if custom_llm_provider == "ollama":
         return ollama_pt(model=model, messages=messages)
     elif custom_llm_provider == "anthropic":
-        return anthropic_pt(messages=messages)
+        if "claude-2.1" in model:
+            return claude_2_1_pt(messages=messages)
+        else:
+            return anthropic_pt(messages=messages)
 
     try:
         if "meta-llama/llama-2" in model and "chat" in model:
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 47cea9fd17..f84a86bee6 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -40,9 +40,10 @@ def test_completion_custom_provider_model_name():
 
 
 def test_completion_claude():
-    litellm.set_verbose = False
+    litellm.set_verbose = True
     litellm.cache = None
     litellm.AnthropicConfig(max_tokens_to_sample=200, metadata={"user_id": "1224"})
+    messages = [{"role": "system", "content": """You are an upbeat, enthusiastic personal fitness coach named Sam. Sam is passionate about helping clients get fit and lead healthier lifestyles. You write in an encouraging and friendly tone and always try to guide your clients toward better fitness goals.
+If the user asks you something unrelated to fitness, either bring the topic back to fitness, or say that you cannot answer."""},{"content": user_message, "role": "user"}]
     try:
         # test without max tokens
         response = completion(
@@ -53,11 +54,14 @@ def test_completion_claude():
         print(response.usage)
         print(response.usage.completion_tokens)
         print(response["usage"]["completion_tokens"])
+        response = completion(
+            model="claude-2.1", messages=messages, request_timeout=10,
+        )
         # print("new cost tracking")
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-# test_completion_claude()
+test_completion_claude()
 
 def test_completion_claude2_1():
     try:
@@ -283,7 +287,7 @@ def hf_test_completion_tgi():
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-hf_test_completion_tgi()
+# hf_test_completion_tgi()
 
 def hf_test_completion_tgi_stream():
     try:
@@ -473,7 +477,7 @@ def test_completion_openai_with_optional_params():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_openai_with_optional_params()
+# test_completion_openai_with_optional_params()
 
 def test_completion_openai_litellm_key():
     try:
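For reference, a minimal sketch (not part of the patch) of how the new claude_2_1_pt routing is expected to behave, assuming the factory module is importable from the path shown in the diff; the message contents below are illustrative, not taken from the test suite:

from litellm.llms.prompt_templates.factory import prompt_factory

# Any Anthropic model string containing "claude-2.1" is routed to claude_2_1_pt,
# which emits the system text bare, prefixes user turns with "\n\nHuman: ",
# prefixes assistant turns with "\n\nAssistant: ", and always appends a final
# "\n\nAssistant: " so Claude 2.1 continues from the assistant turn.
messages = [
    {"role": "system", "content": "You are a helpful fitness coach."},  # illustrative
    {"role": "user", "content": "How should I start running?"},
]

prompt = prompt_factory(model="claude-2.1", messages=messages, custom_llm_provider="anthropic")
print(repr(prompt))
# Expected shape, per the claude_2_1_pt logic above:
# 'You are a helpful fitness coach.\n\nHuman: How should I start running?\n\nAssistant: '

Older Claude models keep using anthropic_pt, which folds system messages into a "Human:" turn instead.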