diff --git a/docs/my-website/docs/providers/anthropic.md b/docs/my-website/docs/providers/anthropic.md
index cfa14cd325..198a6a03dc 100644
--- a/docs/my-website/docs/providers/anthropic.md
+++ b/docs/my-website/docs/providers/anthropic.md
@@ -1,9 +1,9 @@
 # Anthropic
 LiteLLM supports
 
+- `claude-3` (`claude-3-opus-20240229`, `claude-3-sonnet-20240229`)
 - `claude-2`
 - `claude-2.1`
-- `claude-instant-1`
 - `claude-instant-1.2`
 
 ## API Keys
@@ -24,11 +24,42 @@ from litellm import completion
 os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
 
 messages = [{"role": "user", "content": "Hey! how's it going?"}]
-response = completion(model="claude-instant-1", messages=messages)
+response = completion(model="claude-3-opus-20240229", messages=messages)
 print(response)
 ```
 
-## Usage - "Assistant Pre-fill"
+
+## Usage - Streaming
+Just set `stream=True` when calling completion.
+
+```python
+import os
+from litellm import completion
+
+# set env
+os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
+
+messages = [{"role": "user", "content": "Hey! how's it going?"}]
+response = completion(model="claude-3-opus-20240229", messages=messages, stream=True)
+for chunk in response:
+    print(chunk["choices"][0]["delta"]["content"])  # same as openai format
+```
+
+
+
+## Supported Models
+
+| Model Name       | Function Call                              |
+|------------------|--------------------------------------------|
+| claude-3-opus  | `completion('claude-3-opus-20240229', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
+| claude-3-sonnet  | `completion('claude-3-sonnet-20240229', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
+| claude-2.1  | `completion('claude-2.1', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
+| claude-2  | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
+| claude-instant-1.2  | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
+
+## Advanced
+
+### Usage - "Assistant Pre-fill"
 
 You can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array.
 
@@ -50,7 +81,7 @@ response = completion(model="claude-2.1", messages=messages)
 print(response)
 ```
 
-### Example prompt sent to Claude
+#### Example prompt sent to Claude
 
 ```
 
@@ -61,7 +92,7 @@ Human: How do you say 'Hello' in German? Return your answer as a JSON object, li
 Assistant: {
 ```
 
-## Usage - "System" messages
+### Usage - "System" messages
 If you're using Anthropic's Claude 2.1 with Bedrock, `system` role messages are properly formatted for you.
 
 ```python
@@ -78,7 +109,7 @@ messages = [
 response = completion(model="claude-2.1", messages=messages)
 ```
 
-### Example prompt sent to Claude
+#### Example prompt sent to Claude
 
 ```
 You are a snarky assistant.
@@ -88,28 +119,3 @@ Human: How do I boil water?
 Assistant:
 ```
 
-## Streaming
-Just set `stream=True` when calling completion.
-
-```python
-import os
-from litellm import completion
-
-# set env
-os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
-
-messages = [{"role": "user", "content": "Hey! how's it going?"}]
-response = completion(model="claude-instant-1", messages=messages, stream=True)
-for chunk in response:
-    print(chunk["choices"][0]["delta"]["content"])  # same as openai format
-```
-
-
-### Model Details
-
-| Model Name       | Function Call                              | Required OS Variables                |
-|------------------|--------------------------------------------|--------------------------------------|
-| claude-2.1  | `completion('claude-2.1', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
-| claude-2  | `completion('claude-2', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
-| claude-instant-1  | `completion('claude-instant-1', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
-| claude-instant-1.2  | `completion('claude-instant-1.2', messages)` | `os.environ['ANTHROPIC_API_KEY']`       |
diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index 150ae0e076..44a1b128a9 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -20,7 +20,7 @@ class AnthropicError(Exception):
         self.status_code = status_code
         self.message = message
         self.request = httpx.Request(
-            method="POST", url="https://api.anthropic.com/v1/complete"
+            method="POST", url="https://api.anthropic.com/v1/messages"
         )
         self.response = httpx.Response(status_code=status_code, request=self.request)
         super().__init__(
@@ -35,9 +35,7 @@ class AnthropicConfig:
     to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
     """
 
-    max_tokens_to_sample: Optional[
-        int
-    ] = litellm.max_tokens  # anthropic requires a default
+    max_tokens: Optional[int] = litellm.max_tokens  # anthropic requires a default
     stop_sequences: Optional[list] = None
     temperature: Optional[int] = None
     top_p: Optional[int] = None
@@ -46,7 +44,7 @@ class AnthropicConfig:
 
     def __init__(
         self,
-        max_tokens_to_sample: Optional[int] = 256,  # anthropic requires a default
+        max_tokens: Optional[int] = 256,  # anthropic requires a default
         stop_sequences: Optional[list] = None,
         temperature: Optional[int] = None,
         top_p: Optional[int] = None,
@@ -123,6 +121,35 @@ def completion(
         prompt = prompt_factory(
             model=model, messages=messages, custom_llm_provider="anthropic"
         )
+    """
+    format messages for anthropic
+    1. Anthropic supports roles like "user" and "assistant", (here litellm translates system-> assistant)
+    2. The first message always needs to be of role "user"
+    3. Each message must alternate between "user" and "assistant" (this is not addressed as now by litellm)
+    4. final assistant content cannot end with trailing whitespace (anthropic raises an error otherwise)
+    """
+    # 1. Anthropic only supports roles like "user" and "assistant"
+    for idx, message in enumerate(messages):
+        if message["role"] == "system":
+            message["role"] = "assistant"
+
+        # if this is the final assistant message, remove trailing whitespace
+        # TODO: only do this if it's the final assistant message
+        if message["role"] == "assistant":
+            message["content"] = message["content"].strip()
+
+    # 2. The first message always needs to be of role "user"
+    if len(messages) > 0:
+        if messages[0]["role"] != "user":
+            # find the index of the first user message
+            for i, message in enumerate(messages):
+                if message["role"] == "user":
+                    break
+
+            # remove the user message at existing position and add it to the front
+            messages.pop(i)
+            # move the first user message to the front
+            messages = [message] + messages
 
     ## Load Config
     config = litellm.AnthropicConfig.get_config()
@@ -134,7 +161,7 @@ def completion(
 
     data = {
         "model": model,
-        "prompt": prompt,
+        "messages": messages,
         **optional_params,
     }
 
@@ -173,7 +200,7 @@ def completion(
 
         ## LOGGING
         logging_obj.post_call(
-            input=prompt,
+            input=messages,
             api_key=api_key,
             original_response=response.text,
             additional_args={"complete_input_dict": data},
@@ -191,20 +218,20 @@ def completion(
                 message=str(completion_response["error"]),
                 status_code=response.status_code,
             )
+        elif len(completion_response["content"]) == 0:
+            raise AnthropicError(
+                message="No content in response",
+                status_code=response.status_code,
+            )
         else:
-            if len(completion_response["completion"]) > 0:
-                model_response["choices"][0]["message"][
-                    "content"
-                ] = completion_response["completion"]
+            text_content = completion_response["content"][0].get("text", None)
+            model_response.choices[0].message.content = text_content  # type: ignore
             model_response.choices[0].finish_reason = completion_response["stop_reason"]
 
         ## CALCULATING USAGE
-        prompt_tokens = len(
-            encoding.encode(prompt)
-        )  ##[TODO] use the anthropic tokenizer here
-        completion_tokens = len(
-            encoding.encode(model_response["choices"][0]["message"].get("content", ""))
-        )  ##[TODO] use the anthropic tokenizer here
+        prompt_tokens = completion_response["usage"]["input_tokens"]
+        completion_tokens = completion_response["usage"]["output_tokens"]
+        total_tokens = prompt_tokens + completion_tokens
 
         model_response["created"] = int(time.time())
         model_response["model"] = model
diff --git a/litellm/main.py b/litellm/main.py
index 67586603d9..9fbd1b8283 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1023,7 +1023,7 @@ def completion(
                 api_base
                 or litellm.api_base
                 or get_secret("ANTHROPIC_API_BASE")
-                or "https://api.anthropic.com/v1/complete"
+                or "https://api.anthropic.com/v1/messages"
             )
             custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
             response = anthropic.completion(
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d97c042c5a..b29e152294 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -643,6 +643,22 @@
         "litellm_provider": "anthropic",
         "mode": "chat"
     },
+    "claude-3-opus-20240229": {
+        "max_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000075,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
+    "claude-3-sonnet-20240229": {
+        "max_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
     "text-bison": {
         "max_tokens": 8192,
         "input_cost_per_token": 0.000000125,
diff --git a/litellm/tests/test_add_function_to_prompt.py b/litellm/tests/test_add_function_to_prompt.py
index 932e6edd14..93b09cd8c8 100644
--- a/litellm/tests/test_add_function_to_prompt.py
+++ b/litellm/tests/test_add_function_to_prompt.py
@@ -47,8 +47,9 @@ test_function_call_non_openai_model()
 ## case 2: add_function_to_prompt set
 def test_function_call_non_openai_model_litellm_mod_set():
     litellm.add_function_to_prompt = True
+    litellm.set_verbose = True
     try:
-        model = "claude-instant-1"
+        model = "claude-instant-1.2"
         messages = [{"role": "user", "content": "what's the weather in sf?"}]
         functions = [
             {
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index f6be6e9528..a9d41be8d1 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -56,7 +56,7 @@ def test_completion_custom_provider_model_name():
 def test_completion_claude():
     litellm.set_verbose = True
     litellm.cache = None
-    litellm.AnthropicConfig(max_tokens_to_sample=200, metadata={"user_id": "1224"})
+    litellm.AnthropicConfig(max_tokens=200, metadata={"user_id": "1224"})
     messages = [
         {
             "role": "system",
@@ -67,7 +67,7 @@ def test_completion_claude():
     try:
         # test without max tokens
         response = completion(
-            model="claude-instant-1",
+            model="claude-instant-1.2",
             messages=messages,
             request_timeout=10,
         )
@@ -84,6 +84,40 @@ def test_completion_claude():
 # test_completion_claude()
 
 
+def test_completion_claude_3():
+    litellm.set_verbose = True
+    messages = [{"role": "user", "content": "Hello, world"}]
+    try:
+        # test without max tokens
+        response = completion(
+            model="anthropic/claude-3-opus-20240229",
+            messages=messages,
+        )
+        # Add any assertions, here to check response args
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
+def test_completion_claude_3_stream():
+    litellm.set_verbose = False
+    messages = [{"role": "user", "content": "Hello, world"}]
+    try:
+        # test without max tokens
+        response = completion(
+            model="anthropic/claude-3-opus-20240229",
+            messages=messages,
+            max_tokens=10,
+            stream=True,
+        )
+        # Add any assertions, here to check response args
+        print(response)
+        for chunk in response:
+            print(chunk)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 def test_completion_mistral_api():
     try:
         litellm.set_verbose = True
@@ -163,19 +197,17 @@ def test_completion_mistral_api_modified_input():
 
 def test_completion_claude2_1():
     try:
+        litellm.set_verbose = True
         print("claude2.1 test request")
         messages = [
             {
                 "role": "system",
-                "content": "Your goal is generate a joke on the topic user gives",
+                "content": "Your goal is generate a joke on the topic user gives.",
             },
-            {"role": "assistant", "content": "Hi, how can i assist you today?"},
             {"role": "user", "content": "Generate a 3 liner joke for me"},
         ]
         # test without max tokens
-        response = completion(
-            model="claude-2.1", messages=messages, request_timeout=10, max_tokens=10
-        )
+        response = completion(model="claude-2.1", messages=messages)
         # Add any assertions here to check the response
         print(response)
         print(response.usage)
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index 9c90014c0d..311bbfa571 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -70,7 +70,7 @@ models = ["command-nightly"]
 @pytest.mark.parametrize("model", models)
 def test_context_window_with_fallbacks(model):
     ctx_window_fallback_dict = {
-        "command-nightly": "claude-2",
+        "command-nightly": "claude-2.1",
         "gpt-3.5-turbo-instruct": "gpt-3.5-turbo-16k",
         "azure/chatgpt-v-2": "gpt-3.5-turbo-16k",
     }
diff --git a/litellm/tests/test_provider_specific_config.py b/litellm/tests/test_provider_specific_config.py
index 5e5d19c786..dcb4dcb4c7 100644
--- a/litellm/tests/test_provider_specific_config.py
+++ b/litellm/tests/test_provider_specific_config.py
@@ -53,7 +53,7 @@ def claude_test_completion():
     try:
         # OVERRIDE WITH DYNAMIC MAX TOKENS
         response_1 = litellm.completion(
-            model="claude-instant-1",
+            model="claude-instant-1.2",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
             max_tokens=10,
         )
@@ -63,7 +63,7 @@ def claude_test_completion():
 
         # USE CONFIG TOKENS
         response_2 = litellm.completion(
-            model="claude-instant-1",
+            model="claude-instant-1.2",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
         )
         # Add any assertions here to check the response
@@ -74,7 +74,7 @@ def claude_test_completion():
 
         try:
             response_3 = litellm.completion(
-                model="claude-instant-1",
+                model="claude-instant-1.2",
                 messages=[{"content": "Hello, how are you?", "role": "user"}],
                 n=2,
             )
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 127caf223b..7c182ee686 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -933,7 +933,7 @@ def test_router_anthropic_key_dynamic():
         {
             "model_name": "anthropic-claude",
             "litellm_params": {
-                "model": "claude-instant-1",
+                "model": "claude-instant-1.2",
                 "api_key": anthropic_api_key,
             },
         }
diff --git a/litellm/tests/test_router_timeout.py b/litellm/tests/test_router_timeout.py
index 139914f6df..3816c649e9 100644
--- a/litellm/tests/test_router_timeout.py
+++ b/litellm/tests/test_router_timeout.py
@@ -35,7 +35,7 @@ def test_router_timeouts():
         {
             "model_name": "anthropic-claude-instant-1.2",
             "litellm_params": {
-                "model": "claude-instant-1",
+                "model": "claude-instant-1.2",
                 "api_key": "os.environ/ANTHROPIC_API_KEY",
             },
             "tpm": 20000,
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 083953f574..5767a944b2 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -348,7 +348,7 @@ def test_completion_claude_stream():
             },
         ]
         response = completion(
-            model="claude-instant-1", messages=messages, stream=True, max_tokens=50
+            model="claude-instant-1.2", messages=messages, stream=True, max_tokens=50
         )
         complete_response = ""
         # Add any assertions here to check the response
diff --git a/litellm/tests/test_text_completion.py b/litellm/tests/test_text_completion.py
index 0b98111839..a16b864494 100644
--- a/litellm/tests/test_text_completion.py
+++ b/litellm/tests/test_text_completion.py
@@ -2836,6 +2836,8 @@ def test_completion_hf_prompt_array():
         print(str(e))
         if "is currently loading" in str(e):
             return
+        if "Service Unavailable" in str(e):
+            return
         pytest.fail(f"Error occurred: {e}")
 
 
diff --git a/litellm/utils.py b/litellm/utils.py
index b590d9c688..53e6e82450 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4200,7 +4200,7 @@ def get_optional_params(
         if top_p is not None:
             optional_params["top_p"] = top_p
         if max_tokens is not None:
-            optional_params["max_tokens_to_sample"] = max_tokens
+            optional_params["max_tokens"] = max_tokens
     elif custom_llm_provider == "cohere":
         ## check if unsupported param passed in
         supported_params = [
@@ -8032,10 +8032,21 @@ class CustomStreamWrapper:
         finish_reason = None
         if str_line.startswith("data:"):
             data_json = json.loads(str_line[5:])
-            text = data_json.get("completion", "")
-            if data_json.get("stop_reason", None):
+            type_chunk = data_json.get("type", None)
+            if type_chunk == "content_block_delta":
+                """
+                Anthropic content chunk
+                chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
+                """
+                text = data_json.get("delta", {}).get("text", "")
+            elif type_chunk == "message_delta":
+                """
+                Anthropic
+                chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
+                """
+                # TODO - get usage from this chunk, set in response
+                finish_reason = data_json.get("delta", {}).get("stop_reason", None)
                 is_finished = True
-                finish_reason = data_json["stop_reason"]
             return {
                 "text": text,
                 "is_finished": is_finished,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 1c79f7d2a6..23afaf04d6 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -643,6 +643,22 @@
         "litellm_provider": "anthropic",
         "mode": "chat"
     },
+    "claude-3-opus-20240229": {
+        "max_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000075,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
+    "claude-3-sonnet-20240229": {
+        "max_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "anthropic",
+        "mode": "chat"
+    },
     "text-bison": {
         "max_tokens": 8192,
         "input_cost_per_token": 0.000000125,