diff --git a/litellm/__init__.py b/litellm/__init__.py
index 9bb9a81cd..5eea6346c 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -357,6 +357,7 @@ vertex_text_models: List = []
 vertex_code_text_models: List = []
 vertex_embedding_models: List = []
 vertex_anthropic_models: List = []
+vertex_llama3_models: List = []
 ai21_models: List = []
 nlp_cloud_models: List = []
 aleph_alpha_models: List = []
@@ -828,6 +829,7 @@ from .llms.petals import PetalsConfig
 from .llms.vertex_httpx import VertexGeminiConfig, GoogleAIStudioGeminiConfig
 from .llms.vertex_ai import VertexAIConfig, VertexAITextEmbeddingConfig
 from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig
+from .llms.vertex_ai_llama import VertexAILlama3Config
 from .llms.sagemaker import SagemakerConfig
 from .llms.ollama import OllamaConfig
 from .llms.ollama_chat import OllamaChatConfig
diff --git a/litellm/llms/vertex_ai_llama.py b/litellm/llms/vertex_ai_llama.py
index 4b5407faa..f33c127f7 100644
--- a/litellm/llms/vertex_ai_llama.py
+++ b/litellm/llms/vertex_ai_llama.py
@@ -53,39 +53,20 @@ class VertexAIError(Exception):
 
 class VertexAILlama3Config:
     """
-    Reference:https://docs.anthropic.com/claude/reference/messages_post
+    Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/llama#streaming
 
-    Note that the API for Claude on Vertex differs from the Anthropic API documentation in the following ways:
-
-    - `model` is not a valid parameter. The model is instead specified in the Google Cloud endpoint URL.
-    - `anthropic_version` is a required parameter and must be set to "vertex-2023-10-16".
-
-    The class `VertexAIAnthropicConfig` provides configuration for the VertexAI's Anthropic API interface. Below are the parameters:
+    The class `VertexAILlama3Config` provides configuration for Vertex AI's Llama API interface. Below are the parameters:
 
     - `max_tokens` Required (integer) max tokens,
-    - `anthropic_version` Required (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
-    - `system` Optional (string) the system prompt, conversion from openai format to this is handled in factory.py
-    - `temperature` Optional (float) The amount of randomness injected into the response
-    - `top_p` Optional (float) Use nucleus sampling.
-    - `top_k` Optional (int) Only sample from the top K options for each subsequent token
-    - `stop_sequences` Optional (List[str]) Custom text sequences that cause the model to stop generating
 
     Note: Please make sure to modify the default parameters as required for your use case.
     """
 
-    max_tokens: Optional[int] = (
-        4096  # anthropic max - setting this doesn't impact response, but is required by anthropic.
-    )
-    system: Optional[str] = None
-    temperature: Optional[float] = None
-    top_p: Optional[float] = None
-    top_k: Optional[int] = None
-    stop_sequences: Optional[List[str]] = None
+    max_tokens: Optional[int] = None
 
     def __init__(
         self,
         max_tokens: Optional[int] = None,
-        anthropic_version: Optional[str] = None,
     ) -> None:
         locals_ = locals()
         for key, value in locals_.items():
@@ -115,61 +96,13 @@ class VertexAILlama3Config:
     def get_supported_openai_params(self):
         return [
             "max_tokens",
-            "tools",
-            "tool_choice",
             "stream",
-            "stop",
-            "temperature",
-            "top_p",
-            "response_format",
         ]
 
     def map_openai_params(self, non_default_params: dict, optional_params: dict):
         for param, value in non_default_params.items():
             if param == "max_tokens":
                 optional_params["max_tokens"] = value
-            if param == "tools":
-                optional_params["tools"] = value
-            if param == "tool_choice":
-                _tool_choice: Optional[AnthropicMessagesToolChoice] = None
-                if value == "auto":
-                    _tool_choice = {"type": "auto"}
-                elif value == "required":
-                    _tool_choice = {"type": "any"}
-                elif isinstance(value, dict):
-                    _tool_choice = {"type": "tool", "name": value["function"]["name"]}
-
-                if _tool_choice is not None:
-                    optional_params["tool_choice"] = _tool_choice
-            if param == "stream":
-                optional_params["stream"] = value
-            if param == "stop":
-                optional_params["stop_sequences"] = value
-            if param == "temperature":
-                optional_params["temperature"] = value
-            if param == "top_p":
-                optional_params["top_p"] = value
-            if param == "response_format" and "response_schema" in value:
-                """
-                When using tools in this way:
-                https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode
-                - You usually want to provide a single tool
-                - You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
-                - Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model’s perspective.
-                """
-                _tool_choice = None
-                _tool_choice = {"name": "json_tool_call", "type": "tool"}
-
-                _tool = AnthropicMessagesTool(
-                    name="json_tool_call",
-                    input_schema={
-                        "type": "object",
-                        "properties": {"values": value["response_schema"]},  # type: ignore
-                    },
-                )
-
-                optional_params["tools"] = [_tool]
-                optional_params["tool_choice"] = _tool_choice
-                optional_params["json_mode"] = True
 
         return optional_params
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index f86ea8bd7..e9e599945 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1948,6 +1948,16 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "vertex_ai/meta/llama3-405b-instruct-maas": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
+    },
     "vertex_ai/imagegeneration@006": {
         "cost_per_image": 0.020,
         "litellm_provider": "vertex_ai-image-models",
diff --git a/litellm/tests/test_optional_params.py b/litellm/tests/test_optional_params.py
index bbfc88710..b8011960e 100644
--- a/litellm/tests/test_optional_params.py
+++ b/litellm/tests/test_optional_params.py
@@ -128,6 +128,19 @@ def test_azure_ai_mistral_optional_params():
     assert "user" not in optional_params
 
 
+def test_vertex_ai_llama_3_optional_params():
+    litellm.vertex_llama3_models = ["meta/llama3-405b-instruct-maas"]
+    litellm.drop_params = True
+    optional_params = get_optional_params(
+        model="meta/llama3-405b-instruct-maas",
+        user="John",
+        custom_llm_provider="vertex_ai",
+        max_tokens=10,
+        temperature=0.2,
+    )
+    assert "user" not in optional_params
+
+
 def test_azure_gpt_optional_params_gpt_vision():
     # for OpenAI, Azure all extra params need to get passed as extra_body to OpenAI python. We assert we actually set extra_body here
     optional_params = litellm.utils.get_optional_params(
diff --git a/litellm/utils.py b/litellm/utils.py
index 8baced4c5..035c1c72f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3088,6 +3088,15 @@ def get_optional_params(
             non_default_params=non_default_params,
             optional_params=optional_params,
         )
+    elif custom_llm_provider == "vertex_ai" and model in litellm.vertex_llama3_models:
+        supported_params = get_supported_openai_params(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.VertexAILlama3Config().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params=optional_params,
+        )
     elif custom_llm_provider == "sagemaker":
         ## check if unsupported param passed in
         supported_params = get_supported_openai_params(
@@ -4189,6 +4198,9 @@ def get_supported_openai_params(
         return litellm.GoogleAIStudioGeminiConfig().get_supported_openai_params()
     elif custom_llm_provider == "vertex_ai":
         if request_type == "chat_completion":
+            if model.startswith("meta/"):
+                return litellm.VertexAILlama3Config().get_supported_openai_params()
+
             return litellm.VertexAIConfig().get_supported_openai_params()
         elif request_type == "embeddings":
             return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params()
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index f86ea8bd7..e9e599945 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1948,6 +1948,16 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "vertex_ai/meta/llama3-405b-instruct-maas": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
+    },
     "vertex_ai/imagegeneration@006": {
         "cost_per_image": 0.020,
         "litellm_provider": "vertex_ai-image-models",
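
Review note, not part of the patch: a minimal usage sketch of what this diff enables. It assumes Vertex AI credentials are already configured (e.g. via GOOGLE_APPLICATION_CREDENTIALS); the project id and location below are placeholders. Because VertexAILlama3Config only maps max_tokens and stream, setting litellm.drop_params = True causes other OpenAI params (e.g. user, temperature) to be dropped rather than raising, which is exactly what the new test asserts.

import litellm

litellm.drop_params = True  # drop unsupported OpenAI params instead of raising

response = litellm.completion(
    model="vertex_ai/meta/llama3-405b-instruct-maas",  # matches the new pricing entry
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=10,  # supported: mapped by VertexAILlama3Config.map_openai_params
    temperature=0.2,  # not in get_supported_openai_params; dropped via drop_params
    vertex_project="my-gcp-project",  # placeholder GCP project id
    vertex_location="us-central1",  # placeholder region for the Llama 3 MaaS endpoint
)
print(response.choices[0].message.content)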