diff --git a/.circleci/config.yml b/.circleci/config.yml index e301bd96e..acd3a8058 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -230,6 +230,34 @@ jobs: # Store test results - store_test_results: path: test-results + llm_translation_testing: + docker: + - image: cimg/python:3.11 + working_directory: ~/project + + steps: + - checkout + - run: + name: Install Dependencies + command: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + pip install "pytest==7.3.1" + pip install "pytest-retry==1.6.3" + pip install "pytest-asyncio==0.21.1" + pip install "respx==0.21.1" + # Run pytest and generate JUnit XML report + - run: + name: Run tests + command: | + pwd + ls + python -m pytest -vv tests/llm_translation -x -s -v --junitxml=test-results/junit.xml --durations=5 + no_output_timeout: 120m + + # Store test results + - store_test_results: + path: test-results installing_litellm_on_python: docker: @@ -370,7 +398,7 @@ jobs: command: | pwd ls - python -m pytest -s -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests + python -m pytest -s -vv tests/ -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation no_output_timeout: 120m # Store test results @@ -694,6 +722,12 @@ workflows: only: - main - /litellm_.*/ + - llm_translation_testing: + filters: + branches: + only: + - main + - /litellm_.*/ - installing_litellm_on_python: filters: branches: @@ -711,6 +745,7 @@ workflows: - local_testing - build_and_test - load_testing + - llm_translation_testing - litellm_router_testing - litellm_assistants_api_testing - ui_endpoint_testing diff --git a/litellm/llms/AI21/chat.py b/litellm/llms/AI21/chat.py index 4eabaaa87..7a60b1904 100644 --- a/litellm/llms/AI21/chat.py +++ b/litellm/llms/AI21/chat.py @@ -75,6 +75,7 @@ class AI21ChatConfig: "tools", "response_format", "max_tokens", + "max_completion_tokens", "temperature", "top_p", "stop", @@ -90,6 +91,8 @@ class AI21ChatConfig: ) -> dict: supported_openai_params = self.get_supported_openai_params(model=model) for param, value in non_default_params.items(): - if param in supported_openai_params: + if param == "max_completion_tokens": + optional_params["max_tokens"] = value + elif param in supported_openai_params: optional_params[param] = value return optional_params diff --git a/litellm/llms/AzureOpenAI/azure.py b/litellm/llms/AzureOpenAI/azure.py index 8da9ee063..b1a9e1549 100644 --- a/litellm/llms/AzureOpenAI/azure.py +++ b/litellm/llms/AzureOpenAI/azure.py @@ -156,6 +156,7 @@ class AzureOpenAIConfig: "stream", "stop", "max_tokens", + "max_completion_tokens", "tools", "tool_choice", "presence_penalty", @@ -268,6 +269,9 @@ class AzureOpenAIConfig: optional_params["json_mode"] = True else: optional_params["response_format"] = value + elif param == "max_completion_tokens": + # TODO - Azure OpenAI will probably add support for this, we should pass it through when Azure adds support + optional_params["max_tokens"] = value elif param in supported_openai_params: optional_params[param] = value diff --git a/litellm/llms/OpenAI/gpt_transformation.py b/litellm/llms/OpenAI/gpt_transformation.py index be14031bd..4ff4790c9 100644 --- a/litellm/llms/OpenAI/gpt_transformation.py +++ b/litellm/llms/OpenAI/gpt_transformation.py @@ -92,6 +92,7 @@ class 
OpenAIGPTConfig: "logprobs", "top_logprobs", "max_tokens", + "max_completion_tokens", "n", "presence_penalty", "seed", diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py index 8504d5fe2..b37af10bf 100644 --- a/litellm/llms/OpenAI/openai.py +++ b/litellm/llms/OpenAI/openai.py @@ -190,6 +190,7 @@ class DeepInfraConfig: "functions", "logit_bias", "max_tokens", + "max_completion_tokens", "n", "presence_penalty", "stop", @@ -229,7 +230,9 @@ class DeepInfraConfig: ), status_code=400, ) - if param in supported_openai_params: + elif param == "max_completion_tokens": + optional_params["max_tokens"] = value + elif param in supported_openai_params: if value is not None: optional_params[param] = value return optional_params @@ -347,7 +350,9 @@ class OpenAIConfig: - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion. - - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion. + - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion. OpenAI has now deprecated in favor of max_completion_tokens, and is not compatible with o1 series models. + + - `max_completion_tokens` (integer or null): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. - `n` (integer or null): This optional parameter helps to set how many chat completion choices to generate for each input message. @@ -364,6 +369,7 @@ class OpenAIConfig: function_call: Optional[Union[str, dict]] = None functions: Optional[list] = None logit_bias: Optional[dict] = None + max_completion_tokens: Optional[int] = None max_tokens: Optional[int] = None n: Optional[int] = None presence_penalty: Optional[int] = None @@ -378,6 +384,7 @@ class OpenAIConfig: function_call: Optional[Union[str, dict]] = None, functions: Optional[list] = None, logit_bias: Optional[dict] = None, + max_completion_tokens: Optional[int] = None, max_tokens: Optional[int] = None, n: Optional[int] = None, presence_penalty: Optional[int] = None, diff --git a/litellm/llms/anthropic/chat.py b/litellm/llms/anthropic/chat.py index 0520506bb..cf4f23905 100644 --- a/litellm/llms/anthropic/chat.py +++ b/litellm/llms/anthropic/chat.py @@ -158,6 +158,7 @@ class AnthropicConfig: "temperature", "top_p", "max_tokens", + "max_completion_tokens", "tools", "tool_choice", "extra_headers", @@ -173,6 +174,8 @@ class AnthropicConfig: for param, value in non_default_params.items(): if param == "max_tokens": optional_params["max_tokens"] = value + if param == "max_completion_tokens": + optional_params["max_tokens"] = value if param == "tools": optional_params["tools"] = value if param == "tool_choice": diff --git a/litellm/llms/azure_text.py b/litellm/llms/azure_text.py index db8c516b2..6defd58ff 100644 --- a/litellm/llms/azure_text.py +++ b/litellm/llms/azure_text.py @@ -94,16 +94,16 @@ class AzureOpenAIConfig(OpenAIConfig): top_p: Optional[int] = None, ) -> None: super().__init__( - frequency_penalty, - function_call, - functions, - logit_bias, - max_tokens, - n, - presence_penalty, - stop, - temperature, - top_p, + frequency_penalty=frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + stop=stop, + temperature=temperature, + top_p=top_p, ) diff --git 
a/litellm/llms/bedrock/chat.py b/litellm/llms/bedrock/chat.py index 8d6d98ba6..35f0c794a 100644 --- a/litellm/llms/bedrock/chat.py +++ b/litellm/llms/bedrock/chat.py @@ -161,6 +161,7 @@ class AmazonCohereChatConfig: def get_supported_openai_params(self) -> List[str]: return [ "max_tokens", + "max_completion_tokens", "stream", "stop", "temperature", @@ -177,7 +178,7 @@ class AmazonCohereChatConfig: self, non_default_params: dict, optional_params: dict ) -> dict: for param, value in non_default_params.items(): - if param == "max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": optional_params["max_tokens"] = value if param == "stream": optional_params["stream"] = value @@ -1156,6 +1157,7 @@ class AmazonConverseConfig: def get_supported_openai_params(self, model: str) -> List[str]: supported_params = [ "max_tokens", + "max_completion_tokens", "stream", "stream_options", "stop", @@ -1263,7 +1265,7 @@ class AmazonConverseConfig: ), status_code=400, ) - if param == "max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": optional_params["maxTokens"] = value if param == "stream": optional_params["stream"] = value diff --git a/litellm/llms/bedrock/common_utils.py b/litellm/llms/bedrock/common_utils.py index 25379474e..86fced96a 100644 --- a/litellm/llms/bedrock/common_utils.py +++ b/litellm/llms/bedrock/common_utils.py @@ -5,7 +5,7 @@ Common utilities used across bedrock chat/embedding/image generation import os import types from enum import Enum -from typing import List, Optional, Union, Tuple +from typing import List, Optional, Tuple, Union import httpx @@ -158,6 +158,7 @@ class AmazonAnthropicClaude3Config: def get_supported_openai_params(self): return [ "max_tokens", + "max_completion_tokens", "tools", "tool_choice", "stream", @@ -169,7 +170,7 @@ class AmazonAnthropicClaude3Config: def map_openai_params(self, non_default_params: dict, optional_params: dict): for param, value in non_default_params.items(): - if param == "max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": optional_params["max_tokens"] = value if param == "tools": optional_params["tools"] = value @@ -240,11 +241,18 @@ class AmazonAnthropicConfig: def get_supported_openai_params( self, ): - return ["max_tokens", "temperature", "stop", "top_p", "stream"] + return [ + "max_tokens", + "max_completion_tokens", + "temperature", + "stop", + "top_p", + "stream", + ] def map_openai_params(self, non_default_params: dict, optional_params: dict): for param, value in non_default_params.items(): - if param == "max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": optional_params["max_tokens_to_sample"] = value if param == "temperature": optional_params["temperature"] = value diff --git a/litellm/llms/cerebras/chat.py b/litellm/llms/cerebras/chat.py index 13b8f0ee9..0b885a599 100644 --- a/litellm/llms/cerebras/chat.py +++ b/litellm/llms/cerebras/chat.py @@ -70,6 +70,7 @@ class CerebrasConfig: return [ "max_tokens", + "max_completion_tokens", "response_format", "seed", "stop", @@ -86,6 +87,8 @@ class CerebrasConfig: ) -> dict: supported_openai_params = self.get_supported_openai_params(model=model) for param, value in non_default_params.items(): - if param in supported_openai_params: + if param == "max_completion_tokens": + optional_params["max_tokens"] = value + elif param in supported_openai_params: optional_params[param] = value return optional_params diff --git a/litellm/llms/databricks/chat.py b/litellm/llms/databricks/chat.py index 
0421cd9e4..739abb91f 100644 --- a/litellm/llms/databricks/chat.py +++ b/litellm/llms/databricks/chat.py @@ -106,11 +106,19 @@ class DatabricksConfig: ] def get_supported_openai_params(self): - return ["stream", "stop", "temperature", "top_p", "max_tokens", "n"] + return [ + "stream", + "stop", + "temperature", + "top_p", + "max_tokens", + "max_completion_tokens", + "n", + ] def map_openai_params(self, non_default_params: dict, optional_params: dict): for param, value in non_default_params.items(): - if param == "max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": optional_params["max_tokens"] = value if param == "n": optional_params["n"] = value diff --git a/litellm/llms/fireworks_ai.py b/litellm/llms/fireworks_ai.py index e9caf887a..b6511689e 100644 --- a/litellm/llms/fireworks_ai.py +++ b/litellm/llms/fireworks_ai.py @@ -73,6 +73,7 @@ class FireworksAIConfig: "stream", "tools", "tool_choice", + "max_completion_tokens", "max_tokens", "temperature", "top_p", @@ -102,6 +103,8 @@ class FireworksAIConfig: else: # pass through the value of tool choice optional_params["tool_choice"] = value + elif param == "max_completion_tokens": + optional_params["max_tokens"] = value elif param in supported_openai_params: if value is not None: optional_params[param] = value diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py index 69715167b..fe44ea0d3 100644 --- a/litellm/llms/huggingface_restapi.py +++ b/litellm/llms/huggingface_restapi.py @@ -139,6 +139,7 @@ class HuggingfaceConfig: "stream", "temperature", "max_tokens", + "max_completion_tokens", "top_p", "stop", "n", @@ -167,7 +168,7 @@ class HuggingfaceConfig: optional_params["stream"] = value if param == "stop": optional_params["stop"] = value - if param == "max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": # HF TGI raises the following exception when max_new_tokens==0 # Failed: Error occurred: HuggingfaceException - Input validation error: `max_new_tokens` must be strictly positive if value == 0: diff --git a/litellm/llms/nvidia_nim.py b/litellm/llms/nvidia_nim.py index 6d2e4316b..99c88345e 100644 --- a/litellm/llms/nvidia_nim.py +++ b/litellm/llms/nvidia_nim.py @@ -78,6 +78,7 @@ class NvidiaNimConfig: "temperature", "top_p", "max_tokens", + "max_completion_tokens", ] elif model == "nvidia/nemotron-4-340b-reward": return [ @@ -92,6 +93,7 @@ class NvidiaNimConfig: "frequency_penalty", "presence_penalty", "max_tokens", + "max_completion_tokens", "stop", ] else: @@ -124,6 +126,7 @@ class NvidiaNimConfig: "frequency_penalty", "presence_penalty", "max_tokens", + "max_completion_tokens", "stop", "seed", ] @@ -133,6 +136,8 @@ class NvidiaNimConfig: ) -> dict: supported_openai_params = self.get_supported_openai_params(model=model) for param, value in non_default_params.items(): - if param in supported_openai_params: + if param == "max_completion_tokens": + optional_params["max_tokens"] = value + elif param in supported_openai_params: optional_params[param] = value return optional_params diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py index 2fc44f9cd..0191ea082 100644 --- a/litellm/llms/ollama_chat.py +++ b/litellm/llms/ollama_chat.py @@ -140,6 +140,7 @@ class OllamaChatConfig: ): return [ "max_tokens", + "max_completion_tokens", "stream", "top_p", "temperature", @@ -156,7 +157,7 @@ class OllamaChatConfig: self, model: str, non_default_params: dict, optional_params: dict ): for param, value in non_default_params.items(): - if param == "max_tokens": + 
if param == "max_tokens" or param == "max_completion_tokens": optional_params["num_predict"] = value if param == "stream": optional_params["stream"] = value diff --git a/litellm/llms/predibase.py b/litellm/llms/predibase.py index 81e28934d..1ec5937c6 100644 --- a/litellm/llms/predibase.py +++ b/litellm/llms/predibase.py @@ -154,6 +154,7 @@ class PredibaseConfig: return [ "stream", "temperature", + "max_completion_tokens", "max_tokens", "top_p", "stop", @@ -181,7 +182,7 @@ class PredibaseConfig: optional_params["stream"] = value if param == "stop": optional_params["stop"] = value - if param == "max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": # HF TGI raises the following exception when max_new_tokens==0 # Failed: Error occurred: HuggingfaceException - Input validation error: `max_new_tokens` must be strictly positive if value == 0: diff --git a/litellm/llms/text_completion_codestral.py b/litellm/llms/text_completion_codestral.py index 9bcd64631..feb3c6dac 100644 --- a/litellm/llms/text_completion_codestral.py +++ b/litellm/llms/text_completion_codestral.py @@ -141,6 +141,7 @@ class MistralTextCompletionConfig: "temperature", "top_p", "max_tokens", + "max_completion_tokens", "stream", "seed", "stop", @@ -154,7 +155,7 @@ class MistralTextCompletionConfig: optional_params["temperature"] = value if param == "top_p": optional_params["top_p"] = value - if param == "max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": optional_params["max_tokens"] = value if param == "stream" and value == True: optional_params["stream"] = value diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py index 712f51a98..858336bf0 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/vertex_and_google_ai_studio_gemini.py @@ -158,6 +158,7 @@ class VertexAIConfig: "temperature", "top_p", "max_tokens", + "max_completion_tokens", "stream", "tools", "tool_choice", @@ -184,7 +185,7 @@ class VertexAIConfig: optional_params["stop_sequences"] = [value] elif isinstance(value, list): optional_params["stop_sequences"] = value - if param == "max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": optional_params["max_output_tokens"] = value if param == "response_format" and value["type"] == "json_object": optional_params["response_mime_type"] = "application/json" @@ -319,6 +320,7 @@ class GoogleAIStudioGeminiConfig: # key diff from VertexAI - 'frequency_penalty "temperature", "top_p", "max_tokens", + "max_completion_tokens", "stream", "tools", "tool_choice", @@ -413,7 +415,7 @@ class GoogleAIStudioGeminiConfig: # key diff from VertexAI - 'frequency_penalty optional_params["stop_sequences"] = [value] elif isinstance(value, list): optional_params["stop_sequences"] = value - if param == "max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": optional_params["max_output_tokens"] = value if param == "response_format": # type: ignore if value["type"] == "json_object": # type: ignore @@ -554,6 +556,7 @@ class VertexGeminiConfig: "temperature", "top_p", "max_tokens", + "max_completion_tokens", "stream", "tools", "functions", @@ -653,7 +656,7 @@ class VertexGeminiConfig: optional_params["stop_sequences"] = [value] elif isinstance(value, list): optional_params["stop_sequences"] = value - if param == 
"max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": optional_params["max_output_tokens"] = value if param == "response_format" and isinstance(value, dict): # type: ignore if value["type"] == "json_object": diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_anthropic.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_anthropic.py index 13d8edeb9..ecb11e1c9 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_anthropic.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_anthropic.py @@ -114,6 +114,7 @@ class VertexAIAnthropicConfig: def get_supported_openai_params(self): return [ "max_tokens", + "max_completion_tokens", "tools", "tool_choice", "stream", @@ -125,7 +126,7 @@ class VertexAIAnthropicConfig: def map_openai_params(self, non_default_params: dict, optional_params: dict): for param, value in non_default_params.items(): - if param == "max_tokens": + if param == "max_tokens" or param == "max_completion_tokens": optional_params["max_tokens"] = value if param == "tools": optional_params["tools"] = value diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py index 83b8dfcfb..2d9d6076e 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/ai21/transformation.py @@ -46,6 +46,10 @@ class VertexAIAi21Config: def map_openai_params( self, non_default_params: dict, optional_params: dict, model: str ): + if "max_completion_tokens" in non_default_params: + non_default_params["max_tokens"] = non_default_params.pop( + "max_completion_tokens" + ) return litellm.OpenAIConfig().map_openai_params( non_default_params=non_default_params, optional_params=optional_params, diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py index 97223f1fd..683e0ff8e 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/llama3/transformation.py @@ -52,6 +52,10 @@ class VertexAILlama3Config: def map_openai_params( self, non_default_params: dict, optional_params: dict, model: str ): + if "max_completion_tokens" in non_default_params: + non_default_params["max_tokens"] = non_default_params.pop( + "max_completion_tokens" + ) return litellm.OpenAIConfig().map_openai_params( non_default_params=non_default_params, optional_params=optional_params, diff --git a/litellm/llms/volcengine.py b/litellm/llms/volcengine.py index eb289d1c4..9b288c868 100644 --- a/litellm/llms/volcengine.py +++ b/litellm/llms/volcengine.py @@ -60,6 +60,7 @@ class VolcEngineConfig: "logit_bias", "logprobs", "top_logprobs", + "max_completion_tokens", "max_tokens", "n", "presence_penalty", @@ -82,6 +83,8 @@ class VolcEngineConfig: ) -> dict: supported_openai_params = self.get_supported_openai_params(model) for param, value in non_default_params.items(): - if param in supported_openai_params: + if param == "max_completion_tokens": + optional_params["max_tokens"] = value + elif param in supported_openai_params: optional_params[param] = value return optional_params diff --git a/litellm/main.py b/litellm/main.py index a50c908c6..80136e997 100644 --- 
a/litellm/main.py +++ b/litellm/main.py @@ -264,6 +264,7 @@ async def acompletion( stream_options: Optional[dict] = None, stop=None, max_tokens: Optional[int] = None, + max_completion_tokens: Optional[int] = None, presence_penalty: Optional[float] = None, frequency_penalty: Optional[float] = None, logit_bias: Optional[dict] = None, @@ -303,6 +304,7 @@ async def acompletion( stream_options (dict, optional): A dictionary containing options for the streaming response. Only use this if stream is True. stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens. max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity). + max_completion_tokens (integer, optional): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far. frequency_penalty: It is used to penalize new tokens based on their frequency in the text so far. logit_bias (dict, optional): Used to modify the probability of specific tokens appearing in the completion. @@ -341,6 +343,7 @@ async def acompletion( "stream_options": stream_options, "stop": stop, "max_tokens": max_tokens, + "max_completion_tokens": max_completion_tokens, "presence_penalty": presence_penalty, "frequency_penalty": frequency_penalty, "logit_bias": logit_bias, @@ -633,6 +636,7 @@ def completion( stream: Optional[bool] = None, stream_options: Optional[dict] = None, stop=None, + max_completion_tokens: Optional[int] = None, max_tokens: Optional[int] = None, presence_penalty: Optional[float] = None, frequency_penalty: Optional[float] = None, @@ -675,6 +679,7 @@ def completion( stream_options (dict, optional): A dictionary containing options for the streaming response. Only set this when you set stream: true. stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens. max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity). + max_completion_tokens (integer, optional): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far. frequency_penalty: It is used to penalize new tokens based on their frequency in the text so far. logit_bias (dict, optional): Used to modify the probability of specific tokens appearing in the completion. 
@@ -759,6 +764,7 @@ def completion( "stream", "stream_options", "stop", + "max_completion_tokens", "max_tokens", "presence_penalty", "frequency_penalty", @@ -917,6 +923,7 @@ def completion( stream_options=stream_options, stop=stop, max_tokens=max_tokens, + max_completion_tokens=max_completion_tokens, presence_penalty=presence_penalty, frequency_penalty=frequency_penalty, logit_bias=logit_bias, diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 1fb2997c0..d501d8572 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -6,7 +6,7 @@ "input_cost_per_token": 0.0000, "output_cost_per_token": 0.000, "litellm_provider": "one of https://docs.litellm.ai/docs/providers", - "mode": "one of chat, embedding, completion, image_generation, audio_transcription", + "mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 6dd1bad5f..d6e424de5 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -1317,11 +1317,12 @@ import openai def test_completion_gpt4_turbo(): + litellm.set_verbose = True try: response = completion( model="gpt-4-1106-preview", messages=messages, - max_tokens=10, + max_completion_tokens=10, ) print(response) except openai.RateLimitError: diff --git a/litellm/utils.py b/litellm/utils.py index d3e757ae8..af0bbc98f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2765,6 +2765,7 @@ def get_optional_params( stream_options=None, stop=None, max_tokens=None, + max_completion_tokens=None, presence_penalty=None, frequency_penalty=None, logit_bias=None, @@ -2842,6 +2843,7 @@ def get_optional_params( "stream_options": None, "stop": None, "max_tokens": None, + "max_completion_tokens": None, "presence_penalty": None, "frequency_penalty": None, "logit_bias": None, diff --git a/tests/llm_translation/conftest.py b/tests/llm_translation/conftest.py new file mode 100644 index 000000000..eca0bc431 --- /dev/null +++ b/tests/llm_translation/conftest.py @@ -0,0 +1,54 @@ +# conftest.py + +import importlib +import os +import sys + +import pytest + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import litellm + + +@pytest.fixture(scope="function", autouse=True) +def setup_and_teardown(): + """ + This fixture reloads litellm before every function. To speed up testing by removing callbacks being chained. 
+ """ + curr_dir = os.getcwd() # Get the current working directory + sys.path.insert( + 0, os.path.abspath("../..") + ) # Adds the project directory to the system path + + import litellm + from litellm import Router + + importlib.reload(litellm) + import asyncio + + loop = asyncio.get_event_loop_policy().new_event_loop() + asyncio.set_event_loop(loop) + print(litellm) + # from litellm import Router, completion, aembedding, acompletion, embedding + yield + + # Teardown code (executes after the yield point) + loop.close() # Close the loop created earlier + asyncio.set_event_loop(None) # Remove the reference to the loop + + +def pytest_collection_modifyitems(config, items): + # Separate tests in 'test_amazing_proxy_custom_logger.py' and other tests + custom_logger_tests = [ + item for item in items if "custom_logger" in item.parent.name + ] + other_tests = [item for item in items if "custom_logger" not in item.parent.name] + + # Sort tests based on their names + custom_logger_tests.sort(key=lambda x: x.name) + other_tests.sort(key=lambda x: x.name) + + # Reorder the items list + items[:] = custom_logger_tests + other_tests diff --git a/litellm/tests/test_fireworks_ai.py b/tests/llm_translation/test_fireworks_ai_translation.py similarity index 100% rename from litellm/tests/test_fireworks_ai.py rename to tests/llm_translation/test_fireworks_ai_translation.py diff --git a/tests/llm_translation/test_max_completion_tokens.py b/tests/llm_translation/test_max_completion_tokens.py new file mode 100644 index 000000000..d44a2db37 --- /dev/null +++ b/tests/llm_translation/test_max_completion_tokens.py @@ -0,0 +1,342 @@ +import json +import os +import sys + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +from datetime import datetime +from unittest.mock import AsyncMock +from dotenv import load_dotenv + +load_dotenv() +import httpx +import pytest +from respx import MockRouter + +import litellm +from litellm import Choices, Message, ModelResponse + +# Adds the parent directory to the system path + + +def return_mocked_response(model: str): + if model == "bedrock/mistral.mistral-large-2407-v1:0": + return { + "metrics": {"latencyMs": 316}, + "output": { + "message": { + "content": [{"text": "Hello! How are you doing today? 
How can"}], + "role": "assistant", + } + }, + "stopReason": "max_tokens", + "usage": {"inputTokens": 5, "outputTokens": 10, "totalTokens": 15}, + } + + +@pytest.mark.parametrize( + "model", + [ + "bedrock/mistral.mistral-large-2407-v1:0", + ], +) +@pytest.mark.respx +@pytest.mark.asyncio() +async def test_bedrock_max_completion_tokens(model: str, respx_mock: MockRouter): + """ + Tests that: + - max_completion_tokens is passed as max_tokens to bedrock models + """ + litellm.set_verbose = True + + mock_response = return_mocked_response(model) + _model = model.split("/")[1] + print("\n\nmock_response: ", mock_response) + url = f"https://bedrock-runtime.us-west-2.amazonaws.com/model/{_model}/converse" + mock_request = respx_mock.post(url).mock( + return_value=httpx.Response(200, json=mock_response) + ) + + response = await litellm.acompletion( + model=model, + max_completion_tokens=10, + messages=[{"role": "user", "content": "Hello!"}], + ) + + assert mock_request.called + request_body = json.loads(mock_request.calls[0].request.content) + + print("request_body: ", request_body) + + assert request_body == { + "messages": [{"role": "user", "content": [{"text": "Hello!"}]}], + "additionalModelRequestFields": {}, + "system": [], + "inferenceConfig": {"maxTokens": 10}, + } + print(f"response: {response}") + assert isinstance(response, ModelResponse) + + +@pytest.mark.parametrize( + "model", + ["anthropic/claude-3-sonnet-20240229", "anthropic/claude-3-opus-20240229,"], +) +@pytest.mark.respx +@pytest.mark.asyncio() +async def test_anthropic_api_max_completion_tokens(model: str, respx_mock: MockRouter): + """ + Tests that: + - max_completion_tokens is passed as max_tokens to anthropic models + """ + litellm.set_verbose = True + + mock_response = { + "content": [{"text": "Hi! 
My name is Claude.", "type": "text"}], + "id": "msg_013Zva2CMHLNnXjNJJKqJ2EF", + "model": "claude-3-5-sonnet-20240620", + "role": "assistant", + "stop_reason": "end_turn", + "stop_sequence": None, + "type": "message", + "usage": {"input_tokens": 2095, "output_tokens": 503}, + } + + print("\n\nmock_response: ", mock_response) + url = f"https://api.anthropic.com/v1/messages" + mock_request = respx_mock.post(url).mock( + return_value=httpx.Response(200, json=mock_response) + ) + + response = await litellm.acompletion( + model=model, + max_completion_tokens=10, + messages=[{"role": "user", "content": "Hello!"}], + ) + + assert mock_request.called + request_body = json.loads(mock_request.calls[0].request.content) + + print("request_body: ", request_body) + + assert request_body == { + "messages": [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}], + "max_tokens": 10, + "model": model.split("/")[-1], + } + print(f"response: {response}") + assert isinstance(response, ModelResponse) + + +def test_all_model_configs(): + from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.ai21.transformation import ( + VertexAIAi21Config, + ) + from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_partner_models.llama3.transformation import ( + VertexAILlama3Config, + ) + + assert ( + "max_completion_tokens" in VertexAILlama3Config().get_supported_openai_params() + ) + assert VertexAILlama3Config().map_openai_params( + {"max_completion_tokens": 10}, {}, "llama3" + ) == {"max_tokens": 10} + + assert "max_completion_tokens" in VertexAIAi21Config().get_supported_openai_params() + assert VertexAIAi21Config().map_openai_params( + {"max_completion_tokens": 10}, {}, "llama3" + ) == {"max_tokens": 10} + + from litellm.llms.fireworks_ai import FireworksAIConfig + + assert "max_completion_tokens" in FireworksAIConfig().get_supported_openai_params() + assert FireworksAIConfig().map_openai_params( + {"max_completion_tokens": 10}, {}, "llama3" + ) == {"max_tokens": 10} + + from litellm.llms.huggingface_restapi import HuggingfaceConfig + + assert "max_completion_tokens" in HuggingfaceConfig().get_supported_openai_params() + assert HuggingfaceConfig().map_openai_params({"max_completion_tokens": 10}, {}) == { + "max_new_tokens": 10 + } + + from litellm.llms.nvidia_nim import NvidiaNimConfig + + assert "max_completion_tokens" in NvidiaNimConfig().get_supported_openai_params( + model="llama3" + ) + assert NvidiaNimConfig().map_openai_params( + model="llama3", + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + ) == {"max_tokens": 10} + + from litellm.llms.ollama_chat import OllamaChatConfig + + assert "max_completion_tokens" in OllamaChatConfig().get_supported_openai_params() + assert OllamaChatConfig().map_openai_params( + model="llama3", + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + ) == {"num_predict": 10} + + from litellm.llms.predibase import PredibaseConfig + + assert "max_completion_tokens" in PredibaseConfig().get_supported_openai_params() + assert PredibaseConfig().map_openai_params( + {"max_completion_tokens": 10}, + {}, + ) == {"max_new_tokens": 10} + + from litellm.llms.text_completion_codestral import MistralTextCompletionConfig + + assert ( + "max_completion_tokens" + in MistralTextCompletionConfig().get_supported_openai_params() + ) + assert MistralTextCompletionConfig().map_openai_params( + {"max_completion_tokens": 10}, + {}, + ) == {"max_tokens": 10} + + from litellm.llms.volcengine import VolcEngineConfig + + assert 
"max_completion_tokens" in VolcEngineConfig().get_supported_openai_params( + model="llama3" + ) + assert VolcEngineConfig().map_openai_params( + model="llama3", + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + ) == {"max_tokens": 10} + + from litellm.llms.AI21.chat import AI21ChatConfig + + assert "max_completion_tokens" in AI21ChatConfig().get_supported_openai_params( + "jamba-1.5-mini@001" + ) + assert AI21ChatConfig().map_openai_params( + model="jamba-1.5-mini@001", + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + ) == {"max_tokens": 10} + + from litellm.llms.AzureOpenAI.azure import AzureOpenAIConfig + + assert "max_completion_tokens" in AzureOpenAIConfig().get_supported_openai_params() + assert AzureOpenAIConfig().map_openai_params( + model="gpt-3.5-turbo", + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + api_version="2022-12-01", + drop_params=False, + ) == {"max_tokens": 10} + + from litellm.llms.bedrock.chat import AmazonConverseConfig + + assert ( + "max_completion_tokens" + in AmazonConverseConfig().get_supported_openai_params( + model="anthropic.claude-3-sonnet-20240229-v1:0" + ) + ) + assert AmazonConverseConfig().map_openai_params( + model="anthropic.claude-3-sonnet-20240229-v1:0", + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + drop_params=False, + ) == {"maxTokens": 10} + + from litellm.llms.text_completion_codestral import MistralTextCompletionConfig + + assert ( + "max_completion_tokens" + in MistralTextCompletionConfig().get_supported_openai_params() + ) + assert MistralTextCompletionConfig().map_openai_params( + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + ) == {"max_tokens": 10} + + from litellm.llms.bedrock.common_utils import ( + AmazonAnthropicClaude3Config, + AmazonAnthropicConfig, + ) + + assert ( + "max_completion_tokens" + in AmazonAnthropicClaude3Config().get_supported_openai_params() + ) + + assert AmazonAnthropicClaude3Config().map_openai_params( + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + ) == {"max_tokens": 10} + + assert ( + "max_completion_tokens" in AmazonAnthropicConfig().get_supported_openai_params() + ) + + assert AmazonAnthropicConfig().map_openai_params( + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + ) == {"max_tokens_to_sample": 10} + + from litellm.llms.databricks.chat import DatabricksConfig + + assert "max_completion_tokens" in DatabricksConfig().get_supported_openai_params() + + assert DatabricksConfig().map_openai_params( + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + ) == {"max_tokens": 10} + + from litellm.llms.vertex_ai_and_google_ai_studio.vertex_ai_anthropic import ( + VertexAIAnthropicConfig, + ) + + assert ( + "max_completion_tokens" + in VertexAIAnthropicConfig().get_supported_openai_params() + ) + + assert VertexAIAnthropicConfig().map_openai_params( + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + ) == {"max_tokens": 10} + + from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import ( + VertexAIConfig, + GoogleAIStudioGeminiConfig, + VertexGeminiConfig, + ) + + assert "max_completion_tokens" in VertexAIConfig().get_supported_openai_params() + + assert VertexAIConfig().map_openai_params( + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + ) == {"max_output_tokens": 10} + + assert ( + "max_completion_tokens" + in 
GoogleAIStudioGeminiConfig().get_supported_openai_params() + ) + + assert GoogleAIStudioGeminiConfig().map_openai_params( + model="gemini-1.0-pro", + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + ) == {"max_output_tokens": 10} + + assert "max_completion_tokens" in VertexGeminiConfig().get_supported_openai_params() + + assert VertexGeminiConfig().map_openai_params( + model="gemini-1.0-pro", + non_default_params={"max_completion_tokens": 10}, + optional_params={}, + drop_params=False, + ) == {"max_output_tokens": 10} diff --git a/litellm/tests/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py similarity index 50% rename from litellm/tests/test_openai_o1.py rename to tests/llm_translation/test_openai_o1.py index 39dadc96e..70fe346b1 100644 --- a/litellm/tests/test_openai_o1.py +++ b/tests/llm_translation/test_openai_o1.py @@ -1,7 +1,14 @@ import json +import os +import sys from datetime import datetime from unittest.mock import AsyncMock +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path + + import httpx import pytest from respx import MockRouter @@ -50,3 +57,45 @@ async def test_o1_handle_system_role(respx_mock: MockRouter): print(f"response: {response}") assert isinstance(response, ModelResponse) + + +@pytest.mark.asyncio +@pytest.mark.respx +@pytest.mark.parametrize("model", ["gpt-4", "gpt-4-0314", "gpt-4-32k", "o1-preview"]) +async def test_o1_max_completion_tokens(respx_mock: MockRouter, model: str): + """ + Tests that: + - max_completion_tokens is passed directly to OpenAI chat completion models + """ + litellm.set_verbose = True + + mock_response = ModelResponse( + id="cmpl-mock", + choices=[Choices(message=Message(content="Mocked response", role="assistant"))], + created=int(datetime.now().timestamp()), + model=model, + ) + + mock_request = respx_mock.post("https://api.openai.com/v1/chat/completions").mock( + return_value=httpx.Response(200, json=mock_response.dict()) + ) + + response = await litellm.acompletion( + model=model, + max_completion_tokens=10, + messages=[{"role": "user", "content": "Hello!"}], + ) + + assert mock_request.called + request_body = json.loads(mock_request.calls[0].request.content) + + print("request_body: ", request_body) + + assert request_body == { + "model": model, + "max_completion_tokens": 10, + "messages": [{"role": "user", "content": "Hello!"}], + } + + print(f"response: {response}") + assert isinstance(response, ModelResponse) diff --git a/litellm/tests/test_optional_params.py b/tests/llm_translation/test_optional_params.py similarity index 100% rename from litellm/tests/test_optional_params.py rename to tests/llm_translation/test_optional_params.py
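
Note (supplementary sketch, not part of the patch): every provider change above follows the same shape — max_completion_tokens is accepted as an OpenAI-compatible parameter and translated to the provider's native limit field (max_tokens, maxTokens, max_output_tokens, num_predict, or max_tokens_to_sample). The minimal Python sketch below shows both sides of that flow, mirroring the assertions in tests/llm_translation/test_max_completion_tokens.py; the Bedrock model name is taken from those tests and the call would need AWS credentials, which are assumed here.

import litellm
from litellm.llms.bedrock.chat import AmazonConverseConfig
from litellm.llms.databricks.chat import DatabricksConfig

# Caller side: pass max_completion_tokens like any other OpenAI parameter.
# Model name reused from the test suite; AWS credentials are assumed.
response = litellm.completion(
    model="bedrock/mistral.mistral-large-2407-v1:0",
    messages=[{"role": "user", "content": "Hello!"}],
    max_completion_tokens=10,
)

# Provider side: each config maps the parameter onto its own limit field,
# exactly as the new tests assert.
assert AmazonConverseConfig().map_openai_params(
    model="anthropic.claude-3-sonnet-20240229-v1:0",
    non_default_params={"max_completion_tokens": 10},
    optional_params={},
    drop_params=False,
) == {"maxTokens": 10}

assert DatabricksConfig().map_openai_params(
    non_default_params={"max_completion_tokens": 10},
    optional_params={},
) == {"max_tokens": 10}

Providers that later gain native support (e.g. Azure OpenAI, per the TODO in azure.py) only need to change their mapping branch; callers keep sending max_completion_tokens unchanged.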