diff --git a/cookbook/liteLLM_Ollama.ipynb b/cookbook/liteLLM_Ollama.ipynb
index 9abfadb17..c5770b02f 100644
--- a/cookbook/liteLLM_Ollama.ipynb
+++ b/cookbook/liteLLM_Ollama.ipynb
@@ -88,7 +88,7 @@
     }
    ],
    "source": [
-    "response = completion(model=\"llama2\", messages=messages, custom_api_base=\"http://localhost:11434\", custom_llm_provider=\"ollama\", stream=True)\n",
+    "response = completion(model=\"llama2\", messages=messages, api_base=\"http://localhost:11434\", custom_llm_provider=\"ollama\", stream=True)\n",
     "print(response)"
    ]
   },
diff --git a/docs/my-website/docs/completion/supported.md b/docs/my-website/docs/completion/supported.md
index 57670a86e..ea8978066 100644
--- a/docs/my-website/docs/completion/supported.md
+++ b/docs/my-website/docs/completion/supported.md
@@ -178,12 +178,12 @@ Ollama supported models: https://github.com/jmorganca/ollama
 
 | Model Name | Function Call | Required OS Variables |
 |----------------------|-----------------------------------------------------------------------------------|--------------------------------|
-| Llama2 7B | `completion(model='llama2', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 13B | `completion(model='llama2:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 70B | `completion(model='llama2:70b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Llama2 Uncensored | `completion(model='llama2-uncensored', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Orca Mini | `completion(model='orca-mini', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Vicuna | `completion(model='vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Nous-Hermes | `completion(model='nous-hermes', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Nous-Hermes 13B | `completion(model='nous-hermes:13b', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
-| Wizard Vicuna Uncensored | `completion(model='wizard-vicuna', messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 7B | `completion(model='llama2', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 13B | `completion(model='llama2:13b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 70B | `completion(model='llama2:70b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Llama2 Uncensored | `completion(model='llama2-uncensored', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Orca Mini | `completion(model='orca-mini', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Vicuna | `completion(model='vicuna', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Nous-Hermes | `completion(model='nous-hermes', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Nous-Hermes 13B | `completion(model='nous-hermes:13b', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
+| Wizard Vicuna Uncensored | `completion(model='wizard-vicuna', messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)` | No API Key required |
diff --git a/docs/my-website/docs/tutorials/huggingface_tutorial.md b/docs/my-website/docs/tutorials/huggingface_tutorial.md
index caae28ffa..8f37bb4b9 100644
--- a/docs/my-website/docs/tutorials/huggingface_tutorial.md
+++ b/docs/my-website/docs/tutorials/huggingface_tutorial.md
@@ -41,14 +41,14 @@ from litellm import completion
 
 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
 
 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
 ```
 
 What's happening?
-- custom_api_base: Optional param. Since this uses a deployed endpoint (not the [default huggingface inference endpoint](https://github.com/BerriAI/litellm/blob/6aff47083be659b80e00cb81eb783cb24db2e183/litellm/llms/huggingface_restapi.py#L35)), we pass that to LiteLLM.
+- api_base: Optional param. Since this uses a deployed endpoint (not the [default huggingface inference endpoint](https://github.com/BerriAI/litellm/blob/6aff47083be659b80e00cb81eb783cb24db2e183/litellm/llms/huggingface_restapi.py#L35)), we pass that to LiteLLM.
 
 ### Case 3: Call Llama2 private Huggingface endpoint
 
@@ -72,10 +72,10 @@ os.environ["HF_TOKEN] = "..."
 
 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
 
 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
 ```
 
 **Setting it as package variable**
@@ -93,10 +93,10 @@ litellm.huggingface_key = "..."
 
 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
 
 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base)
 ```
 
 **Passed in during completion call**
@@ -111,8 +111,8 @@ from litellm import completion
 
 model = "meta-llama/Llama-2-7b-hf"
 messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
-custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
 
 ### CALLING ENDPOINT
-completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base, api_key="...")
+completion(model=model, messages=messages, custom_llm_provider="huggingface", api_base=api_base, api_key="...")
 ```
diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index a07f07fbe..55614549c 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -38,7 +38,7 @@ class HuggingfaceRestAPILLM:
         self,
         model: str,
         messages: list,
-        custom_api_base: str,
+        api_base: str,
         model_response: ModelResponse,
         print_verbose: Callable,
         optional_params=None,
@@ -48,8 +48,8 @@ class HuggingfaceRestAPILLM:
         completion_url: str = ""
         if "https" in model:
             completion_url = model
-        elif custom_api_base:
-            completion_url = custom_api_base
+        elif api_base:
+            completion_url = api_base
         elif "HF_API_BASE" in os.environ:
             completion_url = os.getenv("HF_API_BASE", "")
         else:
diff --git a/litellm/main.py b/litellm/main.py
index 7fd789957..174077260 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -92,7 +92,7 @@ def completion(
     verbose=False,
     azure=False,
     custom_llm_provider=None,
-    custom_api_base=None,
+    api_base=None,
     litellm_call_id=None,
     litellm_logging_obj=None,
     use_client=False,
@@ -153,7 +153,7 @@ def completion(
            logger_fn=logger_fn,
            verbose=verbose,
            custom_llm_provider=custom_llm_provider,
-           custom_api_base=custom_api_base,
+           api_base=api_base,
            litellm_call_id=litellm_call_id,
            model_alias_map=litellm.model_alias_map,
            completion_call_id=id
@@ -223,7 +223,7 @@ def completion(
            # note: if a user sets a custom base - we should ensure this works
            # allow for the setting of dynamic and stateful api-bases
            api_base = (
-               custom_api_base
+               api_base
                or litellm.api_base
                or get_secret("OPENAI_API_BASE")
                or "https://api.openai.com/v1"
@@ -567,7 +567,7 @@ def completion(
            model_response = huggingface_client.completion(
                model=model,
                messages=messages,
-               custom_api_base=custom_api_base,
+               api_base=api_base,
                model_response=model_response,
                print_verbose=print_verbose,
                optional_params=optional_params,
@@ -692,7 +692,7 @@ def completion(
            response = model_response
        elif custom_llm_provider == "ollama":
            endpoint = (
-               litellm.api_base if litellm.api_base is not None else custom_api_base
+               litellm.api_base if litellm.api_base is not None else api_base
            )
 
            prompt = " ".join([message["content"] for message in messages])
diff --git a/litellm/testing.py b/litellm/testing.py
index 5db01d182..7cab6115c 100644
--- a/litellm/testing.py
+++ b/litellm/testing.py
@@ -31,9 +31,9 @@ def testing_batch_completion(*args, **kwargs):
                if isinstance(model, dict) and "custom_llm_provider" in model
                else None
            )
-           kwargs_modified["custom_api_base"] = (
-               model["custom_api_base"]
-               if isinstance(model, dict) and "custom_api_base" in model
+           kwargs_modified["api_base"] = (
+               model["api_base"]
+               if isinstance(model, dict) and "api_base" in model
                else None
            )
            for message_list in batch_messages:
diff --git a/litellm/tests/test_custom_api_base.py b/litellm/tests/test_custom_api_base.py
index 70a477eab..40506b57c 100644
--- a/litellm/tests/test_custom_api_base.py
+++ b/litellm/tests/test_custom_api_base.py
@@ -20,14 +20,14 @@ models = ["gorilla-7b-hf-v1", "gpt-4"]
 custom_llm_provider = None
 messages = [{"role": "user", "content": "Hey, how's it going?"}]
 for model in models:  # iterate through list
-    custom_api_base = None
+    api_base = None
     if model == "gorilla-7b-hf-v1":
         custom_llm_provider = "custom_openai"
-        custom_api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
+        api_base = "http://zanino.millennium.berkeley.edu:8000/v1"
     completion(
         model=model,
         messages=messages,
         custom_llm_provider=custom_llm_provider,
-        custom_api_base=custom_api_base,
+        api_base=api_base,
         logger_fn=logging_fn,
     )
diff --git a/litellm/tests/test_ollama_local.py b/litellm/tests/test_ollama_local.py
index a9431a932..cc15274bc 100644
--- a/litellm/tests/test_ollama_local.py
+++ b/litellm/tests/test_ollama_local.py
@@ -24,7 +24,7 @@
 
 # def test_completion_ollama():
 #     try:
-#         response = completion(model="llama2", messages=messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama")
+#         response = completion(model="llama2", messages=messages, api_base="http://localhost:11434", custom_llm_provider="ollama")
 #         print(response)
 #         string_response = asyncio.run(get_response(response))
 #         print(string_response)
@@ -36,7 +36,7 @@
 
 # def test_completion_ollama_stream():
 #     try:
-#         response = completion(model="llama2", messages=messages, custom_api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)
+#         response = completion(model="llama2", messages=messages, api_base="http://localhost:11434", custom_llm_provider="ollama", stream=True)
 #         print(response)
 #         string_response = asyncio.run(get_response(response))
 #         print(string_response)
diff --git a/litellm/utils.py b/litellm/utils.py
index 68076023c..1ecc130d7 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -658,7 +658,7 @@ def get_litellm_params(
     replicate=False,
     together_ai=False,
     custom_llm_provider=None,
-    custom_api_base=None,
+    api_base=None,
     litellm_call_id=None,
     model_alias_map=None,
     completion_call_id=None
@@ -670,7 +670,7 @@ def get_litellm_params(
         "logger_fn": logger_fn,
         "verbose": verbose,
         "custom_llm_provider": custom_llm_provider,
-        "custom_api_base": custom_api_base,
+        "api_base": api_base,
         "litellm_call_id": litellm_call_id,
         "model_alias_map": model_alias_map,
         "completion_call_id": completion_call_id,
@@ -834,7 +834,7 @@ def get_optional_params(  # use the openai defaults
 def load_test_model(
     model: str,
     custom_llm_provider: str = "",
-    custom_api_base: str = "",
+    api_base: str = "",
     prompt: str = "",
     num_calls: int = 0,
     force_timeout: int = 0,
@@ -852,7 +852,7 @@ def load_test_model(
         model=model,
         messages=messages,
         custom_llm_provider=custom_llm_provider,
-        custom_api_base=custom_api_base,
+        api_base=api_base,
         force_timeout=force_timeout,
     )
     end_time = time.time()
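
Note on caller usage after this rename: the keyword argument is now `api_base` rather than `custom_api_base`; everything else about the `completion()` call is unchanged. Below is a minimal sketch mirroring the Ollama call from the cookbook and docs hunks above (it assumes a local Ollama server on the default port 11434 with the `llama2` model pulled; no API key is required):

from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# `api_base` replaces the old `custom_api_base` keyword
response = completion(
    model="llama2",
    messages=messages,
    api_base="http://localhost:11434",  # local Ollama server
    custom_llm_provider="ollama",
    stream=True,
)
print(response)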