diff --git a/litellm/__init__.py b/litellm/__init__.py
index ca6830293..93d74ab91 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -113,7 +113,7 @@
 open_ai_embedding_models = [
 ]
 from .timeout import timeout
-from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost
+from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost, load_test_model
 from .main import * # Import all the symbols from main.py
 from .integrations import *
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
\ No newline at end of file
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index 55f1f9c75..cd671c282 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index e9b3e7f1c..3e2e932e1 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 6a0dcbc33..acbc91552 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index 3a93f9c24..6e54d658e 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -59,7 +59,7 @@ def completion(
       # params to identify the model
       model=model, replicate=replicate, hugging_face=hugging_face, together_ai=together_ai
     )
-    if azure == True:
+    if azure == True or custom_llm_provider == "azure": # [TODO]: remove azure=True flag, move to 'custom_llm_provider' approach
       # azure configs
       openai.api_type = "azure"
       openai.api_base = litellm.api_base if litellm.api_base is not None else get_secret("AZURE_API_BASE")
@@ -153,7 +153,7 @@
       model_response["model"] = model
       model_response["usage"] = response["usage"]
       response = model_response
-    elif "replicate" in model or replicate == True:
+    elif "replicate" in model or replicate == True or custom_llm_provider == "replicate":
       # import replicate/if it fails then pip install replicate
       install_and_import("replicate")
       import replicate
@@ -256,7 +256,7 @@
       }
       response = model_response
 
-    elif model in litellm.openrouter_models:
+    elif model in litellm.openrouter_models or custom_llm_provider == "openrouter":
       openai.api_type = "openai"
       # not sure if this will work after someone first uses another API
       openai.api_base = litellm.api_base if litellm.api_base is not None else "https://openrouter.ai/api/v1"
@@ -338,7 +338,7 @@
         "total_tokens": prompt_tokens + completion_tokens
       }
       response = model_response
-    elif hugging_face == True:
+    elif hugging_face == True or custom_llm_provider == "huggingface":
       import requests
       API_URL = f"https://api-inference.huggingface.co/models/{model}"
       HF_TOKEN = get_secret("HF_TOKEN")
@@ -364,7 +364,7 @@
         "total_tokens": prompt_tokens + completion_tokens
       }
       response = model_response
-    elif together_ai == True:
+    elif together_ai == True or custom_llm_provider == "together_ai":
       import requests
       TOGETHER_AI_TOKEN = get_secret("TOGETHER_AI_TOKEN")
       headers = {"Authorization": f"Bearer {TOGETHER_AI_TOKEN}"}
@@ -430,7 +430,7 @@
       ## LOGGING
       logging(model=model, input=messages, azure=azure, logger_fn=logger_fn)
       args = locals()
-      raise ValueError(f"No valid completion model args passed in - {args}")
+      raise ValueError(f"Invalid completion model args passed in. Check your input - {args}")
     return response
   except Exception as e:
     ## LOGGING
diff --git a/litellm/tests/test_load_test_model.py b/litellm/tests/test_load_test_model.py
new file mode 100644
index 000000000..1ff74d580
--- /dev/null
+++ b/litellm/tests/test_load_test_model.py
@@ -0,0 +1,8 @@
+import sys, os
+import traceback
+sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
+import litellm
+from litellm import load_test_model
+
+result = load_test_model(model="gpt-3.5-turbo", num_calls=5)
+print(result)
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 4fa2751a6..ca0c6d83e 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -302,6 +302,25 @@
     return optional_params
   return optional_params
 
+def load_test_model(model: str, custom_llm_provider: str = None, prompt: str = None, num_calls: int = None):
+  # fall back to a default prompt and call count when none are provided
+  test_prompt = "Hey, how's it going"
+  test_calls = 100
+  if prompt:
+    test_prompt = prompt
+  if num_calls:
+    test_calls = num_calls
+  messages = [[{"role": "user", "content": test_prompt}] for _ in range(test_calls)]
+  start_time = time.time()
+  try:
+    litellm.batch_completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider)
+    end_time = time.time()
+    response_time = end_time - start_time
+    return {"total_response_time": response_time, "calls_made": test_calls, "status": "success", "exception": None}
+  except Exception as e:
+    end_time = time.time()
+    response_time = end_time - start_time
+    return {"total_response_time": response_time, "calls_made": test_calls, "status": "failed", "exception": e}
+
 def set_callbacks(callback_list):
   global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient
   try:
diff --git a/pyproject.toml b/pyproject.toml
index 9fe3e3433..78f9d4712 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.381"
+version = "0.1.382"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"