diff --git a/litellm/main.py b/litellm/main.py
index df1166116..dd4312f0c 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -609,6 +609,7 @@ def completion(
         "cache",
         "no-log",
         "base_model",
+        "stream_timeout",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 9049d78e4..00b783952 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -4,6 +4,14 @@ model_list:
     model: openai/my-fake-model
     api_key: my-fake-key
     api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
+    stream_timeout: 0.001
+- litellm_params:
+    model: azure/chatgpt-v-2
+    api_base: os.environ/AZURE_API_BASE
+    api_key: os.environ/AZURE_API_KEY
+    api_version: "2023-07-01-preview"
+    stream_timeout: 0.001
+  model_name: azure-gpt-3.5
 - model_name: gpt-instruct
   litellm_params:
     model: gpt-3.5-turbo-instruct
diff --git a/litellm/tests/test_router_init.py b/litellm/tests/test_router_init.py
index 5fa142053..4fdceaf36 100644
--- a/litellm/tests/test_router_init.py
+++ b/litellm/tests/test_router_init.py
@@ -252,24 +252,31 @@ def test_stream_timeouts_router():
                     "api_version": os.getenv("AZURE_API_VERSION"),
                     "api_base": os.getenv("AZURE_API_BASE"),
                     "timeout": 200,  # regular calls will not timeout, stream calls will
-                    "stream_timeout": 0.000_001,
+                    "stream_timeout": 10,
                 },
             },
         ]
         router = Router(model_list=model_list)
 
         print("PASSED !")
+        data = {
+            "model": "gpt-3.5-turbo",
+            "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
+            "stream": True,
+        }
         selected_client = router._get_client(
             deployment=router.model_list[0],
-            kwargs={
-                "model": "gpt-3.5-turbo",
-                "messages": [{"role": "user", "content": "hello, write a 20 pg essay"}],
-                "stream": True,
-            },
+            kwargs=data,
             client_type=None,
         )
         print("Select client timeout", selected_client.timeout)
-        assert selected_client.timeout == 0.000_001
+        assert selected_client.timeout == 10
+
+        # make actual call
+        response = router.completion(**data)
+
+        for chunk in response:
+            print(f"chunk: {chunk}")
     except openai.APITimeoutError as e:
         print(
             "Passed: Raised correct exception. Got openai.APITimeoutError\nGood Job", e
diff --git a/proxy_server_config.yaml b/proxy_server_config.yaml
index c723bd31d..d1d06eb58 100644
--- a/proxy_server_config.yaml
+++ b/proxy_server_config.yaml
@@ -9,12 +9,18 @@ model_list:
   litellm_params:
     model: "gpt-3.5-turbo-1106"
     api_key: os.environ/OPENAI_API_KEY
+    rpm: 480
+    timeout: 300
+    stream_timeout: 60
 - model_name: gpt-4
   litellm_params:
     model: azure/chatgpt-v-2
     api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
     api_version: "2023-05-15"
     api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
+    rpm: 480
+    timeout: 300
+    stream_timeout: 60
 - model_name: sagemaker-completion-model
   litellm_params:
     model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
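
Taken together, these changes register "stream_timeout" as a recognized litellm param in completion() and let it be set per deployment in the router/proxy config, so streaming calls can use a tighter timeout than regular calls. The following is a minimal usage sketch mirroring the updated test; it assumes the AZURE_API_* environment variables are set, and the deployment values are illustrative, not prescribed by this diff.

import os

import openai
from litellm import Router

# One deployment with a long regular timeout and a short stream timeout.
model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
            "timeout": 200,  # applies to non-streaming calls
            "stream_timeout": 0.001,  # applies only when stream=True
        },
    },
]

router = Router(model_list=model_list)

try:
    # With stream=True the router selects the client built with stream_timeout,
    # so this aggressive 0.001s value should raise openai.APITimeoutError.
    response = router.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello, write a 20 pg essay"}],
        stream=True,
    )
    for chunk in response:
        print(f"chunk: {chunk}")
except openai.APITimeoutError as e:
    print("Stream call timed out as expected:", e)

A non-streaming call through the same router would instead be governed by the 200s timeout, which is the behavior the updated test_stream_timeouts_router exercises.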