diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 5fd4e5b62c..762155ed98 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -4,6 +4,9 @@ model_list:
       model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
+  - model_name: gemini-flash
+    litellm_params:
+      model: gemini/gemini-1.5-flash
 
 general_settings:
   master_key: sk-1234
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index a90f93484f..9c32fbf4d6 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -73,6 +73,7 @@ class ModelInfo(TypedDict, total=False):
     supported_openai_params: Required[Optional[List[str]]]
     supports_system_messages: Optional[bool]
     supports_response_schema: Optional[bool]
+    supports_vision: Optional[bool]
 
 
 class GenericStreamingChunk(TypedDict):
diff --git a/litellm/utils.py b/litellm/utils.py
index cf2c679a84..2dab185a3d 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4829,6 +4829,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
             supports_response_schema=_model_info.get(
                 "supports_response_schema", None
             ),
+            supports_vision=_model_info.get("supports_vision", None),
         )
     except Exception:
         raise Exception(
@@ -8126,7 +8127,7 @@ class CustomStreamWrapper:
 
             if chunk.startswith(self.complete_response):
                 # Remove last_sent_chunk only if it appears at the start of the new chunk
-                chunk = chunk[len(self.complete_response):]
+                chunk = chunk[len(self.complete_response) :]
 
             self.complete_response += chunk
             return chunk
@@ -10124,7 +10125,7 @@ def mock_completion_streaming_obj(
     model_response, mock_response, model, n: Optional[int] = None
 ):
     for i in range(0, len(mock_response), 3):
-        completion_obj = Delta(role="assistant", content=mock_response[i: i + 3])
+        completion_obj = Delta(role="assistant", content=mock_response[i : i + 3])
         if n is None:
             model_response.choices[0].delta = completion_obj
         else:
@@ -10133,7 +10134,7 @@ def mock_completion_streaming_obj(
             _streaming_choice = litellm.utils.StreamingChoices(
                 index=j,
                 delta=litellm.utils.Delta(
-                    role="assistant", content=mock_response[i: i + 3]
+                    role="assistant", content=mock_response[i : i + 3]
                 ),
             )
             _all_choices.append(_streaming_choice)
@@ -10145,7 +10146,7 @@ async def async_mock_completion_streaming_obj(
     model_response, mock_response, model, n: Optional[int] = None
 ):
     for i in range(0, len(mock_response), 3):
-        completion_obj = Delta(role="assistant", content=mock_response[i: i + 3])
+        completion_obj = Delta(role="assistant", content=mock_response[i : i + 3])
         if n is None:
             model_response.choices[0].delta = completion_obj
         else:
@@ -10154,7 +10155,7 @@ async def async_mock_completion_streaming_obj(
             _streaming_choice = litellm.utils.StreamingChoices(
                 index=j,
                 delta=litellm.utils.Delta(
-                    role="assistant", content=mock_response[i: i + 3]
+                    role="assistant", content=mock_response[i : i + 3]
                 ),
             )
             _all_choices.append(_streaming_choice)
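
A minimal usage sketch of the new supports_vision field, assuming get_model_info
is importable from the top-level litellm package (the diff only shows its
definition in litellm/utils.py) and that the gemini/gemini-1.5-flash model added
to proxy_config.yaml above has an entry in the model cost map:

    import litellm

    # get_model_info returns the ModelInfo TypedDict extended by this diff;
    # supports_vision may be None if the backing model map has no value for it.
    info = litellm.get_model_info(model="gemini/gemini-1.5-flash")
    if info.get("supports_vision"):
        print("model accepts image inputs")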