diff --git a/.circleci/config.yml b/.circleci/config.yml
index 6bddd80f1..2aa6a1863 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -404,7 +404,7 @@ jobs:
# Store test results
- store_test_results:
path: test-results
- proxy_log_to_otel_tests:
+ proxy_logging_guardrails_model_info_tests:
machine:
image: ubuntu-2204:2023.10.1
resource_class: xlarge
@@ -476,6 +476,7 @@ jobs:
-e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
-e AWS_REGION_NAME=$AWS_REGION_NAME \
-e APORIA_API_KEY_1=$APORIA_API_KEY_1 \
+ -e COHERE_API_KEY=$COHERE_API_KEY \
--name my-app \
-v $(pwd)/litellm/proxy/example_config_yaml/otel_test_config.yaml:/app/config.yaml \
-v $(pwd)/litellm/proxy/example_config_yaml/custom_guardrail.py:/app/custom_guardrail.py \
@@ -503,7 +504,7 @@ jobs:
command: |
pwd
ls
- python -m pytest -vv tests/otel_tests/test_otel.py -x --junitxml=test-results/junit.xml --durations=5
+ python -m pytest -vv tests/otel_tests -x --junitxml=test-results/junit.xml --durations=5
no_output_timeout: 120m
# Store test results
@@ -711,7 +712,7 @@ workflows:
only:
- main
- /litellm_.*/
- - proxy_log_to_otel_tests:
+ - proxy_logging_guardrails_model_info_tests:
filters:
branches:
only:
@@ -751,7 +752,7 @@ workflows:
- litellm_assistants_api_testing
- ui_endpoint_testing
- installing_litellm_on_python
- - proxy_log_to_otel_tests
+ - proxy_logging_guardrails_model_info_tests
- proxy_pass_through_endpoint_tests
filters:
branches:
diff --git a/docs/my-website/docs/completion/vision.md b/docs/my-website/docs/completion/vision.md
index 69af03c98..0880d0ec4 100644
--- a/docs/my-website/docs/completion/vision.md
+++ b/docs/my-website/docs/completion/vision.md
@@ -1,8 +1,16 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
# Using Vision Models
## Quick Start
Example passing images to a model
+
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
```python
import os
from litellm import completion
@@ -33,8 +41,80 @@ response = completion(
```
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+1. Define vision models on config.yaml
+
+```yaml
+model_list:
+ - model_name: gpt-4-vision-preview # OpenAI gpt-4-vision-preview
+ litellm_params:
+ model: openai/gpt-4-vision-preview
+ api_key: os.environ/OPENAI_API_KEY
+ - model_name: llava-hf # Custom OpenAI compatible model
+ litellm_params:
+ model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
+ api_base: http://localhost:8000
+ api_key: fake-key
+ model_info:
+ supports_vision: True # set supports_vision to True so /model/info returns this attribute as True
+
+```
+
+2. Run proxy server
+
+```bash
+litellm --config config.yaml
+```
+
+3. Test it using the OpenAI Python SDK
+
+
+```python
+import os
+from openai import OpenAI
+
+client = OpenAI(
+ api_key="sk-1234", # your litellm proxy api key
+)
+
+response = client.chat.completions.create(
+ model = "gpt-4-vision-preview", # use model="llava-hf" to test your custom OpenAI endpoint
+ messages=[
+ {
+ "role": "user",
+ "content": [
+                {
+                    "type": "text",
+                    "text": "What's in this image?"
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+                    }
+                }
+ ]
+ }
+ ],
+)
+
+```
+
+
+</TabItem>
+</Tabs>
+
+
+
+
+
## Checking if a model supports `vision`
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
Use `litellm.supports_vision(model="")` -> returns `True` if model supports `vision` and `False` if not
```python
@@ -42,4 +122,69 @@ assert litellm.supports_vision(model="gpt-4-vision-preview") == True
assert litellm.supports_vision(model="gemini-1.0-pro-vision") == True
assert litellm.supports_vision(model="gpt-3.5-turbo") == False
```
+
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+1. Define vision models on config.yaml
+
+```yaml
+model_list:
+ - model_name: gpt-4-vision-preview # OpenAI gpt-4-vision-preview
+ litellm_params:
+ model: openai/gpt-4-vision-preview
+ api_key: os.environ/OPENAI_API_KEY
+ - model_name: llava-hf # Custom OpenAI compatible model
+ litellm_params:
+ model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
+ api_base: http://localhost:8000
+ api_key: fake-key
+ model_info:
+ supports_vision: True # set supports_vision to True so /model/info returns this attribute as True
+```
+
+2. Run proxy server
+
+```bash
+litellm --config config.yaml
+```
+
+3. Call `/model_group/info` to check if your model supports `vision`
+
+```shell
+curl -X 'GET' \
+ 'http://localhost:4000/model_group/info' \
+ -H 'accept: application/json' \
+ -H 'x-api-key: sk-1234'
+```
+
+Expected Response
+
+```json
+{
+ "data": [
+ {
+ "model_group": "gpt-4-vision-preview",
+ "providers": ["openai"],
+ "max_input_tokens": 128000,
+ "max_output_tokens": 4096,
+ "mode": "chat",
+      "supports_vision": true, # 👈 supports_vision is true
+ "supports_function_calling": false
+ },
+ {
+ "model_group": "llava-hf",
+ "providers": ["openai"],
+ "max_input_tokens": null,
+ "max_output_tokens": null,
+ "mode": null,
+      "supports_vision": true, # 👈 supports_vision is true
+ "supports_function_calling": false
+ }
+ ]
+}
+```
+
+</TabItem>
+</Tabs>
\ No newline at end of file
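
The proxy tab above checks vision support with curl; the same check can be scripted against `/model_group/info`. A minimal sketch, assuming the local proxy on port 4000 and the `sk-1234` key used in the docs examples:

```python
# Minimal sketch: list model groups and their vision support via the proxy.
# Assumes a locally running litellm proxy on port 4000 with master key "sk-1234",
# matching the curl example in the docs above.
import httpx

resp = httpx.get(
    "http://localhost:4000/model_group/info",
    headers={"x-api-key": "sk-1234"},
)
resp.raise_for_status()

for group in resp.json()["data"]:
    print(f'{group["model_group"]}: supports_vision={group.get("supports_vision")}')
```
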
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 4ddd5cb1a..8772c3100 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -1236,7 +1236,7 @@
},
"deepseek-chat": {
"max_tokens": 4096,
- "max_input_tokens": 32000,
+ "max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000014,
"input_cost_per_token_cache_hit": 0.000000014,
diff --git a/litellm/proxy/example_config_yaml/otel_test_config.yaml b/litellm/proxy/example_config_yaml/otel_test_config.yaml
index 80f24bfea..e040cfd68 100644
--- a/litellm/proxy/example_config_yaml/otel_test_config.yaml
+++ b/litellm/proxy/example_config_yaml/otel_test_config.yaml
@@ -15,10 +15,17 @@ model_list:
tags: ["teamB"]
model_info:
id: "team-b-model"
- - model_name: rerank-english-v3.0 # Fixed indentation here
+ - model_name: rerank-english-v3.0
litellm_params:
model: cohere/rerank-english-v3.0
api_key: os.environ/COHERE_API_KEY
+ - model_name: llava-hf
+ litellm_params:
+ model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
+ api_base: http://localhost:8000
+ api_key: fake-key
+ model_info:
+ supports_vision: True
litellm_settings:
@@ -41,7 +48,7 @@ guardrails:
- guardrail_name: "bedrock-pre-guard"
litellm_params:
guardrail: bedrock # supported values: "aporia", "bedrock", "lakera"
- mode: "pre_call"
+ mode: "during_call"
guardrailIdentifier: ff6ujrregl1q
guardrailVersion: "DRAFT"
- guardrail_name: "custom-pre-guard"
@@ -55,4 +62,7 @@ guardrails:
- guardrail_name: "custom-post-guard"
litellm_params:
guardrail: custom_guardrail.myCustomGuardrail
- mode: "post_call"
\ No newline at end of file
+ mode: "post_call"
+
+router_settings:
+  enable_tag_filtering: True # 👈 Key Change
\ No newline at end of file
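
The bedrock guardrail in this test config now runs `during_call` instead of `pre_call`. Assuming these guardrails can be selected per request by name (litellm's per-request `guardrails` parameter, which is not shown in this diff), a request against this config might look like the following sketch:

```python
# Sketch: opt a single request into the "bedrock-pre-guard" guardrail by name.
# Assumes the proxy from this config is running on port 4000 with key "sk-1234",
# and that it accepts a per-request "guardrails" list (litellm-proxy-specific,
# passed here through the OpenAI SDK's extra_body).
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # assumption: the teamB model defined in this config
    messages=[{"role": "user", "content": "hi, what can you do?"}],
    extra_body={"guardrails": ["bedrock-pre-guard"]},
)
print(response.choices[0].message.content)
```
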
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index e20aa8c28..ed5e703f5 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,32 +1,57 @@
model_list:
- - model_name: gemini-vision
- litellm_params:
- model: vertex_ai/gemini-1.5-pro
- api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
- vertex_project: "adroit-crow-413218"
- vertex_location: "us-central1"
- vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
- - model_name: gemini-vision
- litellm_params:
- model: vertex_ai/gemini-1.0-pro-vision-001
- api_base: https://exampleopenaiendpoint-production-c715.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
- vertex_project: "adroit-crow-413218"
- vertex_location: "us-central1"
- vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+ - model_name: gpt-3.5-turbo
+ litellm_params:
+ model: openai/gpt-3.5-turbo
+ api_key: fake-key
+ api_base: https://exampleopenaiendpoint-production.up.railway.app/
+ tags: ["teamB"]
+ model_info:
+ id: "team-b-model"
+ - model_name: rerank-english-v3.0
+ litellm_params:
+ model: cohere/rerank-english-v3.0
+ api_key: os.environ/COHERE_API_KEY
+ - model_name: llava-hf
+ litellm_params:
+ model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
+ api_base: http://localhost:8000
+ api_key: fake-key
+ model_info:
+ supports_vision: True
- - model_name: fake-azure-endpoint
- litellm_params:
- model: openai/429
- api_key: fake-key
- api_base: https://exampleopenaiendpoint-production.up.railway.app
-
-
-
-general_settings:
- master_key: sk-1234
- default_team_disabled: true
- custom_sso: custom_sso.custom_sso_handler
litellm_settings:
- success_callback: ["prometheus"]
+ cache: true
+ # callbacks: ["otel"]
+guardrails:
+ - guardrail_name: "aporia-pre-guard"
+ litellm_params:
+ guardrail: aporia # supported values: "aporia", "bedrock", "lakera"
+ mode: "post_call"
+ api_key: os.environ/APORIA_API_KEY_1
+ api_base: os.environ/APORIA_API_BASE_1
+ - guardrail_name: "aporia-post-guard"
+ litellm_params:
+ guardrail: aporia # supported values: "aporia", "bedrock", "lakera"
+ mode: "post_call"
+ api_key: os.environ/APORIA_API_KEY_2
+ api_base: os.environ/APORIA_API_BASE_2
+ - guardrail_name: "bedrock-pre-guard"
+ litellm_params:
+ guardrail: bedrock # supported values: "aporia", "bedrock", "lakera"
+ mode: "during_call"
+ guardrailIdentifier: ff6ujrregl1q
+ guardrailVersion: "DRAFT"
+ - guardrail_name: "custom-pre-guard"
+ litellm_params:
+ guardrail: custom_guardrail.myCustomGuardrail
+ mode: "pre_call"
+ - guardrail_name: "custom-during-guard"
+ litellm_params:
+ guardrail: custom_guardrail.myCustomGuardrail
+ mode: "during_call"
+ - guardrail_name: "custom-post-guard"
+ litellm_params:
+ guardrail: custom_guardrail.myCustomGuardrail
+ mode: "post_call"
\ No newline at end of file
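
Both configs reference `custom_guardrail.myCustomGuardrail`, mounted as `/app/custom_guardrail.py` in the CircleCI job above, but that module is not part of this diff. A rough sketch of the shape such a class takes, with hook names assumed from litellm's `CustomGuardrail` base class rather than taken from this PR:

```python
# Rough sketch of a guardrail class like the one referenced as
# custom_guardrail.myCustomGuardrail in the configs above. Hook names and
# signatures are assumptions based on litellm's CustomGuardrail base class;
# the real custom_guardrail.py is not part of this diff.
from typing import Optional

from litellm.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.proxy._types import UserAPIKeyAuth


class myCustomGuardrail(CustomGuardrail):
    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: str,
    ) -> Optional[dict]:
        # mode: "pre_call" -- runs before the LLM call and may mutate or block the request
        for message in data.get("messages", []):
            content = message.get("content")
            if isinstance(content, str) and "blocked-word" in content.lower():
                # illustrative rule only: reject requests containing a keyword
                raise ValueError("Blocked by custom pre-call guardrail")
        return data

    async def async_moderation_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        call_type: str,
    ):
        # mode: "during_call" -- runs alongside the LLM call; raise to fail the request
        return data

    async def async_post_call_success_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
        # mode: "post_call" -- can inspect or redact the model response before it is returned
        return response
```
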
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 686793e7f..d5abe7478 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -7595,7 +7595,6 @@ async def model_info_v1(
@router.get(
"/model_group/info",
- description="Provides more info about each model in /models, including config.yaml descriptions (except api key and api base)",
tags=["model management"],
dependencies=[Depends(user_api_key_auth)],
)
@@ -7603,7 +7602,134 @@ async def model_group_info(
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
- Returns model info at the model group level.
+ Get information about all the deployments on litellm proxy, including config.yaml descriptions (except api key and api base)
+
+    - /models returns all deployments. Proxy Admins can use this to list all deployments set up on the proxy
+ - /model_group/info returns all model groups. End users of proxy should use /model_group/info since those models will be used for /chat/completions, /embeddings, etc.
+
+
+ ```shell
+ curl -X 'GET' \
+ 'http://localhost:4000/model_group/info' \
+ -H 'accept: application/json' \
+ -H 'x-api-key: sk-1234'
+ ```
+
+ Example Response:
+ ```json
+ {
+ "data": [
+ {
+ "model_group": "rerank-english-v3.0",
+ "providers": [
+ "cohere"
+ ],
+ "max_input_tokens": null,
+ "max_output_tokens": null,
+ "input_cost_per_token": 0.0,
+ "output_cost_per_token": 0.0,
+ "mode": null,
+ "tpm": null,
+ "rpm": null,
+ "supports_parallel_function_calling": false,
+ "supports_vision": false,
+ "supports_function_calling": false,
+ "supported_openai_params": [
+ "stream",
+ "temperature",
+ "max_tokens",
+ "logit_bias",
+ "top_p",
+ "frequency_penalty",
+ "presence_penalty",
+ "stop",
+ "n",
+ "extra_headers"
+ ]
+ },
+ {
+ "model_group": "gpt-3.5-turbo",
+ "providers": [
+ "openai"
+ ],
+ "max_input_tokens": 16385.0,
+ "max_output_tokens": 4096.0,
+ "input_cost_per_token": 1.5e-06,
+ "output_cost_per_token": 2e-06,
+ "mode": "chat",
+ "tpm": null,
+ "rpm": null,
+ "supports_parallel_function_calling": false,
+ "supports_vision": false,
+ "supports_function_calling": true,
+ "supported_openai_params": [
+ "frequency_penalty",
+ "logit_bias",
+ "logprobs",
+ "top_logprobs",
+ "max_tokens",
+ "max_completion_tokens",
+ "n",
+ "presence_penalty",
+ "seed",
+ "stop",
+ "stream",
+ "stream_options",
+ "temperature",
+ "top_p",
+ "tools",
+ "tool_choice",
+ "function_call",
+ "functions",
+ "max_retries",
+ "extra_headers",
+ "parallel_tool_calls",
+ "response_format"
+ ]
+ },
+ {
+ "model_group": "llava-hf",
+ "providers": [
+ "openai"
+ ],
+ "max_input_tokens": null,
+ "max_output_tokens": null,
+ "input_cost_per_token": 0.0,
+ "output_cost_per_token": 0.0,
+ "mode": null,
+ "tpm": null,
+ "rpm": null,
+ "supports_parallel_function_calling": false,
+ "supports_vision": true,
+ "supports_function_calling": false,
+ "supported_openai_params": [
+ "frequency_penalty",
+ "logit_bias",
+ "logprobs",
+ "top_logprobs",
+ "max_tokens",
+ "max_completion_tokens",
+ "n",
+ "presence_penalty",
+ "seed",
+ "stop",
+ "stream",
+ "stream_options",
+ "temperature",
+ "top_p",
+ "tools",
+ "tool_choice",
+ "function_call",
+ "functions",
+ "max_retries",
+ "extra_headers",
+ "parallel_tool_calls",
+ "response_format"
+ ]
+ }
+ ]
+ }
+ ```
"""
global llm_model_list, general_settings, user_config_file_path, proxy_config, llm_router
diff --git a/tests/llm_translation/test_supports_vision.py b/tests/llm_translation/test_supports_vision.py
new file mode 100644
index 000000000..01188d3b9
--- /dev/null
+++ b/tests/llm_translation/test_supports_vision.py
@@ -0,0 +1,94 @@
+import json
+import os
+import sys
+from datetime import datetime
+from unittest.mock import AsyncMock
+
+sys.path.insert(
+ 0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
+
+
+import httpx
+import pytest
+from respx import MockRouter
+
+import litellm
+from litellm import Choices, Message, ModelResponse
+
+
+@pytest.mark.asyncio()
+@pytest.mark.respx
+async def test_vision_with_custom_model(respx_mock: MockRouter):
+ """
+    Tests that when an image is sent to an OpenAI compatible endpoint, the image is included in the request body
+
+ """
+ import base64
+ import requests
+
+ litellm.set_verbose = True
+ api_base = "https://my-custom.api.openai.com"
+
+ # Fetch and encode a test image
+ url = "https://dummyimage.com/100/100/fff&text=Test+image"
+ response = requests.get(url)
+ file_data = response.content
+ encoded_file = base64.b64encode(file_data).decode("utf-8")
+ base64_image = f"data:image/png;base64,{encoded_file}"
+
+ mock_response = ModelResponse(
+ id="cmpl-mock",
+ choices=[Choices(message=Message(content="Mocked response", role="assistant"))],
+ created=int(datetime.now().timestamp()),
+ model="my-custom-model",
+ )
+
+ mock_request = respx_mock.post(f"{api_base}/chat/completions").mock(
+ return_value=httpx.Response(200, json=mock_response.dict())
+ )
+
+ response = await litellm.acompletion(
+ model="openai/my-custom-model",
+ max_tokens=10,
+ api_base=api_base, # use the mock api
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "What's in this image?"},
+ {
+ "type": "image_url",
+ "image_url": {"url": base64_image},
+ },
+ ],
+ }
+ ],
+ )
+
+ assert mock_request.called
+ request_body = json.loads(mock_request.calls[0].request.content)
+
+ print("request_body: ", request_body)
+
+ assert request_body == {
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "What's in this image?"},
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkBAMAAACCzIhnAAAAG1BMVEURAAD///+ln5/h39/Dv79qX18uHx+If39MPz9oMSdmAAAACXBIWXMAAA7EAAAOxAGVKw4bAAABB0lEQVRYhe2SzWrEIBCAh2A0jxEs4j6GLDS9hqWmV5Flt0cJS+lRwv742DXpEjY1kOZW6HwHFZnPmVEBEARBEARB/jd0KYA/bcUYbPrRLh6amXHJ/K+ypMoyUaGthILzw0l+xI0jsO7ZcmCcm4ILd+QuVYgpHOmDmz6jBeJImdcUCmeBqQpuqRIbVmQsLCrAalrGpfoEqEogqbLTWuXCPCo+Ki1XGqgQ+jVVuhB8bOaHkvmYuzm/b0KYLWwoK58oFqi6XfxQ4Uz7d6WeKpna6ytUs5e8betMcqAv5YPC5EZB2Lm9FIn0/VP6R58+/GEY1X1egVoZ/3bt/EqF6malgSAIgiDIH+QL41409QMY0LMAAAAASUVORK5CYII="
+ },
+ },
+ ],
+ }
+ ],
+ "model": "my-custom-model",
+ "max_tokens": 10,
+ }
+
+ print(f"response: {response}")
+ assert isinstance(response, ModelResponse)
diff --git a/tests/otel_tests/test_guardrails.py b/tests/otel_tests/test_guardrails.py
index 2b5bfc644..342ce33b9 100644
--- a/tests/otel_tests/test_guardrails.py
+++ b/tests/otel_tests/test_guardrails.py
@@ -70,6 +70,7 @@ async def generate_key(session, guardrails):
@pytest.mark.asyncio
+@pytest.mark.skip(reason="Aporia account disabled")
async def test_llm_guard_triggered_safe_request():
"""
- Tests a request where no content mod is triggered
@@ -99,6 +100,7 @@ async def test_llm_guard_triggered_safe_request():
@pytest.mark.asyncio
+@pytest.mark.skip(reason="Aporia account disabled")
async def test_llm_guard_triggered():
"""
- Tests a request where no content mod is triggered
@@ -146,6 +148,7 @@ async def test_no_llm_guard_triggered():
@pytest.mark.asyncio
+@pytest.mark.skip(reason="Aporia account disabled")
async def test_guardrails_with_api_key_controls():
"""
- Make two API Keys
diff --git a/tests/otel_tests/test_model_info.py b/tests/otel_tests/test_model_info.py
new file mode 100644
index 000000000..6136fe0e8
--- /dev/null
+++ b/tests/otel_tests/test_model_info.py
@@ -0,0 +1,28 @@
+"""
+/model/info test
+"""
+
+import httpx
+import pytest
+
+
+@pytest.mark.asyncio()
+async def test_custom_model_supports_vision():
+ async with httpx.AsyncClient() as client:
+ response = await client.get(
+ "http://localhost:4000/model/info",
+ headers={"Authorization": "Bearer sk-1234"},
+ )
+ assert response.status_code == 200
+
+ data = response.json()["data"]
+
+ print("response from /model/info", data)
+ llava_model = next(
+ (model for model in data if model["model_name"] == "llava-hf"), None
+ )
+
+ assert llava_model is not None, "llava-hf model not found in response"
+ assert (
+ llava_model["model_info"]["supports_vision"] == True
+ ), "llava-hf model should support vision"
diff --git a/tests/otel_tests/test_team_tag_routing.py b/tests/otel_tests/test_team_tag_routing.py
index 6c7d9b450..842b76d94 100644
--- a/tests/otel_tests/test_team_tag_routing.py
+++ b/tests/otel_tests/test_team_tag_routing.py
@@ -18,6 +18,7 @@ async def chat_completion(
"Authorization": f"Bearer {key}",
"Content-Type": "application/json",
}
+ print("headers=", headers)
data = {
"model": model,
"messages": [
@@ -96,16 +97,21 @@ async def test_team_tag_routing():
async with aiohttp.ClientSession() as session:
key = LITELLM_MASTER_KEY
team_a_data = await create_team_with_tags(session, key, ["teamA"])
+ print("team_a_data=", team_a_data)
team_a_id = team_a_data["team_id"]
team_b_data = await create_team_with_tags(session, key, ["teamB"])
+ print("team_b_data=", team_b_data)
team_b_id = team_b_data["team_id"]
key_with_team_a = await create_key_with_team(session, key, team_a_id)
- print(key_with_team_a)
+ print("key_with_team_a=", key_with_team_a)
_key_with_team_a = key_with_team_a["key"]
for _ in range(5):
- response_a, headers = await chat_completion(session, _key_with_team_a)
+ response_a, headers = await chat_completion(
+ session=session, key=_key_with_team_a
+ )
+
headers = dict(headers)
print(response_a)
print(headers)