diff --git a/docs/my-website/docs/completion/json_mode.md b/docs/my-website/docs/completion/json_mode.md index 119cc033fe..0c3a930764 100644 --- a/docs/my-website/docs/completion/json_mode.md +++ b/docs/my-website/docs/completion/json_mode.md @@ -51,6 +51,9 @@ curl http://0.0.0.0:4000/v1/chat/completions \ ## Check Model Support + +### 1. Check if model supports `response_format` + Call `litellm.get_supported_openai_params` to check if a model/provider supports `response_format`. ```python @@ -61,6 +64,20 @@ params = get_supported_openai_params(model="anthropic.claude-3", custom_llm_prov assert "response_format" in params ``` +### 2. Check if model supports `json_schema` + +This is used to check if you can pass +- `response_format={ "type": "json_schema", "json_schema": … , "strict": true }` +- `response_format=` + +```python +from litellm import supports_response_schema + +assert supports_response_schema(model="gemini-1.5-pro-preview-0215", custom_llm_provider="bedrock") +``` + +Check out [model_prices_and_context_window.json](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) for a full list of models and their support for `response_schema`. + ## Pass in 'json_schema' To use Structured Outputs, simply specify diff --git a/litellm/_service_logger.py b/litellm/_service_logger.py index f777c93d45..5cba897cf3 100644 --- a/litellm/_service_logger.py +++ b/litellm/_service_logger.py @@ -7,6 +7,7 @@ from litellm._logging import verbose_logger from litellm.proxy._types import UserAPIKeyAuth from .integrations.custom_logger import CustomLogger +from .integrations.datadog.datadog import DataDogLogger from .integrations.prometheus_services import PrometheusServicesLogger from .types.services import ServiceLoggerPayload, ServiceTypes @@ -134,9 +135,7 @@ class ServiceLogging(CustomLogger): await self.prometheusServicesLogger.async_service_success_hook( payload=payload ) - elif callback == "datadog": - from litellm.integrations.datadog.datadog import DataDogLogger - + elif callback == "datadog" or isinstance(callback, DataDogLogger): await self.init_datadog_logger_if_none() await self.dd_logger.async_service_success_hook( payload=payload, @@ -237,6 +236,7 @@ class ServiceLogging(CustomLogger): duration=duration, call_type=call_type, ) + for callback in litellm.service_callback: if callback == "prometheus_system": await self.init_prometheus_services_logger_if_none() @@ -244,7 +244,7 @@ class ServiceLogging(CustomLogger): payload=payload, error=error, ) - elif callback == "datadog": + elif callback == "datadog" or isinstance(callback, DataDogLogger): await self.init_datadog_logger_if_none() await self.dd_logger.async_service_failure_hook( payload=payload, diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index e8d7e80b74..6238fd8d57 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -5,7 +5,7 @@ Translating between OpenAI's `/chat/completion` format and Amazon's `/converse` import copy import time import types -from typing import List, Optional, Union +from typing import List, Literal, Optional, Tuple, Union, cast, overload import httpx @@ -255,6 +255,59 @@ class AmazonConverseConfig: ) return optional_params + @overload + def _get_cache_point_block( + self, message_block: dict, block_type: Literal["system"] + ) -> Optional[SystemContentBlock]: + pass + + @overload + def _get_cache_point_block( + self, message_block: dict, 
block_type: Literal["content_block"] + ) -> Optional[ContentBlock]: + pass + + def _get_cache_point_block( + self, message_block: dict, block_type: Literal["system", "content_block"] + ) -> Optional[Union[SystemContentBlock, ContentBlock]]: + if message_block.get("cache_control", None) is None: + return None + if block_type == "system": + return SystemContentBlock(cachePoint=CachePointBlock(type="default")) + else: + return ContentBlock(cachePoint=CachePointBlock(type="default")) + + def _transform_system_message( + self, messages: List[AllMessageValues] + ) -> Tuple[List[AllMessageValues], List[SystemContentBlock]]: + system_prompt_indices = [] + system_content_blocks: List[SystemContentBlock] = [] + for idx, message in enumerate(messages): + if message["role"] == "system": + _system_content_block: Optional[SystemContentBlock] = None + _cache_point_block: Optional[SystemContentBlock] = None + if isinstance(message["content"], str) and len(message["content"]) > 0: + _system_content_block = SystemContentBlock(text=message["content"]) + _cache_point_block = self._get_cache_point_block( + cast(dict, message), block_type="system" + ) + elif isinstance(message["content"], list): + for m in message["content"]: + if m.get("type", "") == "text" and len(m["text"]) > 0: + _system_content_block = SystemContentBlock(text=m["text"]) + _cache_point_block = self._get_cache_point_block( + m, block_type="system" + ) + if _system_content_block is not None: + system_content_blocks.append(_system_content_block) + if _cache_point_block is not None: + system_content_blocks.append(_cache_point_block) + system_prompt_indices.append(idx) + if len(system_prompt_indices) > 0: + for idx in reversed(system_prompt_indices): + messages.pop(idx) + return messages, system_content_blocks + def _transform_request( self, model: str, @@ -262,24 +315,7 @@ class AmazonConverseConfig: optional_params: dict, litellm_params: dict, ) -> RequestObject: - system_prompt_indices = [] - system_content_blocks: List[SystemContentBlock] = [] - for idx, message in enumerate(messages): - if message["role"] == "system": - _system_content_block: Optional[SystemContentBlock] = None - if isinstance(message["content"], str) and len(message["content"]) > 0: - _system_content_block = SystemContentBlock(text=message["content"]) - elif isinstance(message["content"], list): - for m in message["content"]: - if m.get("type", "") == "text" and len(m["text"]) > 0: - _system_content_block = SystemContentBlock(text=m["text"]) - if _system_content_block is not None: - system_content_blocks.append(_system_content_block) - system_prompt_indices.append(idx) - if len(system_prompt_indices) > 0: - for idx in reversed(system_prompt_indices): - messages.pop(idx) - + messages, system_content_blocks = self._transform_system_message(messages) inference_params = copy.deepcopy(optional_params) additional_request_keys = [] additional_request_params = {} diff --git a/litellm/llms/bedrock/rerank/handler.py b/litellm/llms/bedrock/rerank/handler.py new file mode 100644 index 0000000000..d604760e35 --- /dev/null +++ b/litellm/llms/bedrock/rerank/handler.py @@ -0,0 +1,159 @@ +import copy +import json +import os +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast + +import httpx +from openai.types.image import Image +from pydantic import BaseModel + +import litellm +from litellm._logging import verbose_logger +from litellm.litellm_core_utils.litellm_logging import Logging as LitellmLogging +from litellm.llms.custom_httpx.http_handler import ( + 
_get_httpx_client, + get_async_httpx_client, +) +from litellm.types.llms.bedrock import BedrockPreparedRequest, BedrockRerankRequest +from litellm.types.rerank import RerankRequest +from litellm.types.utils import RerankResponse + +from ...base_aws_llm import BaseAWSLLM +from ..common_utils import BedrockError +from .transformation import BedrockRerankConfig + +if TYPE_CHECKING: + from botocore.awsrequest import AWSPreparedRequest +else: + AWSPreparedRequest = Any + + +class BedrockRerankHandler(BaseAWSLLM): + async def arerank( + self, + prepared_request: BedrockPreparedRequest, + ): + client = get_async_httpx_client(llm_provider=litellm.LlmProviders.BEDROCK) + try: + response = await client.post(url=prepared_request["endpoint_url"], headers=prepared_request["prepped"].headers, data=prepared_request["body"]) # type: ignore + response.raise_for_status() + except httpx.HTTPStatusError as err: + error_code = err.response.status_code + raise BedrockError(status_code=error_code, message=err.response.text) + except httpx.TimeoutException: + raise BedrockError(status_code=408, message="Timeout error occurred.") + + return BedrockRerankConfig()._transform_response(response.json()) + + def rerank( + self, + model: str, + query: str, + documents: List[Union[str, Dict[str, Any]]], + optional_params: dict, + logging_obj: LitellmLogging, + top_n: Optional[int] = None, + rank_fields: Optional[List[str]] = None, + return_documents: Optional[bool] = True, + max_chunks_per_doc: Optional[int] = None, + _is_async: Optional[bool] = False, + api_base: Optional[str] = None, + extra_headers: Optional[dict] = None, + ) -> RerankResponse: + request_data = RerankRequest( + model=model, + query=query, + documents=documents, + top_n=top_n, + rank_fields=rank_fields, + return_documents=return_documents, + ) + data = BedrockRerankConfig()._transform_request(request_data) + + prepared_request = self._prepare_request( + optional_params=optional_params, + api_base=api_base, + extra_headers=extra_headers, + data=cast(dict, data), + ) + + logging_obj.pre_call( + input=data, + api_key="", + additional_args={ + "complete_input_dict": data, + "api_base": prepared_request["endpoint_url"], + "headers": prepared_request["prepped"].headers, + }, + ) + + if _is_async: + return self.arerank(prepared_request) # type: ignore + + client = _get_httpx_client() + try: + response = client.post(url=prepared_request["endpoint_url"], headers=prepared_request["prepped"].headers, data=prepared_request["body"]) # type: ignore + response.raise_for_status() + except httpx.HTTPStatusError as err: + error_code = err.response.status_code + raise BedrockError(status_code=error_code, message=err.response.text) + except httpx.TimeoutException: + raise BedrockError(status_code=408, message="Timeout error occurred.") + + return BedrockRerankConfig()._transform_response(response.json()) + + def _prepare_request( + self, + api_base: Optional[str], + extra_headers: Optional[dict], + data: dict, + optional_params: dict, + ) -> BedrockPreparedRequest: + try: + import boto3 + from botocore.auth import SigV4Auth + from botocore.awsrequest import AWSRequest + from botocore.credentials import Credentials + except ImportError: + raise ImportError("Missing boto3 to call bedrock. 
Run 'pip install boto3'.") + boto3_credentials_info = self._get_boto_credentials_from_optional_params( + optional_params + ) + + ### SET RUNTIME ENDPOINT ### + _, proxy_endpoint_url = self.get_runtime_endpoint( + api_base=api_base, + aws_bedrock_runtime_endpoint=boto3_credentials_info.aws_bedrock_runtime_endpoint, + aws_region_name=boto3_credentials_info.aws_region_name, + ) + proxy_endpoint_url = proxy_endpoint_url.replace( + "bedrock-runtime", "bedrock-agent-runtime" + ) + proxy_endpoint_url = f"{proxy_endpoint_url}/rerank" + sigv4 = SigV4Auth( + boto3_credentials_info.credentials, + "bedrock", + boto3_credentials_info.aws_region_name, + ) + # Make POST Request + body = json.dumps(data).encode("utf-8") + + headers = {"Content-Type": "application/json"} + if extra_headers is not None: + headers = {"Content-Type": "application/json", **extra_headers} + request = AWSRequest( + method="POST", url=proxy_endpoint_url, data=body, headers=headers + ) + sigv4.add_auth(request) + if ( + extra_headers is not None and "Authorization" in extra_headers + ): # prevent sigv4 from overwriting the auth header + request.headers["Authorization"] = extra_headers["Authorization"] + prepped = request.prepare() + + return BedrockPreparedRequest( + endpoint_url=proxy_endpoint_url, + prepped=prepped, + body=body, + data=data, + ) diff --git a/litellm/llms/bedrock/rerank/transformation.py b/litellm/llms/bedrock/rerank/transformation.py new file mode 100644 index 0000000000..7dc9b0aab1 --- /dev/null +++ b/litellm/llms/bedrock/rerank/transformation.py @@ -0,0 +1,117 @@ +""" +Translates from Cohere's `/v1/rerank` input format to Bedrock's `/rerank` input format. + +Why separate file? Make it easy to see how transformation works +""" + +import uuid +from typing import List, Optional, Union + +from litellm.types.llms.bedrock import ( + BedrockRerankBedrockRerankingConfiguration, + BedrockRerankConfiguration, + BedrockRerankInlineDocumentSource, + BedrockRerankModelConfiguration, + BedrockRerankQuery, + BedrockRerankRequest, + BedrockRerankSource, + BedrockRerankTextDocument, + BedrockRerankTextQuery, +) +from litellm.types.rerank import ( + RerankBilledUnits, + RerankRequest, + RerankResponse, + RerankResponseMeta, + RerankResponseResult, + RerankTokens, +) + + +class BedrockRerankConfig: + + def _transform_sources( + self, documents: List[Union[str, dict]] + ) -> List[BedrockRerankSource]: + """ + Transform the sources from RerankRequest format to Bedrock format. + """ + _sources = [] + for document in documents: + if isinstance(document, str): + _sources.append( + BedrockRerankSource( + inlineDocumentSource=BedrockRerankInlineDocumentSource( + textDocument=BedrockRerankTextDocument(text=document), + type="TEXT", + ), + type="INLINE", + ) + ) + else: + _sources.append( + BedrockRerankSource( + inlineDocumentSource=BedrockRerankInlineDocumentSource( + jsonDocument=document, type="JSON" + ), + type="INLINE", + ) + ) + return _sources + + def _transform_request(self, request_data: RerankRequest) -> BedrockRerankRequest: + """ + Transform the request from RerankRequest format to Bedrock format. 
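+
+        Example output (illustrative sketch only - actual values depend on the
+        incoming RerankRequest; "<model arn>" is a placeholder):
+        {
+            "queries": [{"textQuery": {"text": "hello"}, "type": "TEXT"}],
+            "rerankingConfiguration": {
+                "bedrockRerankingConfiguration": {
+                    "modelConfiguration": {"modelArn": "<model arn>"},
+                    "numberOfResults": 2
+                },
+                "type": "BEDROCK_RERANKING_MODEL"
+            },
+            "sources": [
+                {
+                    "inlineDocumentSource": {
+                        "textDocument": {"text": "hello"},
+                        "type": "TEXT"
+                    },
+                    "type": "INLINE"
+                }
+            ]
+        }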
+ """ + _sources = self._transform_sources(request_data.documents) + + return BedrockRerankRequest( + queries=[ + BedrockRerankQuery( + textQuery=BedrockRerankTextQuery(text=request_data.query), + type="TEXT", + ) + ], + rerankingConfiguration=BedrockRerankConfiguration( + bedrockRerankingConfiguration=BedrockRerankBedrockRerankingConfiguration( + modelConfiguration=BedrockRerankModelConfiguration( + modelArn=request_data.model + ), + numberOfResults=request_data.top_n or len(request_data.documents), + ), + type="BEDROCK_RERANKING_MODEL", + ), + sources=_sources, + ) + + def _transform_response(self, response: dict) -> RerankResponse: + """ + Transform the response from Bedrock into the RerankResponse format. + + example input: + {"results":[{"index":0,"relevanceScore":0.6847912669181824},{"index":1,"relevanceScore":0.5980774760246277}]} + """ + _billed_units = RerankBilledUnits(**response.get("usage", {})) + _tokens = RerankTokens(**response.get("usage", {})) + rerank_meta = RerankResponseMeta(billed_units=_billed_units, tokens=_tokens) + + _results: Optional[List[RerankResponseResult]] = None + + bedrock_results = response.get("results") + if bedrock_results: + _results = [ + RerankResponseResult( + index=result.get("index"), + relevance_score=result.get("relevanceScore"), + ) + for result in bedrock_results + ] + + if _results is None: + raise ValueError(f"No results found in the response={response}") + + return RerankResponse( + id=response.get("id") or str(uuid.uuid4()), + results=_results, + meta=rerank_meta, + ) # Return response diff --git a/litellm/llms/jina_ai/rerank/transformation.py b/litellm/llms/jina_ai/rerank/transformation.py index 82039a15b2..a6c0a810c7 100644 --- a/litellm/llms/jina_ai/rerank/transformation.py +++ b/litellm/llms/jina_ai/rerank/transformation.py @@ -31,6 +31,6 @@ class JinaAIRerankConfig: return RerankResponse( id=response.get("id") or str(uuid.uuid4()), - results=_results, + results=_results, # type: ignore meta=rerank_meta, ) # Return response diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index 2f55bb7bac..c218377e51 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -2485,10 +2485,24 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915 image_url=image_url ) _parts.append(_part) # type: ignore + _cache_point_block = ( + litellm.AmazonConverseConfig()._get_cache_point_block( + element, block_type="content_block" + ) + ) + if _cache_point_block is not None: + _parts.append(_cache_point_block) user_content.extend(_parts) else: _part = BedrockContentBlock(text=messages[msg_i]["content"]) + _cache_point_block = ( + litellm.AmazonConverseConfig()._get_cache_point_block( + messages[msg_i], block_type="content_block" + ) + ) user_content.append(_part) + if _cache_point_block is not None: + user_content.append(_cache_point_block) msg_i += 1 if user_content: diff --git a/litellm/llms/together_ai/chat.py b/litellm/llms/together_ai/chat.py index cb12d61475..54b7e48680 100644 --- a/litellm/llms/together_ai/chat.py +++ b/litellm/llms/together_ai/chat.py @@ -6,8 +6,54 @@ Calls done in OpenAI/openai.py as TogetherAI is openai-compatible. 
Docs: https://docs.together.ai/reference/completions-1 """ +from typing import Optional + +from litellm import get_model_info, verbose_logger + from ..OpenAI.chat.gpt_transformation import OpenAIGPTConfig class TogetherAIConfig(OpenAIGPTConfig): - pass + def get_supported_openai_params(self, model: str) -> list: + """ + Only some together models support response_format / tool calling + + Docs: https://docs.together.ai/docs/json-mode + """ + supports_function_calling: Optional[bool] = None + try: + model_info = get_model_info(model, custom_llm_provider="together_ai") + supports_function_calling = model_info.get( + "supports_function_calling", False + ) + except Exception as e: + verbose_logger.debug(f"Error getting supported openai params: {e}") + pass + + optional_params = super().get_supported_openai_params(model) + if supports_function_calling is not True: + verbose_logger.warning( + "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling" + ) + optional_params.remove("tools") + optional_params.remove("tool_choice") + optional_params.remove("function_call") + optional_params.remove("response_format") + return optional_params + + def map_openai_params( + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, + ) -> dict: + mapped_openai_params = super().map_openai_params( + non_default_params, optional_params, model, drop_params + ) + + if "response_format" in mapped_openai_params and mapped_openai_params[ + "response_format" + ] == {"type": "text"}: + mapped_openai_params.pop("response_format") + return mapped_openai_params diff --git a/litellm/llms/together_ai/rerank/transformation.py b/litellm/llms/together_ai/rerank/transformation.py index b2024b5cda..b74e0b6c00 100644 --- a/litellm/llms/together_ai/rerank/transformation.py +++ b/litellm/llms/together_ai/rerank/transformation.py @@ -29,6 +29,6 @@ class TogetherAIRerankConfig: return RerankResponse( id=response.get("id") or str(uuid.uuid4()), - results=_results, + results=_results, # type: ignore meta=rerank_meta, ) # Return response diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 659b7dd8ea..7ad438b100 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -12,7 +12,8 @@ "supports_vision": true, "supports_audio_input": true, "supports_audio_output": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_response_schema": true }, "gpt-4": { "max_tokens": 4096, @@ -4818,7 +4819,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "amazon.nova-lite-v1:0": { "max_tokens": 4096, @@ -4830,7 +4832,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "amazon.nova-pro-v1:0": { "max_tokens": 4096, @@ -4842,7 +4845,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "anthropic.claude-3-sonnet-20240229-v1:0": { "max_tokens": 4096, @@ -4876,7 +4880,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_assistant_prefill": true + "supports_assistant_prefill": true, 
+ "supports_prompt_caching": true }, "anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -4898,7 +4903,8 @@ "litellm_provider": "bedrock", "mode": "chat", "supports_assistant_prefill": true, - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "anthropic.claude-3-opus-20240229-v1:0": { "max_tokens": 4096, @@ -4911,139 +4917,6 @@ "supports_function_calling": true, "supports_vision": true }, - "us.anthropic.claude-3-sonnet-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "max_tokens": 8192, - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true, - "supports_assistant_prefill": true - }, - "us.anthropic.claude-3-haiku-20240307-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.00000125, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-haiku-20241022-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000001, - "output_cost_per_token": 0.000005, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_assistant_prefill": true, - "supports_function_calling": true - }, - "us.anthropic.claude-3-opus-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000015, - "output_cost_per_token": 0.000075, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-sonnet-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "max_tokens": 8192, - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true, - "supports_assistant_prefill": true - }, - "eu.anthropic.claude-3-haiku-20240307-v1:0": { - "max_tokens": 4096, - 
"max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.00000125, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-haiku-20241022-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000001, - "output_cost_per_token": 0.000005, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true - }, - "eu.anthropic.claude-3-opus-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000015, - "output_cost_per_token": 0.000075, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, "anthropic.claude-v1": { "max_tokens": 8191, "max_input_tokens": 100000, @@ -6097,6 +5970,30 @@ "litellm_provider": "together_ai", "mode": "embedding" }, + "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { + "input_cost_per_token": 0.00000018, + "output_cost_per_token": 0.00000018, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, + "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { + "input_cost_per_token": 0.00000088, + "output_cost_per_token": 0.00000088, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, + "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { + "input_cost_per_token": 0.0000035, + "output_cost_per_token": 0.0000035, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": { "input_cost_per_token": 0.0000006, "output_cost_per_token": 0.0000006, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index f2569f2f28..599a1bf230 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -39,10 +39,10 @@ model_list: access_groups: ["private-openai-models"] router_settings: - routing_strategy: usage-based-routing-v2 + # routing_strategy: usage-based-routing-v2 #redis_url: "os.environ/REDIS_URL" redis_host: "os.environ/REDIS_HOST" redis_port: "os.environ/REDIS_PORT" litellm_settings: - success_callback: ["langsmith"] \ No newline at end of file + callbacks: ["datadog"] \ No newline at end of file diff --git a/litellm/rerank_api/main.py b/litellm/rerank_api/main.py index 7e6dc75035..3b3eaad016 100644 --- a/litellm/rerank_api/main.py +++ b/litellm/rerank_api/main.py @@ -7,6 +7,7 @@ import litellm from litellm._logging import verbose_logger from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.llms.azure_ai.rerank import AzureAIRerank +from litellm.llms.bedrock.rerank.handler import BedrockRerankHandler from litellm.llms.cohere.rerank import CohereRerank from litellm.llms.jina_ai.rerank.handler import JinaAIRerank from litellm.llms.together_ai.rerank.handler import TogetherAIRerank @@ -21,6 +22,7 @@ cohere_rerank = CohereRerank() together_rerank = TogetherAIRerank() azure_ai_rerank = AzureAIRerank() jina_ai_rerank = JinaAIRerank() +bedrock_rerank = BedrockRerankHandler() ################################################# @@ -70,7 +72,7 @@ async def arerank( @client 
-def rerank( +def rerank( # noqa: PLR0915 model: str, query: str, documents: List[Union[str, Dict[str, Any]]], @@ -268,6 +270,27 @@ def rerank( max_chunks_per_doc=max_chunks_per_doc, _is_async=_is_async, ) + elif _custom_llm_provider == "bedrock": + api_base = ( + dynamic_api_base + or optional_params.api_base + or litellm.api_base + or get_secret("BEDROCK_API_BASE") # type: ignore + ) + + response = bedrock_rerank.rerank( + model=model, + query=query, + documents=documents, + top_n=top_n, + rank_fields=rank_fields, + return_documents=return_documents, + max_chunks_per_doc=max_chunks_per_doc, + _is_async=_is_async, + optional_params=optional_params.model_dump(exclude_unset=True), + api_base=api_base, + logging_obj=litellm_logging_obj, + ) else: raise ValueError(f"Unsupported provider: {_custom_llm_provider}") diff --git a/litellm/types/llms/bedrock.py b/litellm/types/llms/bedrock.py index 88f329adeb..e94ffd80a3 100644 --- a/litellm/types/llms/bedrock.py +++ b/litellm/types/llms/bedrock.py @@ -2,6 +2,7 @@ import json from typing import Any, List, Literal, Optional, TypedDict, Union from typing_extensions import ( + TYPE_CHECKING, Protocol, Required, Self, @@ -14,8 +15,13 @@ from typing_extensions import ( from .openai import ChatCompletionToolCallChunk -class SystemContentBlock(TypedDict): +class CachePointBlock(TypedDict, total=False): + type: Literal["default"] + + +class SystemContentBlock(TypedDict, total=False): text: str + cachePoint: CachePointBlock class SourceBlock(TypedDict): @@ -58,6 +64,7 @@ class ContentBlock(TypedDict, total=False): document: DocumentBlock toolResult: ToolResultBlock toolUse: ToolUseBlock + cachePoint: CachePointBlock class MessageBlock(TypedDict): @@ -312,3 +319,71 @@ class AmazonStability3TextToImageResponse(TypedDict, total=False): images: List[str] seeds: List[str] finish_reasons: List[str] + + +if TYPE_CHECKING: + from botocore.awsrequest import AWSPreparedRequest +else: + AWSPreparedRequest = Any + +from pydantic import BaseModel + + +class BedrockPreparedRequest(TypedDict): + """ + Internal/Helper class for preparing the request for bedrock image generation + """ + + endpoint_url: str + prepped: AWSPreparedRequest + body: bytes + data: dict + + +class BedrockRerankTextQuery(TypedDict): + text: str + + +class BedrockRerankQuery(TypedDict): + textQuery: BedrockRerankTextQuery + type: Literal["TEXT"] + + +class BedrockRerankModelConfiguration(TypedDict, total=False): + modelArn: Required[str] + modelConfiguration: dict + + +class BedrockRerankBedrockRerankingConfiguration(TypedDict): + modelConfiguration: BedrockRerankModelConfiguration + numberOfResults: int + + +class BedrockRerankConfiguration(TypedDict): + bedrockRerankingConfiguration: BedrockRerankBedrockRerankingConfiguration + type: Literal["BEDROCK_RERANKING_MODEL"] + + +class BedrockRerankTextDocument(TypedDict, total=False): + text: str + + +class BedrockRerankInlineDocumentSource(TypedDict, total=False): + jsonDocument: dict + textDocument: BedrockRerankTextDocument + type: Literal["TEXT", "JSON"] + + +class BedrockRerankSource(TypedDict): + inlineDocumentSource: BedrockRerankInlineDocumentSource + type: Literal["INLINE"] + + +class BedrockRerankRequest(TypedDict): + """ + Request for Bedrock Rerank API + """ + + queries: List[BedrockRerankQuery] + rerankingConfiguration: BedrockRerankConfiguration + sources: List[BedrockRerankSource] diff --git a/litellm/types/rerank.py b/litellm/types/rerank.py index 00b07ba139..8a2332fe36 100644 --- a/litellm/types/rerank.py +++ b/litellm/types/rerank.py @@ 
-36,9 +36,14 @@ class RerankResponseMeta(TypedDict, total=False): tokens: RerankTokens +class RerankResponseResult(TypedDict): + index: int + relevance_score: float + + class RerankResponse(BaseModel): id: str - results: List[dict] # Contains index and relevance_score + results: List[RerankResponseResult] # Contains index and relevance_score meta: Optional[RerankResponseMeta] = None # Contains api_version and billed_units # Define private attributes using PrivateAttr diff --git a/litellm/utils.py b/litellm/utils.py index 86c0a60294..b470cac61e 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1874,22 +1874,11 @@ def supports_prompt_caching( Raises: Exception: If the given model is not found or there's an error in retrieval. """ - try: - model, custom_llm_provider, _, _ = litellm.get_llm_provider( - model=model, custom_llm_provider=custom_llm_provider - ) - - model_info = litellm.get_model_info( - model=model, custom_llm_provider=custom_llm_provider - ) - - if model_info.get("supports_prompt_caching", False) is True: - return True - return False - except Exception as e: - raise Exception( - f"Model not found or error in checking prompt caching support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}" - ) + return _supports_factory( + model=model, + custom_llm_provider=custom_llm_provider, + key="supports_prompt_caching", + ) def supports_vision(model: str, custom_llm_provider: Optional[str] = None) -> bool: diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 659b7dd8ea..7ad438b100 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -12,7 +12,8 @@ "supports_vision": true, "supports_audio_input": true, "supports_audio_output": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_response_schema": true }, "gpt-4": { "max_tokens": 4096, @@ -4818,7 +4819,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "amazon.nova-lite-v1:0": { "max_tokens": 4096, @@ -4830,7 +4832,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "amazon.nova-pro-v1:0": { "max_tokens": 4096, @@ -4842,7 +4845,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "anthropic.claude-3-sonnet-20240229-v1:0": { "max_tokens": 4096, @@ -4876,7 +4880,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_assistant_prefill": true + "supports_assistant_prefill": true, + "supports_prompt_caching": true }, "anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -4898,7 +4903,8 @@ "litellm_provider": "bedrock", "mode": "chat", "supports_assistant_prefill": true, - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "anthropic.claude-3-opus-20240229-v1:0": { "max_tokens": 4096, @@ -4911,139 +4917,6 @@ "supports_function_calling": true, "supports_vision": true }, - "us.anthropic.claude-3-sonnet-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": 
"chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "max_tokens": 8192, - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true, - "supports_assistant_prefill": true - }, - "us.anthropic.claude-3-haiku-20240307-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.00000125, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-haiku-20241022-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000001, - "output_cost_per_token": 0.000005, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_assistant_prefill": true, - "supports_function_calling": true - }, - "us.anthropic.claude-3-opus-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000015, - "output_cost_per_token": 0.000075, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-sonnet-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "max_tokens": 8192, - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true, - "supports_assistant_prefill": true - }, - "eu.anthropic.claude-3-haiku-20240307-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.00000125, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-haiku-20241022-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000001, - "output_cost_per_token": 0.000005, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true - }, - "eu.anthropic.claude-3-opus-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000015, - 
"output_cost_per_token": 0.000075, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, "anthropic.claude-v1": { "max_tokens": 8191, "max_input_tokens": 100000, @@ -6097,6 +5970,30 @@ "litellm_provider": "together_ai", "mode": "embedding" }, + "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { + "input_cost_per_token": 0.00000018, + "output_cost_per_token": 0.00000018, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, + "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { + "input_cost_per_token": 0.00000088, + "output_cost_per_token": 0.00000088, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, + "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { + "input_cost_per_token": 0.0000035, + "output_cost_per_token": 0.0000035, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": { "input_cost_per_token": 0.0000006, "output_cost_per_token": 0.0000006, diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py index 5004d45994..143a989c18 100644 --- a/tests/llm_translation/base_llm_unit_tests.py +++ b/tests/llm_translation/base_llm_unit_tests.py @@ -23,6 +23,34 @@ from litellm.utils import ( from abc import ABC, abstractmethod +def _usage_format_tests(usage: litellm.Usage): + """ + OpenAI prompt caching + - prompt_tokens = sum of non-cache hit tokens + cache-hit tokens + - total_tokens = prompt_tokens + completion_tokens + + Example + ``` + "usage": { + "prompt_tokens": 2006, + "completion_tokens": 300, + "total_tokens": 2306, + "prompt_tokens_details": { + "cached_tokens": 1920 + }, + "completion_tokens_details": { + "reasoning_tokens": 0 + } + # ANTHROPIC_ONLY # + "cache_creation_input_tokens": 0 + } + ``` + """ + assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens + + assert usage.prompt_tokens > usage.prompt_tokens_details.cached_tokens + + class BaseLLMChatTest(ABC): """ Abstract base test class that enforces a common test across all test classes. @@ -273,6 +301,78 @@ class BaseLLMChatTest(ABC): response = litellm.completion(**base_completion_call_args, messages=messages) assert response is not None + def test_prompt_caching(self): + litellm.set_verbose = True + from litellm.utils import supports_prompt_caching + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + base_completion_call_args = self.get_base_completion_call_args() + if not supports_prompt_caching(base_completion_call_args["model"], None): + print("Model does not support prompt caching") + pytest.skip("Model does not support prompt caching") + + try: + for _ in range(2): + response = litellm.completion( + **base_completion_call_args, + messages=[ + # System Message + { + "role": "system", + "content": [ + { + "type": "text", + "text": "Here is the full text of a complex legal agreement" + * 400, + "cache_control": {"type": "ephemeral"}, + } + ], + }, + # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. 
+ { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + { + "role": "assistant", + "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo", + }, + # The final turn is marked with cache-control, for continuing in followups. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + ], + temperature=0.2, + max_tokens=10, + ) + + _usage_format_tests(response.usage) + + print("response=", response) + print("response.usage=", response.usage) + + _usage_format_tests(response.usage) + + assert "prompt_tokens_details" in response.usage + assert response.usage.prompt_tokens_details.cached_tokens > 0 + except litellm.InternalServerError: + pass + @pytest.fixture def pdf_messages(self): import base64 diff --git a/tests/llm_translation/base_rerank_unit_tests.py b/tests/llm_translation/base_rerank_unit_tests.py index 2a8b801946..54f6009fc6 100644 --- a/tests/llm_translation/base_rerank_unit_tests.py +++ b/tests/llm_translation/base_rerank_unit_tests.py @@ -79,6 +79,7 @@ class BaseLLMRerankTest(ABC): @pytest.mark.asyncio() @pytest.mark.parametrize("sync_mode", [True, False]) async def test_basic_rerank(self, sync_mode): + litellm.set_verbose = True rerank_call_args = self.get_base_rerank_call_args() custom_llm_provider = self.get_custom_llm_provider() if sync_mode is True: @@ -86,7 +87,7 @@ class BaseLLMRerankTest(ABC): **rerank_call_args, query="hello", documents=["hello", "world"], - top_n=3, + top_n=2, ) print("re rank response: ", response) @@ -102,7 +103,7 @@ class BaseLLMRerankTest(ABC): **rerank_call_args, query="hello", documents=["hello", "world"], - top_n=3, + top_n=2, ) print("async re rank response: ", response) diff --git a/tests/llm_translation/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py index b5a10953d4..32d0ef1a4f 100644 --- a/tests/llm_translation/test_anthropic_completion.py +++ b/tests/llm_translation/test_anthropic_completion.py @@ -666,7 +666,7 @@ from litellm import completion class TestAnthropicCompletion(BaseLLMChatTest): def get_base_completion_call_args(self) -> dict: - return {"model": "claude-3-haiku-20240307"} + return {"model": "anthropic/claude-3-5-sonnet-20240620"} def test_tool_call_no_arguments(self, tool_call_no_arguments): """Test that tool calls with no arguments is translated correctly. 
Relevant issue: https://github.com/BerriAI/litellm/issues/6833""" diff --git a/tests/llm_translation/test_bedrock_completion.py b/tests/llm_translation/test_bedrock_completion.py index 8a5e6818fc..60b3d2583c 100644 --- a/tests/llm_translation/test_bedrock_completion.py +++ b/tests/llm_translation/test_bedrock_completion.py @@ -1,3 +1,7 @@ +""" +Tests Bedrock Completion + Rerank endpoints +""" + # @pytest.mark.skip(reason="AWS Suspended Account") import os import sys @@ -31,6 +35,7 @@ from litellm.llms.bedrock.chat import BedrockLLM from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.prompt_templates.factory import _bedrock_tools_pt from base_llm_unit_tests import BaseLLMChatTest +from base_rerank_unit_tests import BaseLLMRerankTest # litellm.num_retries = 3 litellm.cache = None @@ -1971,13 +1976,67 @@ def test_bedrock_base_model_helper(): assert model == "us.amazon.nova-pro-v1:0" +@pytest.mark.parametrize( + "messages, expected_cache_control", + [ + ( + [ # test system prompt cache + { + "role": "system", + "content": [ + { + "type": "text", + "text": "You are an AI assistant tasked with analyzing legal documents.", + }, + { + "type": "text", + "text": "Here is the full text of a complex legal agreement", + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + { + "role": "user", + "content": "what are the key terms and conditions in this agreement?", + }, + ], + True, + ), + ( + [ # test user prompt cache + { + "role": "user", + "content": "what are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + }, + ], + True, + ), + ], +) +def test_bedrock_prompt_caching_message(messages, expected_cache_control): + import litellm + import json + + transformed_messages = litellm.AmazonConverseConfig()._transform_request( + model="bedrock/anthropic.claude-3-5-haiku-20241022-v1:0", + messages=messages, + optional_params={}, + litellm_params={}, + ) + if expected_cache_control: + assert "cachePoint" in json.dumps(transformed_messages) + else: + assert "cachePoint" not in json.dumps(transformed_messages) + + class TestBedrockConverseChat(BaseLLMChatTest): def get_base_completion_call_args(self) -> dict: os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" litellm.model_cost = litellm.get_model_cost_map(url="") litellm.add_known_models() return { - "model": "bedrock/us.anthropic.claude-3-haiku-20240307-v1:0", + "model": "bedrock/anthropic.claude-3-5-haiku-20241022-v1:0", } def test_tool_call_no_arguments(self, tool_call_no_arguments): @@ -1991,3 +2050,19 @@ class TestBedrockConverseChat(BaseLLMChatTest): Todo: if litellm.modify_params is True ensure it's a valid utf-8 sequence """ pass + + def test_prompt_caching(self): + """ + Remove override once we have access to Bedrock prompt caching + """ + pass + + +class TestBedrockRerank(BaseLLMRerankTest): + def get_custom_llm_provider(self) -> litellm.LlmProviders: + return litellm.LlmProviders.BEDROCK + + def get_base_rerank_call_args(self) -> dict: + return { + "model": "bedrock/arn:aws:bedrock:us-west-2::foundation-model/amazon.rerank-v1:0", + } diff --git a/tests/llm_translation/test_together_ai.py b/tests/llm_translation/test_together_ai.py new file mode 100644 index 0000000000..b83a700002 --- /dev/null +++ b/tests/llm_translation/test_together_ai.py @@ -0,0 +1,58 @@ +""" +Test TogetherAI LLM +""" + +from base_llm_unit_tests import BaseLLMChatTest +import json +import os +import sys +from datetime import datetime +from unittest.mock import AsyncMock + 
+sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path + +import litellm +import pytest + + +class TestTogetherAI(BaseLLMChatTest): + def get_base_completion_call_args(self) -> dict: + litellm.set_verbose = True + return {"model": "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1"} + + def test_tool_call_no_arguments(self, tool_call_no_arguments): + """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833""" + pass + + def test_multilingual_requests(self): + """ + Mistral API raises a 400 BadRequest error when the request contains invalid utf-8 sequences. + """ + pass + + @pytest.mark.parametrize( + "model, expected_bool", + [ + ("meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", True), + ("nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", False), + ], + ) + def test_get_supported_response_format_together_ai( + self, model: str, expected_bool: bool + ) -> None: + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + optional_params = litellm.get_supported_openai_params( + model, custom_llm_provider="together_ai" + ) + # Mapped provider + assert isinstance(optional_params, list) + + if expected_bool: + assert "response_format" in optional_params + assert "tools" in optional_params + else: + assert "response_format" not in optional_params + assert "tools" not in optional_params diff --git a/tests/local_testing/test_caching_handler.py b/tests/local_testing/test_caching_handler.py index 11f7831bc3..b2c8022649 100644 --- a/tests/local_testing/test_caching_handler.py +++ b/tests/local_testing/test_caching_handler.py @@ -197,7 +197,7 @@ async def test_async_log_cache_hit_on_callbacks(): ), ( CallTypes.rerank.value, - {"id": "test", "results": [{"index": 0, "score": 0.9}]}, + {"id": "test", "results": [{"index": 0, "relevance_score": 0.9}]}, RerankResponse, ), ( diff --git a/tests/local_testing/test_prompt_caching.py b/tests/local_testing/test_prompt_caching.py index c73bda04ec..11c4d583d0 100644 --- a/tests/local_testing/test_prompt_caching.py +++ b/tests/local_testing/test_prompt_caching.py @@ -38,76 +38,6 @@ def _usage_format_tests(usage: litellm.Usage): assert usage.prompt_tokens > usage.prompt_tokens_details.cached_tokens -@pytest.mark.parametrize( - "model", - [ - "anthropic/claude-3-5-sonnet-20240620", - # "openai/gpt-4o", - # "deepseek/deepseek-chat", - ], -) -def test_prompt_caching_model(model): - try: - for _ in range(2): - response = litellm.completion( - model=model, - messages=[ - # System Message - { - "role": "system", - "content": [ - { - "type": "text", - "text": "Here is the full text of a complex legal agreement" - * 400, - "cache_control": {"type": "ephemeral"}, - } - ], - }, - # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What are the key terms and conditions in this agreement?", - "cache_control": {"type": "ephemeral"}, - } - ], - }, - { - "role": "assistant", - "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo", - }, - # The final turn is marked with cache-control, for continuing in followups. 
- { - "role": "user", - "content": [ - { - "type": "text", - "text": "What are the key terms and conditions in this agreement?", - "cache_control": {"type": "ephemeral"}, - } - ], - }, - ], - temperature=0.2, - max_tokens=10, - ) - - _usage_format_tests(response.usage) - - print("response=", response) - print("response.usage=", response.usage) - - _usage_format_tests(response.usage) - - assert "prompt_tokens_details" in response.usage - assert response.usage.prompt_tokens_details.cached_tokens > 0 - except litellm.InternalServerError: - pass - - def test_supports_prompt_caching(): from litellm.utils import supports_prompt_caching diff --git a/tests/logging_callback_tests/test_log_db_redis_services.py b/tests/logging_callback_tests/test_log_db_redis_services.py index 9824e1a5bb..fa0c3b595a 100644 --- a/tests/logging_callback_tests/test_log_db_redis_services.py +++ b/tests/logging_callback_tests/test_log_db_redis_services.py @@ -185,3 +185,22 @@ async def test_log_db_metrics_failure_error_types(exception, should_log): else: # Assert failure was NOT logged for non-DB errors mock_proxy_logging.service_logging_obj.async_service_failure_hook.assert_not_called() + + +@pytest.mark.asyncio +async def test_dd_log_db_spend_failure_metrics(): + from litellm._service_logger import ServiceLogging + from litellm.integrations.datadog.datadog import DataDogLogger + + dd_logger = DataDogLogger() + with patch.object(dd_logger, "async_service_failure_hook", new_callable=AsyncMock): + service_logging_obj = ServiceLogging() + + litellm.service_callback = [dd_logger] + + await service_logging_obj.async_service_failure_hook( + service=ServiceTypes.DB, + call_type="test_call_type", + error="test_error", + duration=1.0, + )
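
For reference, a minimal usage sketch of the Bedrock rerank path added in this PR. The model ARN below mirrors the one used in `TestBedrockRerank` and is an assumption for your account/region; AWS credentials are assumed to be resolved through the usual boto3 environment variables.

```python
import litellm

# Rerank via Bedrock's bedrock-agent-runtime /rerank endpoint (added in this PR).
# The ARN is illustrative - swap in the rerank foundation model available to you.
response = litellm.rerank(
    model="bedrock/arn:aws:bedrock:us-west-2::foundation-model/amazon.rerank-v1:0",
    query="hello",
    documents=["hello", "world"],
    top_n=2,
)

# Each entry is a RerankResponseResult: {"index": int, "relevance_score": float}
print(response.results)
```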