diff --git a/.github/workflows/ghcr_deploy.yml b/.github/workflows/ghcr_deploy.yml index f8064cf47..8c8060a92 100644 --- a/.github/workflows/ghcr_deploy.yml +++ b/.github/workflows/ghcr_deploy.yml @@ -194,6 +194,8 @@ jobs: platforms: local,linux/amd64,linux/arm64,linux/arm64/v8 build-and-push-helm-chart: + if: github.event.inputs.release_type != 'dev' + needs: [docker-hub-deploy, build-and-push-image, build-and-push-image-database] runs-on: ubuntu-latest steps: - name: Checkout repository @@ -211,9 +213,17 @@ jobs: - name: lowercase github.repository_owner run: | echo "REPO_OWNER=`echo ${{github.repository_owner}} | tr '[:upper:]' '[:lower:]'`" >>${GITHUB_ENV} + - name: Get LiteLLM Latest Tag id: current_app_tag - uses: WyriHaximus/github-action-get-previous-tag@v1.3.0 + shell: bash + run: | + LATEST_TAG=$(git describe --tags --exclude "*dev*" --abbrev=0) + if [ -z "${LATEST_TAG}" ]; then + echo "latest_tag=latest" | tee -a $GITHUB_OUTPUT + else + echo "latest_tag=${LATEST_TAG}" | tee -a $GITHUB_OUTPUT + fi - name: Get last published chart version id: current_version @@ -241,7 +251,7 @@ jobs: name: ${{ env.CHART_NAME }} repository: ${{ env.REPO_OWNER }} tag: ${{ github.event.inputs.chartVersion || steps.bump_version.outputs.next-version || '0.1.0' }} - app_version: ${{ steps.current_app_tag.outputs.tag || 'latest' }} + app_version: ${{ steps.current_app_tag.outputs.latest_tag }} path: deploy/charts/${{ env.CHART_NAME }} registry: ${{ env.REGISTRY }} registry_username: ${{ github.actor }} diff --git a/docs/my-website/docs/pass_through/langfuse.md b/docs/my-website/docs/pass_through/langfuse.md new file mode 100644 index 000000000..8987842f7 --- /dev/null +++ b/docs/my-website/docs/pass_through/langfuse.md @@ -0,0 +1,132 @@ +# Langfuse Endpoints (Pass-Through) + +Pass-through endpoints for Langfuse - call langfuse endpoints with LiteLLM Virtual Key. + +Just replace `https://us.cloud.langfuse.com` with `LITELLM_PROXY_BASE_URL/langfuse` 🚀 + +#### **Example Usage** +```python +from langfuse import Langfuse + +langfuse = Langfuse( + host="http://localhost:4000/langfuse", # your litellm proxy endpoint + public_key="anything", # no key required since this is a pass through + secret_key="LITELLM_VIRTUAL_KEY", # no key required since this is a pass through +) + +print("sending langfuse trace request") +trace = langfuse.trace(name="test-trace-litellm-proxy-passthrough") +print("flushing langfuse request") +langfuse.flush() + +print("flushed langfuse request") +``` + +Supports **ALL** Langfuse Endpoints. + +[**See All Langfuse Endpoints**](https://api.reference.langfuse.com/) + +## Quick Start + +Let's log a trace to Langfuse. + +1. Add Langfuse Public/Private keys to environment + +```bash +export LANGFUSE_PUBLIC_KEY="" +export LANGFUSE_PRIVATE_KEY="" +``` + +2. Start LiteLLM Proxy + +```bash +litellm + +# RUNNING on http://0.0.0.0:4000 +``` + +3. Test it! + +Let's log a trace to Langfuse! 
+ +```python +from langfuse import Langfuse + +langfuse = Langfuse( + host="http://localhost:4000/langfuse", # your litellm proxy endpoint + public_key="anything", # no key required since this is a pass through + secret_key="anything", # no key required since this is a pass through +) + +print("sending langfuse trace request") +trace = langfuse.trace(name="test-trace-litellm-proxy-passthrough") +print("flushing langfuse request") +langfuse.flush() + +print("flushed langfuse request") +``` + + +## Advanced - Use with Virtual Keys + +Pre-requisites +- [Setup proxy with DB](../proxy/virtual_keys.md#setup) + +Use this to avoid giving developers the raw Langfuse API keys, while still letting them call Langfuse endpoints. + +### Usage + +1. Setup environment + +```bash +export DATABASE_URL="" +export LITELLM_MASTER_KEY="" +export LANGFUSE_PUBLIC_KEY="" +export LANGFUSE_PRIVATE_KEY="" +``` + +```bash +litellm + +# RUNNING on http://0.0.0.0:4000 +``` + +2. Generate virtual key + +```bash +curl -X POST 'http://0.0.0.0:4000/key/generate' \ +-H 'Authorization: Bearer sk-1234' \ +-H 'Content-Type: application/json' \ +-d '{}' +``` + +Expected Response + +```bash +{ + ... + "key": "sk-1234ewknldferwedojwojw" +} +``` + +3. Test it! + + +```python +from langfuse import Langfuse + +langfuse = Langfuse( + host="http://localhost:4000/langfuse", # your litellm proxy endpoint + public_key="anything", # no key required since this is a pass through + secret_key="sk-1234ewknldferwedojwojw", # your litellm virtual key +) + +print("sending langfuse trace request") +trace = langfuse.trace(name="test-trace-litellm-proxy-passthrough") +print("flushing langfuse request") +langfuse.flush() + +print("flushed langfuse request") +``` + +## [Advanced - Log to separate langfuse projects (by key/team)](../proxy/team_logging.md) \ No newline at end of file diff --git a/docs/my-website/docs/proxy/team_logging.md b/docs/my-website/docs/proxy/team_logging.md index e36cb8f66..ef4ebe591 100644 --- a/docs/my-website/docs/proxy/team_logging.md +++ b/docs/my-website/docs/proxy/team_logging.md @@ -207,7 +207,7 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \ -H 'Content-Type: application/json' \ -d '{ "metadata": { - "logging": { + "logging": [{ "callback_name": "langfuse", # 'otel', 'langfuse', 'lunary' "callback_type": "success" # set, if required by integration - future improvement, have logging tools work for success + failure by default "callback_vars": { @@ -215,7 +215,7 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \ "langfuse_secret_key": "os.environ/LANGFUSE_SECRET_KEY", # [RECOMMENDED] reference key in proxy environment "langfuse_host": "https://cloud.langfuse.com" } - } + }] } }' diff --git a/docs/my-website/docs/tutorials/litellm_proxy_aporia.md b/docs/my-website/docs/tutorials/litellm_proxy_aporia.md index 480c411c0..1fea3037f 100644 --- a/docs/my-website/docs/tutorials/litellm_proxy_aporia.md +++ b/docs/my-website/docs/tutorials/litellm_proxy_aporia.md @@ -61,7 +61,7 @@ guardrails: - `pre_call` Run **before** LLM call, on **input** - `post_call` Run **after** LLM call, on **input & output** -- `during_call` Run **during** LLM call, on **input** +- `during_call` Run **during** LLM call, on **input** Same as `pre_call` but runs in parallel with the LLM call. The response is not returned until the guardrail check completes ## 3. Start LiteLLM Gateway @@ -72,6 +72,8 @@ litellm --config config.yaml --detailed_debug ## 4.
Test request +**[Langchain, OpenAI SDK Usage Examples](../proxy/user_keys##request-format)** + @@ -134,12 +136,10 @@ curl -i http://localhost:4000/v1/chat/completions \ -## Advanced -### Control Guardrails per Project (API Key) +## 5. Control Guardrails per Project (API Key) -Use this to control what guardrails run per project. In this tutorial we only want the following guardrails to run for 1 project -- `pre_call_guardrails`: ["aporia-pre-guard"] -- `post_call_guardrails`: ["aporia-post-guard"] +Use this to control what guardrails run per project. In this tutorial we only want the following guardrails to run for 1 project (API Key) +- `guardrails`: ["aporia-pre-guard", "aporia-post-guard"] **Step 1** Create Key with guardrail settings @@ -151,8 +151,7 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \ -H 'Authorization: Bearer sk-1234' \ -H 'Content-Type: application/json' \ -D '{ - "pre_call_guardrails": ["aporia-pre-guard"], - "post_call_guardrails": ["aporia"] + "guardrails": ["aporia-pre-guard", "aporia-post-guard"] } }' ``` @@ -166,8 +165,7 @@ curl --location 'http://0.0.0.0:4000/key/update' \ --header 'Content-Type: application/json' \ --data '{ "key": "sk-jNm1Zar7XfNdZXp49Z1kSQ", - "pre_call_guardrails": ["aporia"], - "post_call_guardrails": ["aporia"] + "guardrails": ["aporia-pre-guard", "aporia-post-guard"] } }' ``` diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 6501ebd75..1dcaf008e 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -195,7 +195,8 @@ const sidebars = { "pass_through/vertex_ai", "pass_through/google_ai_studio", "pass_through/cohere", - "pass_through/bedrock" + "pass_through/bedrock", + "pass_through/langfuse" ], }, "scheduler", diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py index 12b60c2c5..9f62bab20 100644 --- a/litellm/llms/ollama.py +++ b/litellm/llms/ollama.py @@ -509,16 +509,16 @@ async def ollama_acompletion( async def ollama_aembeddings( api_base: str, model: str, - prompts: list, + prompts: List[str], model_response: litellm.EmbeddingResponse, optional_params: dict, logging_obj=None, encoding=None, ): - if api_base.endswith("/api/embeddings"): + if api_base.endswith("/api/embed"): url = api_base else: - url = f"{api_base}/api/embeddings" + url = f"{api_base}/api/embed" ## Load Config config = litellm.OllamaConfig.get_config() @@ -528,64 +528,53 @@ async def ollama_aembeddings( ): # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in optional_params[k] = v - input_data: Dict[str, Any] = {"model": model} + data: Dict[str, Any] = {"model": model, "input": prompts} special_optional_params = ["truncate", "options", "keep_alive"] for k, v in optional_params.items(): if k in special_optional_params: - input_data[k] = v + data[k] = v else: # Ensure "options" is a dictionary before updating it - input_data.setdefault("options", {}) - if isinstance(input_data["options"], dict): - input_data["options"].update({k: v}) + data.setdefault("options", {}) + if isinstance(data["options"], dict): + data["options"].update({k: v}) total_input_tokens = 0 output_data = [] timeout = aiohttp.ClientTimeout(total=litellm.request_timeout) # 10 minutes async with aiohttp.ClientSession(timeout=timeout) as session: - for idx, prompt in enumerate(prompts): - data = deepcopy(input_data) - data["prompt"] = prompt - ## LOGGING - logging_obj.pre_call( - input=None, - api_key=None, - additional_args={ - "api_base": url, - "complete_input_dict": data, - "headers": {}, - }, - ) + ## 
LOGGING + logging_obj.pre_call( + input=None, + api_key=None, + additional_args={ + "api_base": url, + "complete_input_dict": data, + "headers": {}, + }, + ) - response = await session.post(url, json=data) - if response.status != 200: - text = await response.text() - raise OllamaError(status_code=response.status, message=text) + response = await session.post(url, json=data) - ## LOGGING - logging_obj.post_call( - input=prompt, - api_key="", - original_response=response.text, - additional_args={ - "headers": None, - "api_base": api_base, - }, - ) + if response.status != 200: + text = await response.text() + raise OllamaError(status_code=response.status, message=text) - response_json = await response.json() - embeddings: list[float] = response_json["embedding"] - output_data.append( - {"object": "embedding", "index": idx, "embedding": embeddings} - ) + response_json = await response.json() - input_tokens = len(encoding.encode(prompt)) - total_input_tokens += input_tokens + embeddings: List[List[float]] = response_json["embeddings"] + for idx, emb in enumerate(embeddings): + output_data.append({"object": "embedding", "index": idx, "embedding": emb}) + + input_tokens = response_json.get("prompt_eval_count") or len( + encoding.encode("".join(prompt for prompt in prompts)) + ) + total_input_tokens += input_tokens model_response.object = "list" model_response.data = output_data - model_response.model = model + model_response.model = "ollama/" + model setattr( model_response, "usage", diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index 7ec21e8bb..c9e691c00 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -2195,7 +2195,7 @@ def _convert_to_bedrock_tool_call_invoke( def _convert_to_bedrock_tool_call_result( message: dict, -) -> BedrockMessageBlock: +) -> BedrockContentBlock: """ OpenAI message with a tool result looks like: { @@ -2247,7 +2247,7 @@ def _convert_to_bedrock_tool_call_result( ) content_block = BedrockContentBlock(toolResult=tool_result) - return BedrockMessageBlock(role="user", content=[content_block]) + return content_block def _bedrock_converse_messages_pt( @@ -2289,6 +2289,12 @@ def _bedrock_converse_messages_pt( msg_i += 1 + ## MERGE CONSECUTIVE TOOL CALL MESSAGES ## + while msg_i < len(messages) and messages[msg_i]["role"] == "tool": + tool_call_result = _convert_to_bedrock_tool_call_result(messages[msg_i]) + + user_content.append(tool_call_result) + msg_i += 1 if user_content: contents.append(BedrockMessageBlock(role="user", content=user_content)) assistant_content: List[BedrockContentBlock] = [] @@ -2332,11 +2338,6 @@ def _bedrock_converse_messages_pt( BedrockMessageBlock(role="assistant", content=assistant_content) ) - ## APPEND TOOL CALL MESSAGES ## - if msg_i < len(messages) and messages[msg_i]["role"] == "tool": - tool_call_result = _convert_to_bedrock_tool_call_result(messages[msg_i]) - contents.append(tool_call_result) - msg_i += 1 if msg_i == init_msg_i: # prevent infinite loops raise litellm.BadRequestError( message=BAD_MESSAGE_ERROR_STR + f"passed in {messages[msg_i]}", diff --git a/litellm/llms/text_completion_codestral.py b/litellm/llms/text_completion_codestral.py index a6865b953..9dbe3bb37 100644 --- a/litellm/llms/text_completion_codestral.py +++ b/litellm/llms/text_completion_codestral.py @@ -365,6 +365,7 @@ class CodestralTextCompletion(BaseLLM): stream = optional_params.pop("stream", False) data = { + "model": model, "prompt": prompt, **optional_params, } 
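Note on the `ollama_aembeddings` rewrite above: the handler now calls Ollama's batched `/api/embed` route once with the full `input` list, instead of looping over `/api/embeddings` per prompt, and reads back a single `embeddings` array plus a batch-level `prompt_eval_count`. A minimal sketch of the request/response shape the new code relies on (the host and model name here are placeholder assumptions, not part of this diff):

```python
import asyncio

import aiohttp


async def embed_batch(api_base: str, model: str, prompts: list) -> list:
    """Call Ollama's batched /api/embed endpoint (payload shape as used in the diff above)."""
    async with aiohttp.ClientSession() as session:
        response = await session.post(
            f"{api_base}/api/embed",
            json={"model": model, "input": prompts},  # one request for the whole batch
        )
        if response.status != 200:
            raise RuntimeError(await response.text())
        body = await response.json()
        # One embedding per input prompt; token usage is reported once for the whole batch.
        print("prompt_eval_count:", body.get("prompt_eval_count"))
        return body["embeddings"]


# Example usage (assumes a local Ollama server with the model already pulled):
# asyncio.run(embed_batch("http://localhost:11434", "mistral", ["hello world", "goodbye world"]))
```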
diff --git a/litellm/main.py b/litellm/main.py index 12f8cceb5..f2c6df306 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -253,7 +253,7 @@ async def acompletion( logit_bias: Optional[dict] = None, user: Optional[str] = None, # openai v1.0+ new params - response_format: Optional[dict] = None, + response_format: Optional[Union[dict, Type[BaseModel]]] = None, seed: Optional[int] = None, tools: Optional[List] = None, tool_choice: Optional[str] = None, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 521a034d6..f72e37278 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,6 +1,4 @@ model_list: - - model_name: gpt-3.5-turbo + - model_name: ollama/mistral litellm_params: - model: azure/chatgpt-v-2 - api_key: os.environ/AZURE_API_KEY - api_base: os.environ/AZURE_API_BASE \ No newline at end of file + model: ollama/mistral diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 4f9d39d77..75934ee1f 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -587,6 +587,7 @@ class GenerateKeyRequest(GenerateRequestBase): send_invite_email: Optional[bool] = None model_rpm_limit: Optional[dict] = None model_tpm_limit: Optional[dict] = None + guardrails: Optional[List[str]] = None class GenerateKeyResponse(GenerateKeyRequest): diff --git a/litellm/proxy/auth/user_api_key_auth.py b/litellm/proxy/auth/user_api_key_auth.py index 8d2c728f5..58a2aa164 100644 --- a/litellm/proxy/auth/user_api_key_auth.py +++ b/litellm/proxy/auth/user_api_key_auth.py @@ -1269,8 +1269,9 @@ def _get_user_role( def _get_request_ip_address( request: Request, use_x_forwarded_for: Optional[bool] = False -) -> str: +) -> Optional[str]: + client_ip = None if use_x_forwarded_for is True and "x-forwarded-for" in request.headers: client_ip = request.headers["x-forwarded-for"] elif request.client is not None: diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index ff5b6dc86..54f217194 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -331,13 +331,33 @@ async def add_litellm_data_to_request( # Guardrails move_guardrails_to_metadata( - data=data, _metadata_variable_name=_metadata_variable_name + data=data, + _metadata_variable_name=_metadata_variable_name, + user_api_key_dict=user_api_key_dict, ) return data -def move_guardrails_to_metadata(data: dict, _metadata_variable_name: str): +def move_guardrails_to_metadata( + data: dict, + _metadata_variable_name: str, + user_api_key_dict: UserAPIKeyAuth, +): + """ + Helper to add guardrails from request to metadata + + - If guardrails set on API Key metadata then sets guardrails on request metadata + - If guardrails not set on API key, then checks request metadata + + """ + if user_api_key_dict.metadata: + if "guardrails" in user_api_key_dict.metadata: + data[_metadata_variable_name]["guardrails"] = user_api_key_dict.metadata[ + "guardrails" + ] + return + if "guardrails" in data: data[_metadata_variable_name]["guardrails"] = data["guardrails"] del data["guardrails"] diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py index 79e2dcc2d..1758b416d 100644 --- a/litellm/proxy/management_endpoints/key_management_endpoints.py +++ b/litellm/proxy/management_endpoints/key_management_endpoints.py @@ -66,6 +66,7 @@ async def generate_key_fn( - budget_duration: Optional[str] - Budget is reset at the end
of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). - max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x. - metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } + - guardrails: Optional[List[str]] - List of active guardrails for the key - permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false} - model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget. - model_rpm_limit: Optional[dict] - key-specific model rpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific rpm limit. @@ -321,11 +322,12 @@ async def update_key_fn( detail={"error": f"Team not found, passed team_id={data.team_id}"}, ) + _metadata_fields = ["model_rpm_limit", "model_tpm_limit", "guardrails"] # get non default values for key non_default_values = {} for k, v in data_json.items(): # this field gets stored in metadata - if key == "model_rpm_limit" or key == "model_tpm_limit": + if key in _metadata_fields: continue if v is not None and v not in ( [], @@ -366,6 +368,14 @@ async def update_key_fn( non_default_values["metadata"] = _metadata non_default_values.pop("model_rpm_limit", None) + if data.guardrails: + _metadata = existing_key_row.metadata or {} + _metadata["guardrails"] = data.guardrails + + # update values that will be written to the DB + non_default_values["metadata"] = _metadata + non_default_values.pop("guardrails", None) + response = await prisma_client.update_data( token=key, data={**non_default_values, "token": key} ) @@ -734,6 +744,7 @@ async def generate_key_helper_fn( model_max_budget: Optional[dict] = {}, model_rpm_limit: Optional[dict] = {}, model_tpm_limit: Optional[dict] = {}, + guardrails: Optional[list] = None, teams: Optional[list] = None, organization_id: Optional[str] = None, table_name: Optional[Literal["key", "user"]] = None, @@ -783,6 +794,9 @@ async def generate_key_helper_fn( if model_tpm_limit is not None: metadata = metadata or {} metadata["model_tpm_limit"] = model_tpm_limit + if guardrails is not None: + metadata = metadata or {} + metadata["guardrails"] = guardrails metadata_json = json.dumps(metadata) model_max_budget_json = json.dumps(model_max_budget) diff --git a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py index bc16c3555..b50fbb0c5 100644 --- a/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/pass_through_endpoints.py @@ -360,24 +360,22 @@ async def pass_through_request( # combine url with query params for logging - # requested_query_params = query_params or request.query_params.__dict__ - # requested_query_params_str = "&".join( - # f"{k}={v}" for k, v in requested_query_params.items() - # ) + requested_query_params = query_params or request.query_params.__dict__ + requested_query_params_str = "&".join( + f"{k}={v}" for k, v in requested_query_params.items() + ) - requested_query_params = None - - # if "?" 
in str(url): - # logging_url = str(url) + "&" + requested_query_params_str - # else: - # logging_url = str(url) + "?" + requested_query_params_str + if "?" in str(url): + logging_url = str(url) + "&" + requested_query_params_str + else: + logging_url = str(url) + "?" + requested_query_params_str logging_obj.pre_call( input=[{"role": "user", "content": "no-message-pass-through-endpoint"}], api_key="", additional_args={ "complete_input_dict": _parsed_body, - "api_base": str(url), + "api_base": str(logging_url), "headers": headers, }, ) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 0fbf10a2b..12069d5e8 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2350,7 +2350,8 @@ async def initialize( config=None, ): global user_model, user_api_base, user_debug, user_detailed_debug, user_user_max_tokens, user_request_timeout, user_temperature, user_telemetry, user_headers, experimental, llm_model_list, llm_router, general_settings, master_key, user_custom_auth, prisma_client - generate_feedback_box() + if os.getenv("LITELLM_DONT_SHOW_FEEDBACK_BOX", "").lower() != "true": + generate_feedback_box() user_model = model user_debug = debug if debug is True: # this needs to be first, so users can see Router init debugg @@ -8065,14 +8066,14 @@ async def login(request: Request): return redirect_response else: raise ProxyException( - message=f"Invalid credentials used to access UI. Passed in username: {username}, passed in password: {password}.\nNot valid credentials for {username}", + message=f"Invalid credentials used to access UI.\nNot valid credentials for {username}", type=ProxyErrorTypes.auth_error, param="invalid_credentials", code=status.HTTP_401_UNAUTHORIZED, ) else: raise ProxyException( - message=f"Invalid credentials used to access UI. 
Passed in username: {username}, passed in password: {password}.\nCheck 'UI_USERNAME', 'UI_PASSWORD' in .env file", + message="Invalid credentials used to access UI.\nCheck 'UI_USERNAME', 'UI_PASSWORD' in .env file", type=ProxyErrorTypes.auth_error, param="invalid_credentials", code=status.HTTP_401_UNAUTHORIZED, diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 31bfa9332..9e2066511 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.prompt_templates.factory import anthropic_messages_pt -# litellm.num_retries =3 +# litellm.num_retries=3 litellm.cache = None litellm.success_callback = [] user_message = "Write a short poem about the sky" diff --git a/litellm/tests/test_function_calling.py b/litellm/tests/test_function_calling.py index 6e4e9d3e8..5f97dbf87 100644 --- a/litellm/tests/test_function_calling.py +++ b/litellm/tests/test_function_calling.py @@ -1,18 +1,20 @@ -import sys, os +import os +import sys import traceback + from dotenv import load_dotenv load_dotenv() -import os, io +import io +import os sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path import pytest + import litellm -from litellm import embedding, completion, completion_cost, Timeout -from litellm import RateLimitError -import pytest +from litellm import RateLimitError, Timeout, completion, completion_cost, embedding litellm.num_retries = 0 litellm.cache = None @@ -41,7 +43,14 @@ def get_current_weather(location, unit="fahrenheit"): # In production, this could be your backend API or an external API @pytest.mark.parametrize( - "model", ["gpt-3.5-turbo-1106", "mistral/mistral-large-latest"] + "model", + [ + "gpt-3.5-turbo-1106", + "mistral/mistral-large-latest", + "claude-3-haiku-20240307", + "gemini/gemini-1.5-pro", + "anthropic.claude-3-sonnet-20240229-v1:0", + ], ) def test_parallel_function_call(model): try: @@ -124,7 +133,12 @@ def test_parallel_function_call(model): ) # extend conversation with function response print(f"messages: {messages}") second_response = litellm.completion( - model=model, messages=messages, temperature=0.2, seed=22 + model=model, + messages=messages, + temperature=0.2, + seed=22, + tools=tools, + drop_params=True, ) # get a new response from the model where it can see the function response print("second response\n", second_response) except Exception as e: diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py index 907262d48..2641edbb4 100644 --- a/litellm/tests/test_key_generate_prisma.py +++ b/litellm/tests/test_key_generate_prisma.py @@ -2770,6 +2770,60 @@ async def test_generate_key_with_model_tpm_limit(prisma_client): } +@pytest.mark.asyncio() +async def test_generate_key_with_guardrails(prisma_client): + print("prisma client=", prisma_client) + + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + await litellm.proxy.proxy_server.prisma_client.connect() + request = GenerateKeyRequest( + guardrails=["aporia-pre-call"], + metadata={ + "team": "litellm-team3", + }, + ) + key = await generate_key_fn( + data=request, + user_api_key_dict=UserAPIKeyAuth( + user_role=LitellmUserRoles.PROXY_ADMIN, + api_key="sk-1234", + user_id="1234", + ), + ) + print("generated key=", 
key) + + generated_key = key.key + + # use generated key to auth in + result = await info_key_fn(key=generated_key) + print("result from info_key_fn", result) + assert result["key"] == generated_key + print("\n info for key=", result["info"]) + assert result["info"]["metadata"] == { + "team": "litellm-team3", + "guardrails": ["aporia-pre-call"], + } + + # Update model tpm_limit and rpm_limit + request = UpdateKeyRequest( + key=generated_key, + guardrails=["aporia-pre-call", "aporia-post-call"], + ) + _request = Request(scope={"type": "http"}) + _request._url = URL(url="/update/key") + + await update_key_fn(data=request, request=_request) + result = await info_key_fn(key=generated_key) + print("result from info_key_fn", result) + assert result["key"] == generated_key + print("\n info for key=", result["info"]) + assert result["info"]["metadata"] == { + "team": "litellm-team3", + "guardrails": ["aporia-pre-call", "aporia-post-call"], + } + + @pytest.mark.asyncio() async def test_team_access_groups(prisma_client): """ diff --git a/litellm/tests/test_ollama.py b/litellm/tests/test_ollama.py index 0d0b07672..de41e24b8 100644 --- a/litellm/tests/test_ollama.py +++ b/litellm/tests/test_ollama.py @@ -132,6 +132,7 @@ def test_ollama_aembeddings(mock_aembeddings): # test_ollama_aembeddings() +@pytest.mark.skip(reason="local only test") def test_ollama_chat_function_calling(): import json diff --git a/litellm/tests/test_prompt_factory.py b/litellm/tests/test_prompt_factory.py index 93e92a792..81339e831 100644 --- a/litellm/tests/test_prompt_factory.py +++ b/litellm/tests/test_prompt_factory.py @@ -313,3 +313,78 @@ def test_anthropic_cache_controls_pt(): assert msg["content"][0]["cache_control"] == {"type": "ephemeral"} print("translated_messages: ", translated_messages) + + +@pytest.mark.parametrize("provider", ["bedrock", "anthropic"]) +def test_bedrock_parallel_tool_calling_pt(provider): + """ + Make sure parallel tool call blocks are merged correctly - https://github.com/BerriAI/litellm/issues/5277 + """ + from litellm.llms.prompt_templates.factory import _bedrock_converse_messages_pt + from litellm.types.utils import ChatCompletionMessageToolCall, Function, Message + + messages = [ + { + "role": "user", + "content": "What's the weather like in San Francisco, Tokyo, and Paris? 
- give me 3 responses", + }, + Message( + content="Here are the current weather conditions for San Francisco, Tokyo, and Paris:", + role="assistant", + tool_calls=[ + ChatCompletionMessageToolCall( + index=1, + function=Function( + arguments='{"city": "New York"}', + name="get_current_weather", + ), + id="tooluse_XcqEBfm8R-2YVaPhDUHsPQ", + type="function", + ), + ChatCompletionMessageToolCall( + index=2, + function=Function( + arguments='{"city": "London"}', + name="get_current_weather", + ), + id="tooluse_VB9nk7UGRniVzGcaj6xrAQ", + type="function", + ), + ], + function_call=None, + ), + { + "tool_call_id": "tooluse_XcqEBfm8R-2YVaPhDUHsPQ", + "role": "tool", + "name": "get_current_weather", + "content": "25 degrees celsius.", + }, + { + "tool_call_id": "tooluse_VB9nk7UGRniVzGcaj6xrAQ", + "role": "tool", + "name": "get_current_weather", + "content": "28 degrees celsius.", + }, + ] + + if provider == "bedrock": + translated_messages = _bedrock_converse_messages_pt( + messages=messages, + model="anthropic.claude-3-sonnet-20240229-v1:0", + llm_provider="bedrock", + ) + else: + translated_messages = anthropic_messages_pt( + messages=messages, + model="claude-3-sonnet-20240229-v1:0", + llm_provider=provider, + ) + print(translated_messages) + + number_of_messages = len(translated_messages) + + # assert last 2 messages are not the same role + assert ( + translated_messages[number_of_messages - 1]["role"] + != translated_messages[number_of_messages - 2]["role"] + ) diff --git a/pyproject.toml b/pyproject.toml index 355a5ea3c..9498cc128 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.43.18" +version = "1.43.19" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.43.18" +version = "1.43.19" version_files = [ "pyproject.toml:^version" ] diff --git a/tests/otel_tests/test_guardrails.py b/tests/otel_tests/test_guardrails.py index c48a5ba79..7e9ff613a 100644 --- a/tests/otel_tests/test_guardrails.py +++ b/tests/otel_tests/test_guardrails.py @@ -22,10 +22,6 @@ async def chat_completion( data = { "model": model, "messages": messages, - "guardrails": [ - "aporia-post-guard", - "aporia-pre-guard", - ], # default guardrails for all tests } if guardrails is not None: @@ -41,7 +37,7 @@ async def chat_completion( print() if status != 200: - return response_text + raise Exception(response_text) # response headers response_headers = response.headers @@ -50,6 +46,29 @@ async def chat_completion( return await response.json(), response_headers +async def generate_key(session, guardrails): + url = "http://0.0.0.0:4000/key/generate" + headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"} + if guardrails: + data = { + "guardrails": guardrails, + } + else: + data = {} + + async with session.post(url, headers=headers, json=data) as response: + status = response.status + response_text = await response.text() + + print(response_text) + print() + + if status != 200: + raise Exception(f"Request did not return a 200 status code: {status}") + + return await response.json() + + @pytest.mark.asyncio async def test_llm_guard_triggered_safe_request(): """ @@ -62,6 +81,10 @@ async def test_llm_guard_triggered_safe_request(): "sk-1234", model="fake-openai-endpoint", messages=[{"role": "user", "content": f"Hello what's the weather"}], + guardrails=[ + 
"aporia-post-guard", + "aporia-pre-guard", + ], ) await asyncio.sleep(3) @@ -90,6 +113,10 @@ async def test_llm_guard_triggered(): messages=[ {"role": "user", "content": f"Hello my name is ishaan@berri.ai"} ], + guardrails=[ + "aporia-post-guard", + "aporia-pre-guard", + ], ) pytest.fail("Should have thrown an exception") except Exception as e: @@ -116,3 +143,54 @@ async def test_no_llm_guard_triggered(): print("response=", response, "response headers", headers) assert "x-litellm-applied-guardrails" not in headers + +@pytest.mark.asyncio +async def test_guardrails_with_api_key_controls(): + """ + - Make two API Keys + - Key 1 with no guardrails + - Key 2 with guardrails + - Request to Key 1 -> should be success with no guardrails + - Request to Key 2 -> should be error since guardrails are triggered + """ + async with aiohttp.ClientSession() as session: + key_with_guardrails = await generate_key( + session=session, + guardrails=[ + "aporia-post-guard", + "aporia-pre-guard", + ], + ) + + key_with_guardrails = key_with_guardrails["key"] + + key_without_guardrails = await generate_key(session=session, guardrails=None) + + key_without_guardrails = key_without_guardrails["key"] + + # test no guardrails triggered for key without guardrails + response, headers = await chat_completion( + session, + key_without_guardrails, + model="fake-openai-endpoint", + messages=[{"role": "user", "content": f"Hello what's the weather"}], + ) + await asyncio.sleep(3) + + print("response=", response, "response headers", headers) + assert "x-litellm-applied-guardrails" not in headers + + # test guardrails triggered for key with guardrails + try: + response, headers = await chat_completion( + session, + key_with_guardrails, + model="fake-openai-endpoint", + messages=[ + {"role": "user", "content": f"Hello my name is ishaan@berri.ai"} + ], + ) + pytest.fail("Should have thrown an exception") + except Exception as e: + print(e) + assert "Aporia detected and blocked PII" in str(e)