From eab550f7d2409ec6fc487948295466f41cf0f6e6 Mon Sep 17 00:00:00 2001
From: Jash Gulabrai <37194352+JashG@users.noreply.github.com>
Date: Wed, 30 Apr 2025 12:01:28 -0400
Subject: [PATCH] fix: Fix messages format in NVIDIA safety check request body
 (#2063)

# What does this PR do?
When running a Llama Stack server and invoking the
`/v1/safety/run-shield` endpoint, the NVIDIA Guardrails endpoint in some
cases errors with a `422: Unprocessable Entity` due to malformed input.

For example, given a request body like:
```
{
  "model": "test",
  "messages": [
    { "role": "user", "content": "You are stupid." }
  ]
}
```
`convert_pydantic_to_json_value` converts the message to:
```
{ "role": "user", "content": "You are stupid.", "context": null }
```
This causes NVIDIA Guardrails to return the error `HTTPError: 422 Client
Error: Unprocessable Entity for url:
http://nemo.test/v1/guardrail/checks`, because `context` shouldn't be
included in the body.

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
I ran the Llama Stack server locally and manually verified that the
endpoint now succeeds.

```
message = {"role": "user", "content": "You are stupid."}
response = client.safety.run_shield(messages=[message], shield_id=shield_id, params={})
```
Server logs:
```
14:29:09.656 [START] /v1/safety/run-shield
INFO:     127.0.0.1:54616 - "POST /v1/safety/run-shield HTTP/1.1" 200 OK
14:29:09.918 [END] /v1/safety/run-shield [StatusCode.OK] (262.26ms)
```

[//]: # (## Documentation)

Co-authored-by: Jash Gulabrai <jgulabrai@nvidia.com>
---
 .../providers/remote/safety/nvidia/nvidia.py        |  6 +++---
 tests/unit/providers/nvidia/test_safety.py          | 13 ++++++-------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/llama_stack/providers/remote/safety/nvidia/nvidia.py
index 1ff4a6ad9..13bc212a1 100644
--- a/llama_stack/providers/remote/safety/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/safety/nvidia/nvidia.py
@@ -12,8 +12,8 @@ import requests
 from llama_stack.apis.inference import Message
 from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel
 from llama_stack.apis.shields import Shield
-from llama_stack.distribution.library_client import convert_pydantic_to_json_value
 from llama_stack.providers.datatypes import ShieldsProtocolPrivate
+from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new
 
 from .config import NVIDIASafetyConfig
 
@@ -28,7 +28,6 @@ class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):
         Args:
             config (NVIDIASafetyConfig): The configuration containing the guardrails service URL and config ID.
         """
-        print(f"Initializing NVIDIASafetyAdapter({config.guardrails_service_url})...")
         self.config = config
 
     async def initialize(self) -> None:
@@ -127,9 +126,10 @@ class NeMoGuardrails:
         Raises:
             requests.HTTPError: If the POST request fails.
         """
+        request_messages = [await convert_message_to_openai_dict_new(message) for message in messages]
         request_data = {
             "model": self.model,
-            "messages": convert_pydantic_to_json_value(messages),
+            "messages": request_messages,
             "temperature": self.temperature,
             "top_p": 1,
             "frequency_penalty": 0,
diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py
index e7e1cb3dc..8c74f178b 100644
--- a/tests/unit/providers/nvidia/test_safety.py
+++ b/tests/unit/providers/nvidia/test_safety.py
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import json
 import os
 import unittest
 from typing import Any
@@ -139,8 +138,8 @@ class TestNVIDIASafetyAdapter(unittest.TestCase):
             data={
                 "model": shield_id,
                 "messages": [
-                    json.loads(messages[0].model_dump_json()),
-                    json.loads(messages[1].model_dump_json()),
+                    {"role": "user", "content": "Hello, how are you?"},
+                    {"role": "assistant", "content": "I'm doing well, thank you for asking!"},
                 ],
                 "temperature": 1.0,
                 "top_p": 1,
@@ -193,8 +192,8 @@ class TestNVIDIASafetyAdapter(unittest.TestCase):
             data={
                 "model": shield_id,
                 "messages": [
-                    json.loads(messages[0].model_dump_json()),
-                    json.loads(messages[1].model_dump_json()),
+                    {"role": "user", "content": "Hello, how are you?"},
+                    {"role": "assistant", "content": "I'm doing well, thank you for asking!"},
                 ],
                 "temperature": 1.0,
                 "top_p": 1,
@@ -269,8 +268,8 @@ class TestNVIDIASafetyAdapter(unittest.TestCase):
             data={
                 "model": shield_id,
                 "messages": [
-                    json.loads(messages[0].model_dump_json()),
-                    json.loads(messages[1].model_dump_json()),
+                    {"role": "user", "content": "Hello, how are you?"},
+                    {"role": "assistant", "content": "I'm doing well, thank you for asking!"},
                 ],
                 "temperature": 1.0,
                 "top_p": 1,