From eab550f7d2409ec6fc487948295466f41cf0f6e6 Mon Sep 17 00:00:00 2001 From: Jash Gulabrai <37194352+JashG@users.noreply.github.com> Date: Wed, 30 Apr 2025 12:01:28 -0400 Subject: [PATCH] fix: Fix messages format in NVIDIA safety check request body (#2063) # What does this PR do? When running a Llama Stack server and invoking the `/v1/safety/run-shield` endpoint, the NVIDIA Guardrails endpoint in some cases errors with a `422: Unprocessable Entity` due to malformed input. For example, given an request body like: ``` { "model": "test", "messages": [ { "role": "user", "content": "You are stupid." } ] } ``` `convert_pydantic_to_json_value` converts the message to: ``` { "role": "user", "content": "You are stupid.", "context": null } ``` Which causes NVIDIA Guardrails to return an error `HTTPError: 422 Client Error: Unprocessable Entity for url: http://nemo.test/v1/guardrail/checks`, because `context` shouldn't be included in the body. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan I ran the Llama Stack server locally and manually verified that the endpoint now succeeds. ``` message = {"role": "user", "content": "You are stupid."} response = client.safety.run_shield(messages=[message], shield_id=shield_id, params={}) ``` Server logs: ``` 14:29:09.656 [START] /v1/safety/run-shield INFO: 127.0.0.1:54616 - "POST /v1/safety/run-shield HTTP/1.1" 200 OK 14:29:09.918 [END] /v1/safety/run-shield [StatusCode.OK] (262.26ms ``` [//]: # (## Documentation) Co-authored-by: Jash Gulabrai --- .../providers/remote/safety/nvidia/nvidia.py | 6 +++--- tests/unit/providers/nvidia/test_safety.py | 13 ++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/llama_stack/providers/remote/safety/nvidia/nvidia.py index 1ff4a6ad9..13bc212a1 100644 --- a/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -12,8 +12,8 @@ import requests from llama_stack.apis.inference import Message from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel from llama_stack.apis.shields import Shield -from llama_stack.distribution.library_client import convert_pydantic_to_json_value from llama_stack.providers.datatypes import ShieldsProtocolPrivate +from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new from .config import NVIDIASafetyConfig @@ -28,7 +28,6 @@ class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate): Args: config (NVIDIASafetyConfig): The configuration containing the guardrails service URL and config ID. """ - print(f"Initializing NVIDIASafetyAdapter({config.guardrails_service_url})...") self.config = config async def initialize(self) -> None: @@ -127,9 +126,10 @@ class NeMoGuardrails: Raises: requests.HTTPError: If the POST request fails. """ + request_messages = [await convert_message_to_openai_dict_new(message) for message in messages] request_data = { "model": self.model, - "messages": convert_pydantic_to_json_value(messages), + "messages": request_messages, "temperature": self.temperature, "top_p": 1, "frequency_penalty": 0, diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index e7e1cb3dc..8c74f178b 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import json import os import unittest from typing import Any @@ -139,8 +138,8 @@ class TestNVIDIASafetyAdapter(unittest.TestCase): data={ "model": shield_id, "messages": [ - json.loads(messages[0].model_dump_json()), - json.loads(messages[1].model_dump_json()), + {"role": "user", "content": "Hello, how are you?"}, + {"role": "assistant", "content": "I'm doing well, thank you for asking!"}, ], "temperature": 1.0, "top_p": 1, @@ -193,8 +192,8 @@ class TestNVIDIASafetyAdapter(unittest.TestCase): data={ "model": shield_id, "messages": [ - json.loads(messages[0].model_dump_json()), - json.loads(messages[1].model_dump_json()), + {"role": "user", "content": "Hello, how are you?"}, + {"role": "assistant", "content": "I'm doing well, thank you for asking!"}, ], "temperature": 1.0, "top_p": 1, @@ -269,8 +268,8 @@ class TestNVIDIASafetyAdapter(unittest.TestCase): data={ "model": shield_id, "messages": [ - json.loads(messages[0].model_dump_json()), - json.loads(messages[1].model_dump_json()), + {"role": "user", "content": "Hello, how are you?"}, + {"role": "assistant", "content": "I'm doing well, thank you for asking!"}, ], "temperature": 1.0, "top_p": 1,