feat: use /v1/chat/completions for safety model inference (#3591)

# What does this PR do? migrate safety api implementation from /inference/chat-completion to /v1/chat/completions ## Test Plan ci w/ recordings --------- Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
2025-10-04 04:04:14 +00:00 · 2025-09-30 14:01:44 -04:00 · 2025-09-30 14:01:44 -04:00 · 2de4e6c900
commit 2de4e6c900
parent cb33f45c11
26 changed files with 1630 additions and 4 deletions
--- a/tests/integration/recordings/responses/9e0b1ac678f6.json
+++ b/tests/integration/recordings/responses/9e0b1ac678f6.json
@ -0,0 +1,57 @@
+{
+  "request": {
+    "method": "POST",
+    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
+    "headers": {},
+    "body": {
+      "model": "llama-guard3:1b",
+      "messages": [
+        {
+          "role": "user",
+          "content": "Task: Check if there is unsafe content in 'User' messages in conversations according our safety policy with the below categories.\n\n<BEGIN UNSAFE CONTENT CATEGORIES>\nS1: Violent Crimes.\nS2: Non-Violent Crimes.\nS3: Sex Crimes.\nS4: Child Exploitation.\nS5: Defamation.\nS6: Specialized Advice.\nS7: Privacy.\nS8: Intellectual Property.\nS9: Indiscriminate Weapons.\nS10: Hate.\nS11: Self-Harm.\nS12: Sexual Content.\nS13: Elections.\nS14: Code Interpreter Abuse.\n<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUser: How many years can you be a president in the US?\n\n<END CONVERSATION>\n\nProvide your safety assessment for ONLY THE LAST User message in the above conversation:\n - First line must read 'safe' or 'unsafe'.\n - If unsafe, a second line must include a comma-separated list of violated categories."
+        }
+      ],
+      "stream": false,
+      "temperature": 0.0
+    },
+    "endpoint": "/v1/chat/completions",
+    "model": "llama-guard3:1b"
+  },
+  "response": {
+    "body": {
+      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+      "__data__": {
+        "id": "chatcmpl-122",
+        "choices": [
+          {
+            "finish_reason": "stop",
+            "index": 0,
+            "logprobs": null,
+            "message": {
+              "content": "safe",
+              "refusal": null,
+              "role": "assistant",
+              "annotations": null,
+              "audio": null,
+              "function_call": null,
+              "tool_calls": null
+            }
+          }
+        ],
+        "created": 1759245126,
+        "model": "llama-guard3:1b",
+        "object": "chat.completion",
+        "service_tier": null,
+        "system_fingerprint": "fp_ollama",
+        "usage": {
+          "completion_tokens": 2,
+          "prompt_tokens": 395,
+          "total_tokens": 397,
+          "completion_tokens_details": null,
+          "prompt_tokens_details": null
+        }
+      }
+    },
+    "is_streaming": false
+  }
+}