feat: Add moderations create api (#3020)

# What does this PR do?
This PR adds Open AI Compatible moderations api. Currently only
implementing for llama guard safety provider
Image support, expand to other safety providers and Deprecation of
run_shield will be next steps.


## Test Plan
Added 2 new tests for safe/ unsafe text prompt examples for the new open
ai compatible moderations api usage
`SAFETY_MODEL=llama-guard3:8b LLAMA_STACK_CONFIG=starter uv run pytest
-v tests/integration/safety/test_safety.py
--text-model=llama3.2:3b-instruct-fp16
--embedding-model=all-MiniLM-L6-v2 --safety-shield=ollama`
(Had some issue with previous PR
https://github.com/meta-llama/llama-stack/pull/2994 while updating and
accidentally close it , reopened new one )
This commit is contained in:
slekkala1 2025-08-06 13:51:23 -07:00 committed by GitHub
parent 0caef40e0d
commit 26d3d25c87
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 622 additions and 1 deletions

View file

@ -4734,6 +4734,49 @@
}
}
},
"/v1/openai/v1/moderations": {
"post": {
"responses": {
"200": {
"description": "A moderation object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ModerationObject"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Safety"
],
"description": "Classifies if text and/or image inputs are potentially harmful.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/RunModerationRequest"
}
}
},
"required": true
}
}
},
"/v1/safety/run-shield": {
"post": {
"responses": {
@ -16401,6 +16444,131 @@
],
"title": "RunEvalRequest"
},
"RunModerationRequest": {
"type": "object",
"properties": {
"input": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
],
"description": "Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models."
},
"model": {
"type": "string",
"description": "The content moderation model you would like to use."
}
},
"additionalProperties": false,
"required": [
"input",
"model"
],
"title": "RunModerationRequest"
},
"ModerationObject": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier for the moderation request."
},
"model": {
"type": "string",
"description": "The model used to generate the moderation results."
},
"results": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ModerationObjectResults"
},
"description": "A list of moderation objects"
}
},
"additionalProperties": false,
"required": [
"id",
"model",
"results"
],
"title": "ModerationObject",
"description": "A moderation object."
},
"ModerationObjectResults": {
"type": "object",
"properties": {
"flagged": {
"type": "boolean",
"description": "Whether any of the below categories are flagged."
},
"categories": {
"type": "object",
"additionalProperties": {
"type": "boolean"
},
"description": "A list of the categories, and whether they are flagged or not."
},
"category_applied_input_types": {
"type": "object",
"additionalProperties": {
"type": "array",
"items": {
"type": "string"
}
},
"description": "A list of the categories along with the input type(s) that the score applies to."
},
"category_scores": {
"type": "object",
"additionalProperties": {
"type": "number"
},
"description": "A list of the categories along with their scores as predicted by model. Required set of categories that need to be in response - violence - violence/graphic - harassment - harassment/threatening - hate - hate/threatening - illicit - illicit/violent - sexual - sexual/minors - self-harm - self-harm/intent - self-harm/instructions"
},
"user_message": {
"type": "string"
},
"metadata": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"flagged",
"metadata"
],
"title": "ModerationObjectResults",
"description": "A moderation object."
},
"RunShieldRequest": {
"type": "object",
"properties": {