mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-12 04:50:39 +00:00
feat: Add moderations create api (#3020)
# What does this PR do? This PR adds an OpenAI-compatible moderations API. It is currently implemented only for the Llama Guard safety provider. Image support, expansion to other safety providers, and deprecation of run_shield will be next steps. ## Test Plan Added 2 new tests, covering safe and unsafe text prompt examples, for the new OpenAI-compatible moderations API: `SAFETY_MODEL=llama-guard3:8b LLAMA_STACK_CONFIG=starter uv run pytest -v tests/integration/safety/test_safety.py --text-model=llama3.2:3b-instruct-fp16 --embedding-model=all-MiniLM-L6-v2 --safety-shield=ollama` (I had an issue with the previous PR https://github.com/meta-llama/llama-stack/pull/2994 while updating it and accidentally closed it, so I reopened it as this new one.)
This commit is contained in:
parent
0caef40e0d
commit
26d3d25c87
6 changed files with 622 additions and 1 deletions
168
docs/_static/llama-stack-spec.html
vendored
168
docs/_static/llama-stack-spec.html
vendored
|
@ -4734,6 +4734,49 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/moderations": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A moderation object.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ModerationObject"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"description": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RunModerationRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/safety/run-shield": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -16401,6 +16444,131 @@
|
|||
],
|
||||
"title": "RunEvalRequest"
|
||||
},
|
||||
"RunModerationRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The content moderation model you would like to use."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input",
|
||||
"model"
|
||||
],
|
||||
"title": "RunModerationRequest"
|
||||
},
|
||||
"ModerationObject": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier for the moderation request."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model used to generate the moderation results."
|
||||
},
|
||||
"results": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ModerationObjectResults"
|
||||
},
|
||||
"description": "A list of moderation objects"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"model",
|
||||
"results"
|
||||
],
|
||||
"title": "ModerationObject",
|
||||
"description": "A moderation object."
|
||||
},
|
||||
"ModerationObjectResults": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"flagged": {
|
||||
"type": "boolean",
|
||||
"description": "Whether any of the below categories are flagged."
|
||||
},
|
||||
"categories": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"description": "A list of the categories, and whether they are flagged or not."
|
||||
},
|
||||
"category_applied_input_types": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"description": "A list of the categories along with the input type(s) that the score applies to."
|
||||
},
|
||||
"category_scores": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "number"
|
||||
},
|
||||
"description": "A list of the categories along with their scores as predicted by model. Required set of categories that need to be in response - violence - violence/graphic - harassment - harassment/threatening - hate - hate/threatening - illicit - illicit/violent - sexual - sexual/minors - self-harm - self-harm/intent - self-harm/instructions"
|
||||
},
|
||||
"user_message": {
|
||||
"type": "string"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"flagged",
|
||||
"metadata"
|
||||
],
|
||||
"title": "ModerationObjectResults",
|
||||
"description": "A moderation object."
|
||||
},
|
||||
"RunShieldRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
124
docs/_static/llama-stack-spec.yaml
vendored
124
docs/_static/llama-stack-spec.yaml
vendored
|
@ -3358,6 +3358,36 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/RunEvalRequest'
|
||||
required: true
|
||||
/v1/openai/v1/moderations:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A moderation object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ModerationObject'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
description: >-
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RunModerationRequest'
|
||||
required: true
|
||||
/v1/safety/run-shield:
|
||||
post:
|
||||
responses:
|
||||
|
@ -12184,6 +12214,100 @@ components:
|
|||
required:
|
||||
- benchmark_config
|
||||
title: RunEvalRequest
|
||||
RunModerationRequest:
|
||||
type: object
|
||||
properties:
|
||||
input:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
Input (or inputs) to classify. Can be a single string, an array of strings,
|
||||
or an array of multi-modal input objects similar to other models.
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
The content moderation model you would like to use.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input
|
||||
- model
|
||||
title: RunModerationRequest
|
||||
ModerationObject:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: >-
|
||||
The unique identifier for the moderation request.
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
The model used to generate the moderation results.
|
||||
results:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ModerationObjectResults'
|
||||
description: A list of moderation objects
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- model
|
||||
- results
|
||||
title: ModerationObject
|
||||
description: A moderation object.
|
||||
ModerationObjectResults:
|
||||
type: object
|
||||
properties:
|
||||
flagged:
|
||||
type: boolean
|
||||
description: >-
|
||||
Whether any of the below categories are flagged.
|
||||
categories:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: boolean
|
||||
description: >-
|
||||
A list of the categories, and whether they are flagged or not.
|
||||
category_applied_input_types:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
A list of the categories along with the input type(s) that the score applies
|
||||
to.
|
||||
category_scores:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: number
|
||||
description: >-
|
||||
A list of the categories along with their scores as predicted by model.
|
||||
Required set of categories that need to be in response - violence - violence/graphic
|
||||
- harassment - harassment/threatening - hate - hate/threatening - illicit
|
||||
- illicit/violent - sexual - sexual/minors - self-harm - self-harm/intent
|
||||
- self-harm/instructions
|
||||
user_message:
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- flagged
|
||||
- metadata
|
||||
title: ModerationObjectResults
|
||||
description: A moderation object.
|
||||
RunShieldRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue