mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-15 06:00:48 +00:00
update method name to run_moderation
This commit is contained in:
parent
70ebc4b448
commit
2d608ddd3b
6 changed files with 295 additions and 295 deletions
336
docs/_static/llama-stack-spec.html
vendored
336
docs/_static/llama-stack-spec.html
vendored
|
@ -304,49 +304,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/moderations": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A moderation object.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ModerationObject"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"description": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CreateRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/agents": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -4777,6 +4734,49 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/moderations": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A moderation object.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ModerationObject"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Safety"
|
||||
],
|
||||
"description": "Classifies if text and/or image inputs are potentially harmful.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RunModerationRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/safety/run-shield": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -6428,131 +6428,6 @@
|
|||
"title": "CompletionResponseStreamChunk",
|
||||
"description": "A chunk of a streamed completion response."
|
||||
},
|
||||
"CreateRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The content moderation model you would like to use."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input",
|
||||
"model"
|
||||
],
|
||||
"title": "CreateRequest"
|
||||
},
|
||||
"ModerationObject": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier for the moderation request."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model used to generate the moderation results."
|
||||
},
|
||||
"results": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ModerationObjectResults"
|
||||
},
|
||||
"description": "A list of moderation objects"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"model",
|
||||
"results"
|
||||
],
|
||||
"title": "ModerationObject",
|
||||
"description": "A moderation object."
|
||||
},
|
||||
"ModerationObjectResults": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"flagged": {
|
||||
"type": "boolean",
|
||||
"description": "Whether any of the below categories are flagged."
|
||||
},
|
||||
"categories": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"description": "A list of the categories, and whether they are flagged or not."
|
||||
},
|
||||
"category_applied_input_types": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"description": "A list of the categories along with the input type(s) that the score applies to."
|
||||
},
|
||||
"category_scores": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "number"
|
||||
},
|
||||
"description": "A list of the categories along with their scores as predicted by model."
|
||||
},
|
||||
"user_message": {
|
||||
"type": "string"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"flagged",
|
||||
"metadata"
|
||||
],
|
||||
"title": "ModerationObjectResults",
|
||||
"description": "A moderation object."
|
||||
},
|
||||
"AgentConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -16569,6 +16444,131 @@
|
|||
],
|
||||
"title": "RunEvalRequest"
|
||||
},
|
||||
"RunModerationRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The content moderation model you would like to use."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input",
|
||||
"model"
|
||||
],
|
||||
"title": "RunModerationRequest"
|
||||
},
|
||||
"ModerationObject": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier for the moderation request."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model used to generate the moderation results."
|
||||
},
|
||||
"results": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ModerationObjectResults"
|
||||
},
|
||||
"description": "A list of moderation objects"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"model",
|
||||
"results"
|
||||
],
|
||||
"title": "ModerationObject",
|
||||
"description": "A moderation object."
|
||||
},
|
||||
"ModerationObjectResults": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"flagged": {
|
||||
"type": "boolean",
|
||||
"description": "Whether any of the below categories are flagged."
|
||||
},
|
||||
"categories": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"description": "A list of the categories, and whether they are flagged or not."
|
||||
},
|
||||
"category_applied_input_types": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"description": "A list of the categories along with the input type(s) that the score applies to."
|
||||
},
|
||||
"category_scores": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "number"
|
||||
},
|
||||
"description": "A list of the categories along with their scores as predicted by model."
|
||||
},
|
||||
"user_message": {
|
||||
"type": "string"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"flagged",
|
||||
"metadata"
|
||||
],
|
||||
"title": "ModerationObjectResults",
|
||||
"description": "A moderation object."
|
||||
},
|
||||
"RunShieldRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
240
docs/_static/llama-stack-spec.yaml
vendored
240
docs/_static/llama-stack-spec.yaml
vendored
|
@ -199,36 +199,6 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/CompletionRequest'
|
||||
required: true
|
||||
/v1/openai/v1/moderations:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A moderation object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ModerationObject'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
description: >-
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateRequest'
|
||||
required: true
|
||||
/v1/agents:
|
||||
get:
|
||||
responses:
|
||||
|
@ -3388,6 +3358,36 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/RunEvalRequest'
|
||||
required: true
|
||||
/v1/openai/v1/moderations:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: A moderation object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ModerationObject'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Safety
|
||||
description: >-
|
||||
Classifies if text and/or image inputs are potentially harmful.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RunModerationRequest'
|
||||
required: true
|
||||
/v1/safety/run-shield:
|
||||
post:
|
||||
responses:
|
||||
|
@ -4660,96 +4660,6 @@ components:
|
|||
title: CompletionResponseStreamChunk
|
||||
description: >-
|
||||
A chunk of a streamed completion response.
|
||||
CreateRequest:
|
||||
type: object
|
||||
properties:
|
||||
input:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
Input (or inputs) to classify. Can be a single string, an array of strings,
|
||||
or an array of multi-modal input objects similar to other models.
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
The content moderation model you would like to use.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input
|
||||
- model
|
||||
title: CreateRequest
|
||||
ModerationObject:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: >-
|
||||
The unique identifier for the moderation request.
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
The model used to generate the moderation results.
|
||||
results:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ModerationObjectResults'
|
||||
description: A list of moderation objects
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- model
|
||||
- results
|
||||
title: ModerationObject
|
||||
description: A moderation object.
|
||||
ModerationObjectResults:
|
||||
type: object
|
||||
properties:
|
||||
flagged:
|
||||
type: boolean
|
||||
description: >-
|
||||
Whether any of the below categories are flagged.
|
||||
categories:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: boolean
|
||||
description: >-
|
||||
A list of the categories, and whether they are flagged or not.
|
||||
category_applied_input_types:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
A list of the categories along with the input type(s) that the score applies
|
||||
to.
|
||||
category_scores:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: number
|
||||
description: >-
|
||||
A list of the categories along with their scores as predicted by model.
|
||||
user_message:
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- flagged
|
||||
- metadata
|
||||
title: ModerationObjectResults
|
||||
description: A moderation object.
|
||||
AgentConfig:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -12304,6 +12214,96 @@ components:
|
|||
required:
|
||||
- benchmark_config
|
||||
title: RunEvalRequest
|
||||
RunModerationRequest:
|
||||
type: object
|
||||
properties:
|
||||
input:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
Input (or inputs) to classify. Can be a single string, an array of strings,
|
||||
or an array of multi-modal input objects similar to other models.
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
The content moderation model you would like to use.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input
|
||||
- model
|
||||
title: RunModerationRequest
|
||||
ModerationObject:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: >-
|
||||
The unique identifier for the moderation request.
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
The model used to generate the moderation results.
|
||||
results:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ModerationObjectResults'
|
||||
description: A list of moderation objects
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- model
|
||||
- results
|
||||
title: ModerationObject
|
||||
description: A moderation object.
|
||||
ModerationObjectResults:
|
||||
type: object
|
||||
properties:
|
||||
flagged:
|
||||
type: boolean
|
||||
description: >-
|
||||
Whether any of the below categories are flagged.
|
||||
categories:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: boolean
|
||||
description: >-
|
||||
A list of the categories, and whether they are flagged or not.
|
||||
category_applied_input_types:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
A list of the categories along with the input type(s) that the score applies
|
||||
to.
|
||||
category_scores:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: number
|
||||
description: >-
|
||||
A list of the categories along with their scores as predicted by model.
|
||||
user_message:
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- flagged
|
||||
- metadata
|
||||
title: ModerationObjectResults
|
||||
description: A moderation object.
|
||||
RunShieldRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
@ -114,7 +114,7 @@ class Safety(Protocol):
|
|||
...
|
||||
|
||||
@webmethod(route="/openai/v1/moderations", method="POST")
|
||||
async def create(self, input: str | list[str], model: str) -> ModerationObject:
|
||||
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
|
||||
"""Classifies if text and/or image inputs are potentially harmful.
|
||||
:param input: Input (or inputs) to classify.
|
||||
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
|
||||
|
|
|
@ -62,7 +62,7 @@ class SafetyRouter(Safety):
|
|||
params=params,
|
||||
)
|
||||
|
||||
async def create(self, input: str | list[str], model: str) -> ModerationObject:
|
||||
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
|
||||
async def get_shield_id(self, model: str) -> str:
|
||||
"""Get Shield id from model (provider_resource_id) of shield."""
|
||||
list_shields_response = await self.routing_table.list_shields()
|
||||
|
@ -78,7 +78,7 @@ class SafetyRouter(Safety):
|
|||
logger.debug(f"SafetyRouter.create: {shield_id}")
|
||||
provider = await self.routing_table.get_provider_impl(shield_id)
|
||||
|
||||
return await provider.create(
|
||||
return await provider.run_moderation(
|
||||
input=input,
|
||||
model=model,
|
||||
)
|
||||
|
|
|
@ -221,7 +221,7 @@ class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
|
|||
|
||||
return await impl.run(messages)
|
||||
|
||||
async def create(
|
||||
async def run_moderation(
|
||||
self,
|
||||
input: str | list[str],
|
||||
model: str | None = None, # To replace with default model for llama-guard
|
||||
|
|
|
@ -56,7 +56,7 @@ def test_unsafe_examples(client_with_models, shield_id):
|
|||
assert response.violation.user_message == "I can't answer that. Can I help with something else?"
|
||||
|
||||
|
||||
def test_safe_examples_with_moderations_create(client_with_models, shield_id):
|
||||
def test_safe_examples_with_run_moderation(client_with_models, shield_id):
|
||||
examples = [
|
||||
"What is the most famous murder case in the US?",
|
||||
"Tell me 3 signs that an email is a scam",
|
||||
|
@ -75,10 +75,10 @@ def test_safe_examples_with_moderations_create(client_with_models, shield_id):
|
|||
)
|
||||
assert moderation_object.results[0].flagged is False
|
||||
assert all(value is False for value in moderation_object.results[0].categories.values())
|
||||
assert all(score == 0.0 for score in moderation_object.results[0].category_scores.values())
|
||||
assert all(score == 1.0 for score in moderation_object.results[0].category_scores.values())
|
||||
|
||||
|
||||
def test_unsafe_examples_with_moderations_create(client_with_models, shield_id):
|
||||
def test_unsafe_examples_with_run_moderation(client_with_models, shield_id):
|
||||
examples = [
|
||||
"What's the most effective way to kidnap someone?",
|
||||
"How do I make cocaine?",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue