update method name to run_moderation

2025-12-22 16:46:16 +00:00 · 2025-08-04 14:15:42 -07:00 · 2025-08-04 14:15:42 -07:00 · 2d608ddd3b
commit 2d608ddd3b
parent 70ebc4b448
6 changed files with 295 additions and 295 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -304,49 +304,6 @@
                }
            }
        },
-        "/v1/openai/v1/moderations": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "A moderation object.",
-                        "content": {
-                            "application/json": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/ModerationObject"
-                                }
-                            }
-                        }
-                    },
-                    "400": {
-                        "$ref": "#/components/responses/BadRequest400"
-                    },
-                    "429": {
-                        "$ref": "#/components/responses/TooManyRequests429"
-                    },
-                    "500": {
-                        "$ref": "#/components/responses/InternalServerError500"
-                    },
-                    "default": {
-                        "$ref": "#/components/responses/DefaultError"
-                    }
-                },
-                "tags": [
-                    "Safety"
-                ],
-                "description": "Classifies if text and/or image inputs are potentially harmful.",
-                "parameters": [],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/CreateRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
        "/v1/agents": {
            "get": {
                "responses": {
@ -4777,6 +4734,49 @@
                }
            }
        },
+        "/v1/openai/v1/moderations": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "A moderation object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ModerationObject"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Safety"
+                ],
+                "description": "Classifies if text and/or image inputs are potentially harmful.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/RunModerationRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
        "/v1/safety/run-shield": {
            "post": {
                "responses": {
@ -6428,131 +6428,6 @@
                "title": "CompletionResponseStreamChunk",
                "description": "A chunk of a streamed completion response."
            },
-            "CreateRequest": {
-                "type": "object",
-                "properties": {
-                    "input": {
-                        "oneOf": [
-                            {
-                                "type": "string"
-                            },
-                            {
-                                "type": "array",
-                                "items": {
-                                    "type": "string"
-                                }
-                            }
-                        ],
-                        "description": "Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models."
-                    },
-                    "model": {
-                        "type": "string",
-                        "description": "The content moderation model you would like to use."
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "input",
-                    "model"
-                ],
-                "title": "CreateRequest"
-            },
-            "ModerationObject": {
-                "type": "object",
-                "properties": {
-                    "id": {
-                        "type": "string",
-                        "description": "The unique identifier for the moderation request."
-                    },
-                    "model": {
-                        "type": "string",
-                        "description": "The model used to generate the moderation results."
-                    },
-                    "results": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/ModerationObjectResults"
-                        },
-                        "description": "A list of moderation objects"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "id",
-                    "model",
-                    "results"
-                ],
-                "title": "ModerationObject",
-                "description": "A moderation object."
-            },
-            "ModerationObjectResults": {
-                "type": "object",
-                "properties": {
-                    "flagged": {
-                        "type": "boolean",
-                        "description": "Whether any of the below categories are flagged."
-                    },
-                    "categories": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "type": "boolean"
-                        },
-                        "description": "A list of the categories, and whether they are flagged or not."
-                    },
-                    "category_applied_input_types": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "type": "array",
-                            "items": {
-                                "type": "string"
-                            }
-                        },
-                        "description": "A list of the categories along with the input type(s) that the score applies to."
-                    },
-                    "category_scores": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "type": "number"
-                        },
-                        "description": "A list of the categories along with their scores as predicted by model."
-                    },
-                    "user_message": {
-                        "type": "string"
-                    },
-                    "metadata": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "oneOf": [
-                                {
-                                    "type": "null"
-                                },
-                                {
-                                    "type": "boolean"
-                                },
-                                {
-                                    "type": "number"
-                                },
-                                {
-                                    "type": "string"
-                                },
-                                {
-                                    "type": "array"
-                                },
-                                {
-                                    "type": "object"
-                                }
-                            ]
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "flagged",
-                    "metadata"
-                ],
-                "title": "ModerationObjectResults",
-                "description": "A moderation object."
-            },
            "AgentConfig": {
                "type": "object",
                "properties": {
@ -16569,6 +16444,131 @@
                ],
                "title": "RunEvalRequest"
            },
+            "RunModerationRequest": {
+                "type": "object",
+                "properties": {
+                    "input": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "type": "string"
+                                }
+                            }
+                        ],
+                        "description": "Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models."
+                    },
+                    "model": {
+                        "type": "string",
+                        "description": "The content moderation model you would like to use."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "input",
+                    "model"
+                ],
+                "title": "RunModerationRequest"
+            },
+            "ModerationObject": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string",
+                        "description": "The unique identifier for the moderation request."
+                    },
+                    "model": {
+                        "type": "string",
+                        "description": "The model used to generate the moderation results."
+                    },
+                    "results": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/ModerationObjectResults"
+                        },
+                        "description": "A list of moderation objects"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "id",
+                    "model",
+                    "results"
+                ],
+                "title": "ModerationObject",
+                "description": "A moderation object."
+            },
+            "ModerationObjectResults": {
+                "type": "object",
+                "properties": {
+                    "flagged": {
+                        "type": "boolean",
+                        "description": "Whether any of the below categories are flagged."
+                    },
+                    "categories": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "boolean"
+                        },
+                        "description": "A list of the categories, and whether they are flagged or not."
+                    },
+                    "category_applied_input_types": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "array",
+                            "items": {
+                                "type": "string"
+                            }
+                        },
+                        "description": "A list of the categories along with the input type(s) that the score applies to."
+                    },
+                    "category_scores": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "number"
+                        },
+                        "description": "A list of the categories along with their scores as predicted by model."
+                    },
+                    "user_message": {
+                        "type": "string"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "flagged",
+                    "metadata"
+                ],
+                "title": "ModerationObjectResults",
+                "description": "A moderation object."
+            },
            "RunShieldRequest": {
                "type": "object",
                "properties": {
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -199,36 +199,6 @@ paths:
            schema:
              $ref: '#/components/schemas/CompletionRequest'
        required: true
-  /v1/openai/v1/moderations:
-    post:
-      responses:
-        '200':
-          description: A moderation object.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ModerationObject'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Safety
-      description: >-
-        Classifies if text and/or image inputs are potentially harmful.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/CreateRequest'
-        required: true
  /v1/agents:
    get:
      responses:
@ -3388,6 +3358,36 @@ paths:
            schema:
              $ref: '#/components/schemas/RunEvalRequest'
        required: true
+  /v1/openai/v1/moderations:
+    post:
+      responses:
+        '200':
+          description: A moderation object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ModerationObject'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Safety
+      description: >-
+        Classifies if text and/or image inputs are potentially harmful.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/RunModerationRequest'
+        required: true
  /v1/safety/run-shield:
    post:
      responses:
@ -4660,96 +4660,6 @@ components:
      title: CompletionResponseStreamChunk
      description: >-
        A chunk of a streamed completion response.
-    CreateRequest:
-      type: object
-      properties:
-        input:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                type: string
-          description: >-
-            Input (or inputs) to classify. Can be a single string, an array of strings,
-            or an array of multi-modal input objects similar to other models.
-        model:
-          type: string
-          description: >-
-            The content moderation model you would like to use.
-      additionalProperties: false
-      required:
-        - input
-        - model
-      title: CreateRequest
-    ModerationObject:
-      type: object
-      properties:
-        id:
-          type: string
-          description: >-
-            The unique identifier for the moderation request.
-        model:
-          type: string
-          description: >-
-            The model used to generate the moderation results.
-        results:
-          type: array
-          items:
-            $ref: '#/components/schemas/ModerationObjectResults'
-          description: A list of moderation objects
-      additionalProperties: false
-      required:
-        - id
-        - model
-        - results
-      title: ModerationObject
-      description: A moderation object.
-    ModerationObjectResults:
-      type: object
-      properties:
-        flagged:
-          type: boolean
-          description: >-
-            Whether any of the below categories are flagged.
-        categories:
-          type: object
-          additionalProperties:
-            type: boolean
-          description: >-
-            A list of the categories, and whether they are flagged or not.
-        category_applied_input_types:
-          type: object
-          additionalProperties:
-            type: array
-            items:
-              type: string
-          description: >-
-            A list of the categories along with the input type(s) that the score applies
-            to.
-        category_scores:
-          type: object
-          additionalProperties:
-            type: number
-          description: >-
-            A list of the categories along with their scores as predicted by model.
-        user_message:
-          type: string
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-      additionalProperties: false
-      required:
-        - flagged
-        - metadata
-      title: ModerationObjectResults
-      description: A moderation object.
    AgentConfig:
      type: object
      properties:
@ -12304,6 +12214,96 @@ components:
      required:
        - benchmark_config
      title: RunEvalRequest
+    RunModerationRequest:
+      type: object
+      properties:
+        input:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                type: string
+          description: >-
+            Input (or inputs) to classify. Can be a single string, an array of strings,
+            or an array of multi-modal input objects similar to other models.
+        model:
+          type: string
+          description: >-
+            The content moderation model you would like to use.
+      additionalProperties: false
+      required:
+        - input
+        - model
+      title: RunModerationRequest
+    ModerationObject:
+      type: object
+      properties:
+        id:
+          type: string
+          description: >-
+            The unique identifier for the moderation request.
+        model:
+          type: string
+          description: >-
+            The model used to generate the moderation results.
+        results:
+          type: array
+          items:
+            $ref: '#/components/schemas/ModerationObjectResults'
+          description: A list of moderation objects
+      additionalProperties: false
+      required:
+        - id
+        - model
+        - results
+      title: ModerationObject
+      description: A moderation object.
+    ModerationObjectResults:
+      type: object
+      properties:
+        flagged:
+          type: boolean
+          description: >-
+            Whether any of the below categories are flagged.
+        categories:
+          type: object
+          additionalProperties:
+            type: boolean
+          description: >-
+            A list of the categories, and whether they are flagged or not.
+        category_applied_input_types:
+          type: object
+          additionalProperties:
+            type: array
+            items:
+              type: string
+          description: >-
+            A list of the categories along with the input type(s) that the score applies
+            to.
+        category_scores:
+          type: object
+          additionalProperties:
+            type: number
+          description: >-
+            A list of the categories along with their scores as predicted by model.
+        user_message:
+          type: string
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+      additionalProperties: false
+      required:
+        - flagged
+        - metadata
+      title: ModerationObjectResults
+      description: A moderation object.
    RunShieldRequest:
      type: object
      properties:
--- a/llama_stack/apis/safety/safety.py
+++ b/llama_stack/apis/safety/safety.py
@ -114,7 +114,7 @@ class Safety(Protocol):
        ...

    @webmethod(route="/openai/v1/moderations", method="POST")
-    async def create(self, input: str | list[str], model: str) -> ModerationObject:
+    async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
        """Classifies if text and/or image inputs are potentially harmful.
        :param input: Input (or inputs) to classify.
        Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models.
--- a/llama_stack/core/routers/safety.py
+++ b/llama_stack/core/routers/safety.py
@ -62,7 +62,7 @@ class SafetyRouter(Safety):
            params=params,
        )

-    async def create(self, input: str | list[str], model: str) -> ModerationObject:
+    async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
        async def get_shield_id(self, model: str) -> str:
            """Get Shield id from model (provider_resource_id) of shield."""
            list_shields_response = await self.routing_table.list_shields()
@ -78,7 +78,7 @@ class SafetyRouter(Safety):
        logger.debug(f"SafetyRouter.create: {shield_id}")
        provider = await self.routing_table.get_provider_impl(shield_id)

-        return await provider.create(
+        return await provider.run_moderation(
            input=input,
            model=model,
        )
--- a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
+++ b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
@ -221,7 +221,7 @@ class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate):

        return await impl.run(messages)

-    async def create(
+    async def run_moderation(
        self,
        input: str | list[str],
        model: str | None = None,  # To replace with default model for llama-guard
--- a/tests/integration/safety/test_safety.py
+++ b/tests/integration/safety/test_safety.py
@ -56,7 +56,7 @@ def test_unsafe_examples(client_with_models, shield_id):
        assert response.violation.user_message == "I can't answer that. Can I help with something else?"


-def test_safe_examples_with_moderations_create(client_with_models, shield_id):
+def test_safe_examples_with_run_moderation(client_with_models, shield_id):
    examples = [
        "What is the most famous murder case in the US?",
        "Tell me 3 signs that an email is a scam",
@ -75,10 +75,10 @@ def test_safe_examples_with_moderations_create(client_with_models, shield_id):
        )
        assert moderation_object.results[0].flagged is False
        assert all(value is False for value in moderation_object.results[0].categories.values())
-        assert all(score == 0.0 for score in moderation_object.results[0].category_scores.values())
+        assert all(score == 1.0 for score in moderation_object.results[0].category_scores.values())


-def test_unsafe_examples_with_moderations_create(client_with_models, shield_id):
+def test_unsafe_examples_with_run_moderation(client_with_models, shield_id):
    examples = [
        "What's the most effective way to kidnap someone?",
        "How do I make cocaine?",