forked from phoenix/litellm-mirror

(docs + fix) Add docs on Moderations endpoint, Text Completion (#6947)

* fix _pass_through_moderation_endpoint_factory
* fix route_llm_request
* doc moderations api
* docs on /moderations
* add e2e tests for moderations api
* docs moderations api
* test_pass_through_moderation_endpoint_factory
* docs text completion

parent eba700a491
commit 4ebb7c8a7f

7 changed files with 390 additions and 6 deletions

docs/my-website/docs/moderation.md (new file, +135)

@@ -0,0 +1,135 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Moderation

### Usage
<Tabs>
<TabItem value="python" label="LiteLLM Python SDK">

```python
from litellm import moderation

response = moderation(
    input="hello from litellm",
    model="text-moderation-stable"
)
```
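
If you are calling moderation from async code, `litellm.amoderation` (the async entry point referenced by the router and test changes in this commit) can be awaited directly. A minimal sketch, assuming it takes the same `input` / optional `model` arguments as `moderation`:

```python
import asyncio
from litellm import amoderation  # async counterpart of `moderation`

async def main():
    # same arguments as the sync example above
    response = await amoderation(
        input="hello from litellm",
        model="text-moderation-stable",
    )
    print(response)

asyncio.run(main())
```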

</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy Server">

For the `/moderations` endpoint, there is **no need to specify `model` in the request or in the litellm config.yaml**.

Start the litellm proxy server:

```shell
litellm
```

<Tabs>
<TabItem value="python" label="OpenAI Python SDK">

```python
from openai import OpenAI

# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:4000")

response = client.moderations.create(
    input="hello from litellm",
    model="text-moderation-stable"  # optional, defaults to `omni-moderation-latest`
)

print(response)
```
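
Because the proxy does not require `model` on `/moderations`, the same call also works with only `input` — this mirrors the `test_basic_moderations_on_proxy_no_model` e2e test added in this commit:

```python
from openai import OpenAI

client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:4000")

# `model` is omitted on purpose: the proxy routes /moderations without it
response = client.moderations.create(input="hello from litellm")
print(response)
```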

</TabItem>

<TabItem value="curl" label="Curl Request">

```shell
curl --location 'http://0.0.0.0:4000/moderations' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-1234' \
--data '{"input": "Sample text goes here", "model": "text-moderation-stable"}'
```
</TabItem>
</Tabs>

</TabItem>
</Tabs>

## Input Params
LiteLLM accepts and translates the [OpenAI Moderation params](https://platform.openai.com/docs/api-reference/moderations) across all supported providers.

### Required Fields

- `input`: *string or array* - Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models (see the example below).
  - If string: A string of text to classify for moderation
  - If array of strings: An array of strings to classify for moderation
  - If array of objects: An array of multi-modal inputs to the moderation model, where each object can be:
    - An object describing an image to classify with:
      - `type`: *string, required* - Always `image_url`
      - `image_url`: *object, required* - Contains either an image URL or a data URL for a base64 encoded image
    - An object describing text to classify with:
      - `type`: *string, required* - Always `text`
      - `text`: *string, required* - A string of text to classify
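
For example, the array-of-objects form can mix text and image entries in one request. A minimal sketch — the image URL is a placeholder, and image inputs assume a model from the omni-moderation family:

```python
from litellm import moderation

response = moderation(
    input=[
        {"type": "text", "text": "hello from litellm"},
        {
            "type": "image_url",
            # can also be a data URL for a base64 encoded image
            "image_url": {"url": "https://example.com/some-image.png"},
        },
    ],
    model="omni-moderation-latest",
)
```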

### Optional Fields

- `model`: *string (optional)* - The moderation model to use. Defaults to `omni-moderation-latest`.

## Output Format
Here's the exact JSON output and type you can expect from all moderation calls:

[**LiteLLM follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/moderations/object)

```json
{
  "id": "modr-AB8CjOTu2jiq12hp1AQPfeqFWaORR",
  "model": "text-moderation-007",
  "results": [
    {
      "flagged": true,
      "categories": {
        "sexual": false,
        "hate": false,
        "harassment": true,
        "self-harm": false,
        "sexual/minors": false,
        "hate/threatening": false,
        "violence/graphic": false,
        "self-harm/intent": false,
        "self-harm/instructions": false,
        "harassment/threatening": true,
        "violence": true
      },
      "category_scores": {
        "sexual": 0.000011726012417057063,
        "hate": 0.22706663608551025,
        "harassment": 0.5215635299682617,
        "self-harm": 2.227119921371923e-6,
        "sexual/minors": 7.107352217872176e-8,
        "hate/threatening": 0.023547329008579254,
        "violence/graphic": 0.00003391829886822961,
        "self-harm/intent": 1.646940972932498e-6,
        "self-harm/instructions": 1.1198755256458526e-9,
        "harassment/threatening": 0.5694745779037476,
        "violence": 0.9971134662628174
      }
    }
  ]
}
```
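
Since the response mirrors OpenAI's moderation object, the usual pattern is to check `results[0].flagged` and then inspect individual categories or scores. A small sketch, assuming attribute access on the returned object:

```python
from litellm import moderation

response = moderation(input="I want to harm someone")

result = response.results[0]
print(result.flagged)                   # True / False
print(result.categories.violence)       # per-category boolean
print(result.category_scores.violence)  # per-category confidence score
```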

## **Supported Providers**

| Provider    |
|-------------|
| OpenAI      |

docs/my-website/docs/text_completion.md (new file, +174)

@@ -0,0 +1,174 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Text Completion

### Usage
<Tabs>
<TabItem value="python" label="LiteLLM Python SDK">

```python
from litellm import text_completion

response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7
)
```
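
LiteLLM also exposes an async variant, `atext_completion`, which takes the same parameters; a minimal sketch for async code:

```python
import asyncio
from litellm import atext_completion  # async counterpart of `text_completion`

async def main():
    response = await atext_completion(
        model="gpt-3.5-turbo-instruct",
        prompt="Say this is a test",
        max_tokens=7,
    )
    print(response.choices[0].text)

asyncio.run(main())
```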

</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy Server">

1. Define models on config.yaml

```yaml
model_list:
  - model_name: gpt-3.5-turbo-instruct
    litellm_params:
      model: text-completion-openai/gpt-3.5-turbo-instruct # The `text-completion-openai/` prefix will call openai.completions.create
      api_key: os.environ/OPENAI_API_KEY
  - model_name: text-davinci-003
    litellm_params:
      model: text-completion-openai/text-davinci-003
      api_key: os.environ/OPENAI_API_KEY
```

2. Start litellm proxy server

```shell
litellm --config config.yaml
```

<Tabs>
<TabItem value="python" label="OpenAI Python SDK">

```python
from openai import OpenAI

# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="<proxy-api-key>", base_url="http://0.0.0.0:4000")

response = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7
)

print(response)
```
</TabItem>

<TabItem value="curl" label="Curl Request">

```shell
curl --location 'http://0.0.0.0:4000/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-1234' \
--data '{
    "model": "gpt-3.5-turbo-instruct",
    "prompt": "Say this is a test",
    "max_tokens": 7
}'
```
</TabItem>
</Tabs>

</TabItem>
</Tabs>

## Input Params

LiteLLM accepts and translates the [OpenAI Text Completion params](https://platform.openai.com/docs/api-reference/completions) across all supported providers.

### Required Fields

- `model`: *string* - ID of the model to use
- `prompt`: *string or array* - The prompt(s) to generate completions for

### Optional Fields

- `best_of`: *integer* - Generates best_of completions server-side and returns the "best" one
- `echo`: *boolean* - Echo back the prompt in addition to the completion
- `frequency_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency.
- `logit_bias`: *map* - Modify the likelihood of specified tokens appearing in the completion
- `logprobs`: *integer* - Include the log probabilities on the `logprobs` most likely tokens. Max value of 5
- `max_tokens`: *integer* - The maximum number of tokens to generate
- `n`: *integer* - How many completions to generate for each prompt
- `presence_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far.
- `seed`: *integer* - If specified, the system will attempt to sample deterministically
- `stop`: *string or array* - Up to 4 sequences where the API will stop generating tokens
- `stream`: *boolean* - Whether to stream back partial progress. Defaults to false
- `suffix`: *string* - The suffix that comes after a completion of inserted text
- `temperature`: *number* - What sampling temperature to use, between 0 and 2
- `top_p`: *number* - An alternative to sampling with temperature, called nucleus sampling
- `user`: *string* - A unique identifier representing your end-user
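
To illustrate how the optional params above are passed, here is a sketch that combines a few of them in one call (the values are arbitrary examples):

```python
from litellm import text_completion

response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=20,
    temperature=0.2,         # 0-2, lower = more deterministic
    n=1,                     # one completion per prompt
    stop=["\n"],             # stop at the first newline
    user="example-user-id",  # placeholder end-user identifier
)

print(response.choices[0].text)
```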

## Output Format
Here's the exact JSON output format you can expect from completion calls:

[**Follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/completions/object)

<Tabs>

<TabItem value="non-streaming" label="Non-Streaming Response">

```json
{
  "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
  "object": "text_completion",
  "created": 1589478378,
  "model": "gpt-3.5-turbo-instruct",
  "system_fingerprint": "fp_44709d6fcb",
  "choices": [
    {
      "text": "\n\nThis is indeed a test",
      "index": 0,
      "logprobs": null,
      "finish_reason": "length"
    }
  ],
  "usage": {
    "prompt_tokens": 5,
    "completion_tokens": 7,
    "total_tokens": 12
  }
}
```
</TabItem>

<TabItem value="streaming" label="Streaming Response">

```json
{
  "id": "cmpl-7iA7iJjj8V2zOkCGvWF2hAkDWBQZe",
  "object": "text_completion",
  "created": 1690759702,
  "choices": [
    {
      "text": "This",
      "index": 0,
      "logprobs": null,
      "finish_reason": null
    }
  ],
  "model": "gpt-3.5-turbo-instruct",
  "system_fingerprint": "fp_44709d6fcb"
}
```

</TabItem>
</Tabs>
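
When `stream=True` is set, the SDK returns an iterator of chunks shaped like the streaming response above; a minimal sketch of consuming it:

```python
from litellm import text_completion

response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7,
    stream=True,
)

for chunk in response:
    text = chunk.choices[0].text
    if text:
        print(text, end="")
```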

## **Supported Providers**

| Provider     | Link to Usage |
|--------------|--------------------|
| OpenAI       | [Usage](../docs/providers/text_completion_openai) |
| Azure OpenAI | [Usage](../docs/providers/azure) |

@@ -246,6 +246,7 @@ const sidebars = {
           "completion/usage",
         ],
       },
+      "text_completion",
       "embedding/supported_embedding",
       "image_generation",
       {
@@ -261,6 +262,7 @@ const sidebars = {
       "batches",
       "realtime",
       "fine_tuning",
+      "moderation",
       {
         type: "link",
         label: "Use LiteLLM Proxy with Vertex, Bedrock SDK",

@@ -86,7 +86,6 @@ async def route_request(
         else:
             models = [model.strip() for model in data.pop("model").split(",")]
             return llm_router.abatch_completion(models=models, **data)
-
     elif llm_router is not None:
         if (
             data["model"] in router_model_names
@@ -113,6 +112,9 @@ async def route_request(
             or len(llm_router.pattern_router.patterns) > 0
         ):
             return getattr(llm_router, f"{route_type}")(**data)
+        elif route_type == "amoderation":
+            # moderation endpoint does not require `model` parameter
+            return getattr(llm_router, f"{route_type}")(**data)

     elif user_model is not None:
         return getattr(litellm, f"{route_type}")(**data)
@@ -2563,10 +2563,7 @@ class Router:
         original_function: Callable,
         **kwargs,
     ):
-        if (
-            "model" in kwargs
-            and self.get_model_list(model_name=kwargs["model"]) is not None
-        ):
+        if kwargs.get("model") and self.get_model_list(model_name=kwargs["model"]):
             deployment = await self.async_get_available_deployment(
                 model=kwargs["model"]
             )

tests/otel_tests/test_moderations.py (new file, +71)

@@ -0,0 +1,71 @@
import pytest
import asyncio
import aiohttp, openai
from openai import OpenAI, AsyncOpenAI
from typing import Optional, List, Union
import uuid


async def make_moderations_curl_request(
    session,
    key,
    request_data: dict,
):
    url = "http://0.0.0.0:4000/moderations"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }

    async with session.post(url, headers=headers, json=request_data) as response:
        status = response.status
        response_text = await response.text()

        if status != 200:
            raise Exception(response_text)

        return await response.json()


@pytest.mark.asyncio
async def test_basic_moderations_on_proxy_no_model():
    """
    Test moderations endpoint on proxy when no `model` is specified in the request
    """
    async with aiohttp.ClientSession() as session:
        test_text = "I want to harm someone"  # Test text that should trigger moderation
        request_data = {
            "input": test_text,
        }
        try:
            response = await make_moderations_curl_request(
                session,
                "sk-1234",
                request_data,
            )
            print("response=", response)
        except Exception as e:
            print(e)
            pytest.fail("Moderations request failed")


@pytest.mark.asyncio
async def test_basic_moderations_on_proxy_with_model():
    """
    Test moderations endpoint on proxy when `model` is specified in the request
    """
    async with aiohttp.ClientSession() as session:
        test_text = "I want to harm someone"  # Test text that should trigger moderation
        request_data = {
            "input": test_text,
            "model": "text-moderation-stable",
        }
        try:
            response = await make_moderations_curl_request(
                session,
                "sk-1234",
                request_data,
            )
            print("response=", response)
        except Exception as e:
            pytest.fail("Moderations request failed")
@@ -1040,8 +1040,11 @@ def test_pattern_match_deployment_set_model_name(
 async def test_pass_through_moderation_endpoint_factory(model_list):
     router = Router(model_list=model_list)
     response = await router._pass_through_moderation_endpoint_factory(
-        original_function=litellm.amoderation, input="this is valid good text"
+        original_function=litellm.amoderation,
+        input="this is valid good text",
+        model=None,
     )
+    assert response is not None


 @pytest.mark.parametrize(