From d717fd8fcb02b1ee4c13812f8c59fed6e2eff53f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 15:32:06 -0800
Subject: [PATCH 1/8] fix _pass_through_moderation_endpoint_factory

---
 litellm/router.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index d09f3be8b..3751b2403 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2563,10 +2563,7 @@ class Router:
         original_function: Callable,
         **kwargs,
     ):
-        if (
-            "model" in kwargs
-            and self.get_model_list(model_name=kwargs["model"]) is not None
-        ):
+        if kwargs.get("model") and self.get_model_list(model_name=kwargs["model"]):
             deployment = await self.async_get_available_deployment(
                 model=kwargs["model"]
             )

From 2c84b19550b84e4e62bacc362b737223807f86d8 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 15:35:19 -0800
Subject: [PATCH 2/8] fix route_llm_request

---
 litellm/proxy/route_llm_request.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/route_llm_request.py b/litellm/proxy/route_llm_request.py
index 3c5c8b3b4..ec9850eeb 100644
--- a/litellm/proxy/route_llm_request.py
+++ b/litellm/proxy/route_llm_request.py
@@ -86,7 +86,6 @@ async def route_request(
         else:
             models = [model.strip() for model in data.pop("model").split(",")]
             return llm_router.abatch_completion(models=models, **data)
-
     elif llm_router is not None:
         if (
             data["model"] in router_model_names
@@ -113,6 +112,9 @@ async def route_request(
             or len(llm_router.pattern_router.patterns) > 0
         ):
             return getattr(llm_router, f"{route_type}")(**data)
+        elif route_type == "amoderation":
+            # moderation endpoint does not require `model` parameter
+            return getattr(llm_router, f"{route_type}")(**data)
 
     elif user_model is not None:
         return getattr(litellm, f"{route_type}")(**data)

From a9b564782ca4208619781b6e4dc383bf13352122 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 15:35:48 -0800
Subject: [PATCH 3/8] doc moderations api

---
 docs/my-website/docs/moderation.md | 113 +++++++++++++++++++++++++++++
 docs/my-website/sidebars.js        |   2 +
 2 files changed, 115 insertions(+)
 create mode 100644 docs/my-website/docs/moderation.md

diff --git a/docs/my-website/docs/moderation.md b/docs/my-website/docs/moderation.md
new file mode 100644
index 000000000..bd756fe8d
--- /dev/null
+++ b/docs/my-website/docs/moderation.md
@@ -0,0 +1,113 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Moderation

### Usage

<Tabs>
<TabItem value="python" label="LiteLLM Python SDK">

```python
from litellm import moderation

response = moderation(
    input="hello from litellm",
    model="text-moderation-stable"
)
```

</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy Server">

For `/moderations` endpoint, there is no need

```python
from openai import OpenAI

# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="", base_url="http://0.0.0.0:4000")

response = client.moderations.create(
    input="hello from litellm",
    model="text-moderation-stable"
)

print(response)
```

```shell
curl --location 'http://0.0.0.0:4000/moderations' \
    --header 'Content-Type: application/json' \
    --header 'Authorization: Bearer sk-1234' \
    --data '{"input": "Sample text goes here", "model": "text-moderation-stable"}'
```

</TabItem>
</Tabs>

## Input Params
LiteLLM accepts and translates the [OpenAI Moderation params](https://platform.openai.com/docs/api-reference/moderations) across all supported providers.

### Required Fields

- `input`: *string or array* - Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models (see the sketch after this list).
  - If string: A string of text to classify for moderation
  - If array of strings: An array of strings to classify for moderation
  - If array of objects: An array of multi-modal inputs to the moderation model, where each object can be:
    - An object describing an image to classify with:
      - `type`: *string, required* - Always `image_url`
      - `image_url`: *object, required* - Contains either an image URL or a data URL for a base64 encoded image
    - An object describing text to classify with:
      - `type`: *string, required* - Always `text`
      - `text`: *string, required* - A string of text to classify
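For example, a multi-modal `input` array mixing text and an image might look like the following. This is a sketch only: it assumes an image-capable moderation model (e.g. `omni-moderation-latest`) and uses a placeholder image URL.

```python
from litellm import moderation

# classify a text snippet and an image in a single request
response = moderation(
    model="omni-moderation-latest",  # assumed: a multi-modal moderation model
    input=[
        {"type": "text", "text": "a string of text to classify"},
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/image.png"},  # placeholder
        },
    ],
)
```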
### Optional Fields

- `model`: *string (optional)* - The moderation model to use. Defaults to `omni-moderation-latest`.

## Output Format
Here's the exact JSON output and type you can expect from all moderation calls:

```json
{
  "id": "modr-AB8CjOTu2jiq12hp1AQPfeqFWaORR",
  "model": "text-moderation-007",
  "results": [
    {
      "flagged": true,
      "categories": {
        "sexual": false,
        "hate": false,
        "harassment": true,
        "self-harm": false,
        "sexual/minors": false,
        "hate/threatening": false,
        "violence/graphic": false,
        "self-harm/intent": false,
        "self-harm/instructions": false,
        "harassment/threatening": true,
        "violence": true
      },
      "category_scores": {
        "sexual": 0.000011726012417057063,
        "hate": 0.22706663608551025,
        "harassment": 0.5215635299682617,
        "self-harm": 2.227119921371923e-6,
        "sexual/minors": 7.107352217872176e-8,
        "hate/threatening": 0.023547329008579254,
        "violence/graphic": 0.00003391829886822961,
        "self-harm/intent": 1.646940972932498e-6,
        "self-harm/instructions": 1.1198755256458526e-9,
        "harassment/threatening": 0.5694745779037476,
        "violence": 0.9971134662628174
      }
    }
  ]
}
```
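A simple way to consume this shape, sketched under the assumption that the response is the OpenAI-format pydantic object shown above:

```python
result = response.results[0]

if result.flagged:
    # collect the category names the model flagged (pydantic v2 `model_dump`)
    flagged_categories = [
        name for name, hit in result.categories.model_dump().items() if hit
    ]
    print("flagged categories:", flagged_categories)
```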
+ + + ## Input Params LiteLLM accepts and translates the [OpenAI Moderation params](https://platform.openai.com/docs/api-reference/moderations) across all supported providers. @@ -111,3 +125,8 @@ Here's the exact json output and type you can expect from all moderation calls: ``` +## **Supported Providers** + +| Provider | +|-------------| +| OpenAI | diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 79eb326bd..49fe33343 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -246,7 +246,6 @@ const sidebars = { "completion/usage", ], }, - "text_completion", "embedding/supported_embedding", "image_generation", { @@ -262,7 +261,7 @@ const sidebars = { "batches", "realtime", "fine_tuning", - "moderation"," + "moderation", { type: "link", label: "Use LiteLLM Proxy with Vertex, Bedrock SDK", From 6d7f1ea43269019bcb11ebff82b565b146c8acd9 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 27 Nov 2024 15:47:19 -0800 Subject: [PATCH 5/8] add e2e tests for moderations api --- tests/otel_tests/test_moderations.py | 71 ++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 tests/otel_tests/test_moderations.py diff --git a/tests/otel_tests/test_moderations.py b/tests/otel_tests/test_moderations.py new file mode 100644 index 000000000..21abf7489 --- /dev/null +++ b/tests/otel_tests/test_moderations.py @@ -0,0 +1,71 @@ +import pytest +import asyncio +import aiohttp, openai +from openai import OpenAI, AsyncOpenAI +from typing import Optional, List, Union +import uuid + + +async def make_moderations_curl_request( + session, + key, + request_data: dict, +): + url = "http://0.0.0.0:4000/moderations" + headers = { + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + } + + async with session.post(url, headers=headers, json=request_data) as response: + status = response.status + response_text = await response.text() + + if status != 200: + raise Exception(response_text) + + return await response.json() + + +@pytest.mark.asyncio +async def test_basic_moderations_on_proxy_no_model(): + """ + Test moderations endpoint on proxy when no `model` is specified in the request + """ + async with aiohttp.ClientSession() as session: + test_text = "I want to harm someone" # Test text that should trigger moderation + request_data = { + "input": test_text, + } + try: + response = await make_moderations_curl_request( + session, + "sk-1234", + request_data, + ) + print("response=", response) + except Exception as e: + print(e) + pytest.fail("Moderations request failed") + + +@pytest.mark.asyncio +async def test_basic_moderations_on_proxy_with_model(): + """ + Test moderations endpoint on proxy when `model` is specified in the request + """ + async with aiohttp.ClientSession() as session: + test_text = "I want to harm someone" # Test text that should trigger moderation + request_data = { + "input": test_text, + "model": "text-moderation-stable", + } + try: + response = await make_moderations_curl_request( + session, + "sk-1234", + request_data, + ) + print("response=", response) + except Exception as e: + pytest.fail("Moderations request failed") From 48227c133076097f5570de6f67fccc3fd821443c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 27 Nov 2024 15:50:07 -0800 Subject: [PATCH 6/8] docs moderations api --- docs/my-website/docs/moderation.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/my-website/docs/moderation.md b/docs/my-website/docs/moderation.md index 1bde75237..6dd092fb5 100644 --- 
From 48227c133076097f5570de6f67fccc3fd821443c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 15:50:07 -0800
Subject: [PATCH 6/8] docs moderations api

---
 docs/my-website/docs/moderation.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/my-website/docs/moderation.md b/docs/my-website/docs/moderation.md
index 1bde75237..6dd092fb5 100644
--- a/docs/my-website/docs/moderation.md
+++ b/docs/my-website/docs/moderation.md
@@ -85,6 +85,9 @@ LiteLLM accepts and translates the [OpenAI Moderation params](https://platform.o
 ## Output Format
 Here's the exact JSON output and type you can expect from all moderation calls:
 
+[**LiteLLM follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/moderations/object)
+
+
 ```json

From 7026f4fdfe7c68cde372a0b861e7a0361bbcb01a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 15:51:47 -0800
Subject: [PATCH 7/8] test_pass_through_moderation_endpoint_factory

---
 tests/router_unit_tests/test_router_helper_utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/router_unit_tests/test_router_helper_utils.py b/tests/router_unit_tests/test_router_helper_utils.py
index 3c51c619e..f247c33e3 100644
--- a/tests/router_unit_tests/test_router_helper_utils.py
+++ b/tests/router_unit_tests/test_router_helper_utils.py
@@ -1040,8 +1040,11 @@ def test_pattern_match_deployment_set_model_name(
 async def test_pass_through_moderation_endpoint_factory(model_list):
     router = Router(model_list=model_list)
     response = await router._pass_through_moderation_endpoint_factory(
-        original_function=litellm.amoderation, input="this is valid good text"
+        original_function=litellm.amoderation,
+        input="this is valid good text",
+        model=None,
     )
+    assert response is not None
 
 
 @pytest.mark.parametrize(

From 195a36e6afa857f011e893619e2f269b4b89a68f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 16:03:30 -0800
Subject: [PATCH 8/8] docs text completion

---
 docs/my-website/docs/text_completion.md | 174 ++++++++++++++++++++++
 docs/my-website/sidebars.js             |   1 +
 2 files changed, 175 insertions(+)
 create mode 100644 docs/my-website/docs/text_completion.md

diff --git a/docs/my-website/docs/text_completion.md b/docs/my-website/docs/text_completion.md
new file mode 100644
index 000000000..8be40dfdc
--- /dev/null
+++ b/docs/my-website/docs/text_completion.md
@@ -0,0 +1,174 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Text Completion

### Usage

<Tabs>
<TabItem value="python" label="LiteLLM Python SDK">

```python
from litellm import text_completion

response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7
)
```

</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy Server">

1. Define models on config.yaml

```yaml
model_list:
  - model_name: gpt-3.5-turbo-instruct
    litellm_params:
      model: text-completion-openai/gpt-3.5-turbo-instruct # The `text-completion-openai/` prefix will call openai.completions.create
      api_key: os.environ/OPENAI_API_KEY
  - model_name: text-davinci-003
    litellm_params:
      model: text-completion-openai/text-davinci-003
      api_key: os.environ/OPENAI_API_KEY
```

2. Start litellm proxy server

```
litellm --config config.yaml
```
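As an aside, the `text-completion-openai/` prefix shown in step 1 also works when calling LiteLLM directly, without the proxy. A minimal sketch, assuming `OPENAI_API_KEY` is set in the environment:

```python
import litellm

# the provider prefix routes this call to openai.completions.create
response = litellm.text_completion(
    model="text-completion-openai/gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7,
)
print(response.choices[0].text)
```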
<Tabs>
<TabItem value="python" label="OpenAI Python v1.0.0+">

```python
from openai import OpenAI

# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="", base_url="http://0.0.0.0:4000")

response = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7
)

print(response)
```

</TabItem>
<TabItem value="curl" label="Curl Request">

```shell
curl --location 'http://0.0.0.0:4000/completions' \
    --header 'Content-Type: application/json' \
    --header 'Authorization: Bearer sk-1234' \
    --data '{
        "model": "gpt-3.5-turbo-instruct",
        "prompt": "Say this is a test",
        "max_tokens": 7
    }'
```

</TabItem>
</Tabs>
</TabItem>
</Tabs>

## Input Params

LiteLLM accepts and translates the [OpenAI Text Completion params](https://platform.openai.com/docs/api-reference/completions) across all supported providers.

### Required Fields

- `model`: *string* - ID of the model to use
- `prompt`: *string or array* - The prompt(s) to generate completions for

### Optional Fields

- `best_of`: *integer* - Generates best_of completions server-side and returns the "best" one
- `echo`: *boolean* - Echo back the prompt in addition to the completion
- `frequency_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency
- `logit_bias`: *map* - Modify the likelihood of specified tokens appearing in the completion
- `logprobs`: *integer* - Include the log probabilities on the `logprobs` most likely tokens. Max value of 5
- `max_tokens`: *integer* - The maximum number of tokens to generate
- `n`: *integer* - How many completions to generate for each prompt
- `presence_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far
- `seed`: *integer* - If specified, the system will make a best effort to sample deterministically
- `stop`: *string or array* - Up to 4 sequences where the API will stop generating tokens
- `stream`: *boolean* - Whether to stream back partial progress. Defaults to false
- `suffix`: *string* - The suffix that comes after a completion of inserted text
- `temperature`: *number* - What sampling temperature to use, between 0 and 2
- `top_p`: *number* - An alternative to sampling with temperature, called nucleus sampling
- `user`: *string* - A unique identifier representing your end-user
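Several of these can be combined in one call. Below is a sketch of a streaming request that also sets `temperature`, `max_tokens`, and `stop` (it assumes an OpenAI key is configured):

```python
from litellm import text_completion

# stream partial results; generation halts at the first blank line
response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=20,
    temperature=0.2,
    stop=["\n\n"],
    stream=True,
)

for chunk in response:
    print(chunk.choices[0].text or "", end="")
```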
## Output Format
Here's the exact JSON output format you can expect from completion calls:

[**Follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/completions/object)

<Tabs>
<TabItem value="non-streaming" label="Non-Streaming Response">

```json
{
  "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
  "object": "text_completion",
  "created": 1589478378,
  "model": "gpt-3.5-turbo-instruct",
  "system_fingerprint": "fp_44709d6fcb",
  "choices": [
    {
      "text": "\n\nThis is indeed a test",
      "index": 0,
      "logprobs": null,
      "finish_reason": "length"
    }
  ],
  "usage": {
    "prompt_tokens": 5,
    "completion_tokens": 7,
    "total_tokens": 12
  }
}
```

</TabItem>
<TabItem value="streaming" label="Streaming Response Chunk">

```json
{
  "id": "cmpl-7iA7iJjj8V2zOkCGvWF2hAkDWBQZe",
  "object": "text_completion",
  "created": 1690759702,
  "choices": [
    {
      "text": "This",
      "index": 0,
      "logprobs": null,
      "finish_reason": null
    }
  ],
  "model": "gpt-3.5-turbo-instruct",
  "system_fingerprint": "fp_44709d6fcb"
}
```

</TabItem>
</Tabs>

## **Supported Providers**

| Provider | Link to Usage |
|-------------|--------------------|
| OpenAI | [Usage](../docs/providers/text_completion_openai) |
| Azure OpenAI| [Usage](../docs/providers/azure) |

diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 49fe33343..3ae914b0e 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -246,6 +246,7 @@ const sidebars = {
         "completion/usage",
       ],
     },
+    "text_completion",
     "embedding/supported_embedding",
     "image_generation",
     {