From d717fd8fcb02b1ee4c13812f8c59fed6e2eff53f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 15:32:06 -0800
Subject: [PATCH 1/8] fix _pass_through_moderation_endpoint_factory

---
 litellm/router.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index d09f3be8b..3751b2403 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2563,10 +2563,7 @@ class Router:
         original_function: Callable,
         **kwargs,
     ):
-        if (
-            "model" in kwargs
-            and self.get_model_list(model_name=kwargs["model"]) is not None
-        ):
+        if kwargs.get("model") and self.get_model_list(model_name=kwargs["model"]):
             deployment = await self.async_get_available_deployment(
                 model=kwargs["model"]
             )

From 2c84b19550b84e4e62bacc362b737223807f86d8 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 15:35:19 -0800
Subject: [PATCH 2/8] fix route_llm_request

---
 litellm/proxy/route_llm_request.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/route_llm_request.py b/litellm/proxy/route_llm_request.py
index 3c5c8b3b4..ec9850eeb 100644
--- a/litellm/proxy/route_llm_request.py
+++ b/litellm/proxy/route_llm_request.py
@@ -86,7 +86,6 @@ async def route_request(
         else:
             models = [model.strip() for model in data.pop("model").split(",")]
             return llm_router.abatch_completion(models=models, **data)
-
     elif llm_router is not None:
         if (
             data["model"] in router_model_names
@@ -113,6 +112,9 @@ async def route_request(
             or len(llm_router.pattern_router.patterns) > 0
         ):
             return getattr(llm_router, f"{route_type}")(**data)
+        elif route_type == "amoderation":
+            # moderation endpoint does not require `model` parameter
+            return getattr(llm_router, f"{route_type}")(**data)
 
     elif user_model is not None:
         return getattr(litellm, f"{route_type}")(**data)

From a9b564782ca4208619781b6e4dc383bf13352122 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 15:35:48 -0800
Subject: [PATCH 3/8] doc moderations api

---
 docs/my-website/docs/moderation.md | 113 +++++++++++++++++++++++++++++
 docs/my-website/sidebars.js        |   2 +
 2 files changed, 115 insertions(+)
 create mode 100644 docs/my-website/docs/moderation.md

diff --git a/docs/my-website/docs/moderation.md b/docs/my-website/docs/moderation.md
new file mode 100644
index 000000000..bd756fe8d
--- /dev/null
+++ b/docs/my-website/docs/moderation.md
@@ -0,0 +1,113 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Moderation

### Usage

<Tabs>
<TabItem value="python" label="LiteLLM Python SDK">

```python
from litellm import moderation

response = moderation(
    input="hello from litellm",
    model="text-moderation-stable"
)
```

</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy Server">

For `/moderations` endpoint, there is no need

```python
from openai import OpenAI

# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="", base_url="http://0.0.0.0:4000")

response = client.moderations.create(
    input="hello from litellm",
    model="text-moderation-stable"
)

print(response)
```

```shell
curl --location 'http://0.0.0.0:4000/moderations' \
    --header 'Content-Type: application/json' \
    --header 'Authorization: Bearer sk-1234' \
    --data '{"input": "Sample text goes here", "model": "text-moderation-stable"}'
```

</TabItem>
</Tabs>

## Input Params
LiteLLM accepts and translates the [OpenAI Moderation params](https://platform.openai.com/docs/api-reference/moderations) across all supported providers.

### Required Fields

- `input`: *string or array* - Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models (see the sketch after this list).
  - If string: A string of text to classify for moderation
  - If array of strings: An array of strings to classify for moderation
  - If array of objects: An array of multi-modal inputs to the moderation model, where each object can be:
    - An object describing an image to classify with:
      - `type`: *string, required* - Always `image_url`
      - `image_url`: *object, required* - Contains either an image URL or a data URL for a base64 encoded image
    - An object describing text to classify with:
      - `type`: *string, required* - Always `text`
      - `text`: *string, required* - A string of text to classify
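For example, a multi-modal `input` array mixing text and an image might look like the following. This is a sketch only: it assumes an image-capable moderation model (e.g. `omni-moderation-latest`) and uses a placeholder image URL.

```python
from litellm import moderation

# classify a text snippet and an image in a single request
response = moderation(
    model="omni-moderation-latest",  # assumed: a multi-modal moderation model
    input=[
        {"type": "text", "text": "a string of text to classify"},
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/image.png"},  # placeholder
        },
    ],
)
```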
### Optional Fields

- `model`: *string (optional)* - The moderation model to use. Defaults to `omni-moderation-latest`.

## Output Format
Here's the exact JSON output and type you can expect from all moderation calls:

```json
{
  "id": "modr-AB8CjOTu2jiq12hp1AQPfeqFWaORR",
  "model": "text-moderation-007",
  "results": [
    {
      "flagged": true,
      "categories": {
        "sexual": false,
        "hate": false,
        "harassment": true,
        "self-harm": false,
        "sexual/minors": false,
        "hate/threatening": false,
        "violence/graphic": false,
        "self-harm/intent": false,
        "self-harm/instructions": false,
        "harassment/threatening": true,
        "violence": true
      },
      "category_scores": {
        "sexual": 0.000011726012417057063,
        "hate": 0.22706663608551025,
        "harassment": 0.5215635299682617,
        "self-harm": 2.227119921371923e-6,
        "sexual/minors": 7.107352217872176e-8,
        "hate/threatening": 0.023547329008579254,
        "violence/graphic": 0.00003391829886822961,
        "self-harm/intent": 1.646940972932498e-6,
        "self-harm/instructions": 1.1198755256458526e-9,
        "harassment/threatening": 0.5694745779037476,
        "violence": 0.9971134662628174
      }
    }
  ]
}
```
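A simple way to consume this shape, sketched under the assumption that the response is the OpenAI-format pydantic object shown above:

```python
result = response.results[0]

if result.flagged:
    # collect the category names the model flagged (pydantic v2 `model_dump`)
    flagged_categories = [
        name for name, hit in result.categories.model_dump().items() if hit
    ]
    print("flagged categories:", flagged_categories)
```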
+ + + ## Input Params LiteLLM accepts and translates the [OpenAI Moderation params](https://platform.openai.com/docs/api-reference/moderations) across all supported providers. @@ -111,3 +125,8 @@ Here's the exact json output and type you can expect from all moderation calls: ``` +## **Supported Providers** + +| Provider | +|-------------| +| OpenAI | diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 79eb326bd..49fe33343 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -246,7 +246,6 @@ const sidebars = { "completion/usage", ], }, - "text_completion", "embedding/supported_embedding", "image_generation", { @@ -262,7 +261,7 @@ const sidebars = { "batches", "realtime", "fine_tuning", - "moderation"," + "moderation", { type: "link", label: "Use LiteLLM Proxy with Vertex, Bedrock SDK", From 6d7f1ea43269019bcb11ebff82b565b146c8acd9 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 27 Nov 2024 15:47:19 -0800 Subject: [PATCH 5/8] add e2e tests for moderations api --- tests/otel_tests/test_moderations.py | 71 ++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 tests/otel_tests/test_moderations.py diff --git a/tests/otel_tests/test_moderations.py b/tests/otel_tests/test_moderations.py new file mode 100644 index 000000000..21abf7489 --- /dev/null +++ b/tests/otel_tests/test_moderations.py @@ -0,0 +1,71 @@ +import pytest +import asyncio +import aiohttp, openai +from openai import OpenAI, AsyncOpenAI +from typing import Optional, List, Union +import uuid + + +async def make_moderations_curl_request( + session, + key, + request_data: dict, +): + url = "http://0.0.0.0:4000/moderations" + headers = { + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + } + + async with session.post(url, headers=headers, json=request_data) as response: + status = response.status + response_text = await response.text() + + if status != 200: + raise Exception(response_text) + + return await response.json() + + +@pytest.mark.asyncio +async def test_basic_moderations_on_proxy_no_model(): + """ + Test moderations endpoint on proxy when no `model` is specified in the request + """ + async with aiohttp.ClientSession() as session: + test_text = "I want to harm someone" # Test text that should trigger moderation + request_data = { + "input": test_text, + } + try: + response = await make_moderations_curl_request( + session, + "sk-1234", + request_data, + ) + print("response=", response) + except Exception as e: + print(e) + pytest.fail("Moderations request failed") + + +@pytest.mark.asyncio +async def test_basic_moderations_on_proxy_with_model(): + """ + Test moderations endpoint on proxy when `model` is specified in the request + """ + async with aiohttp.ClientSession() as session: + test_text = "I want to harm someone" # Test text that should trigger moderation + request_data = { + "input": test_text, + "model": "text-moderation-stable", + } + try: + response = await make_moderations_curl_request( + session, + "sk-1234", + request_data, + ) + print("response=", response) + except Exception as e: + pytest.fail("Moderations request failed") From 48227c133076097f5570de6f67fccc3fd821443c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 27 Nov 2024 15:50:07 -0800 Subject: [PATCH 6/8] docs moderations api --- docs/my-website/docs/moderation.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/my-website/docs/moderation.md b/docs/my-website/docs/moderation.md index 1bde75237..6dd092fb5 100644 --- 
From 48227c133076097f5570de6f67fccc3fd821443c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 15:50:07 -0800
Subject: [PATCH 6/8] docs moderations api

---
 docs/my-website/docs/moderation.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/my-website/docs/moderation.md b/docs/my-website/docs/moderation.md
index 1bde75237..6dd092fb5 100644
--- a/docs/my-website/docs/moderation.md
+++ b/docs/my-website/docs/moderation.md
@@ -85,6 +85,9 @@ LiteLLM accepts and translates the [OpenAI Moderation params](https://platform.o
 ## Output Format
 Here's the exact JSON output and type you can expect from all moderation calls:
 
+[**LiteLLM follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/moderations/object)
+
+
 ```json

From 7026f4fdfe7c68cde372a0b861e7a0361bbcb01a Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 15:51:47 -0800
Subject: [PATCH 7/8] test_pass_through_moderation_endpoint_factory

---
 tests/router_unit_tests/test_router_helper_utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/router_unit_tests/test_router_helper_utils.py b/tests/router_unit_tests/test_router_helper_utils.py
index 3c51c619e..f247c33e3 100644
--- a/tests/router_unit_tests/test_router_helper_utils.py
+++ b/tests/router_unit_tests/test_router_helper_utils.py
@@ -1040,8 +1040,11 @@ def test_pattern_match_deployment_set_model_name(
 async def test_pass_through_moderation_endpoint_factory(model_list):
     router = Router(model_list=model_list)
     response = await router._pass_through_moderation_endpoint_factory(
-        original_function=litellm.amoderation, input="this is valid good text"
+        original_function=litellm.amoderation,
+        input="this is valid good text",
+        model=None,
     )
+    assert response is not None
 
 
 @pytest.mark.parametrize(

From 195a36e6afa857f011e893619e2f269b4b89a68f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 27 Nov 2024 16:03:30 -0800
Subject: [PATCH 8/8] docs text completion

---
 docs/my-website/docs/text_completion.md | 174 ++++++++++++++++++++++
 docs/my-website/sidebars.js             |   1 +
 2 files changed, 175 insertions(+)
 create mode 100644 docs/my-website/docs/text_completion.md

diff --git a/docs/my-website/docs/text_completion.md b/docs/my-website/docs/text_completion.md
new file mode 100644
index 000000000..8be40dfdc
--- /dev/null
+++ b/docs/my-website/docs/text_completion.md
@@ -0,0 +1,174 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Text Completion

### Usage

<Tabs>
<TabItem value="python" label="LiteLLM Python SDK">

```python
from litellm import text_completion

response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7
)
```

</TabItem>
<TabItem value="proxy" label="LiteLLM Proxy Server">

1. Define models on config.yaml

```yaml
model_list:
  - model_name: gpt-3.5-turbo-instruct
    litellm_params:
      model: text-completion-openai/gpt-3.5-turbo-instruct # The `text-completion-openai/` prefix will call openai.completions.create
      api_key: os.environ/OPENAI_API_KEY
  - model_name: text-davinci-003
    litellm_params:
      model: text-completion-openai/text-davinci-003
      api_key: os.environ/OPENAI_API_KEY
```

2. Start litellm proxy server

```
litellm --config config.yaml
```
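As an aside, the `text-completion-openai/` prefix shown in step 1 also works when calling LiteLLM directly, without the proxy. A minimal sketch, assuming `OPENAI_API_KEY` is set in the environment:

```python
import litellm

# the provider prefix routes this call to openai.completions.create
response = litellm.text_completion(
    model="text-completion-openai/gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7,
)
print(response.choices[0].text)
```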
<Tabs>
<TabItem value="python" label="OpenAI Python v1.0.0+">

```python
from openai import OpenAI

# set base_url to your proxy server
# set api_key to send to proxy server
client = OpenAI(api_key="", base_url="http://0.0.0.0:4000")

response = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=7
)

print(response)
```

</TabItem>
<TabItem value="curl" label="Curl Request">

```shell
curl --location 'http://0.0.0.0:4000/completions' \
    --header 'Content-Type: application/json' \
    --header 'Authorization: Bearer sk-1234' \
    --data '{
        "model": "gpt-3.5-turbo-instruct",
        "prompt": "Say this is a test",
        "max_tokens": 7
    }'
```

</TabItem>
</Tabs>
</TabItem>
</Tabs>

## Input Params

LiteLLM accepts and translates the [OpenAI Text Completion params](https://platform.openai.com/docs/api-reference/completions) across all supported providers.

### Required Fields

- `model`: *string* - ID of the model to use
- `prompt`: *string or array* - The prompt(s) to generate completions for

### Optional Fields

- `best_of`: *integer* - Generates best_of completions server-side and returns the "best" one
- `echo`: *boolean* - Echo back the prompt in addition to the completion
- `frequency_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency
- `logit_bias`: *map* - Modify the likelihood of specified tokens appearing in the completion
- `logprobs`: *integer* - Include the log probabilities on the `logprobs` most likely tokens. Max value of 5
- `max_tokens`: *integer* - The maximum number of tokens to generate
- `n`: *integer* - How many completions to generate for each prompt
- `presence_penalty`: *number* - Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far
- `seed`: *integer* - If specified, the system will make a best effort to sample deterministically
- `stop`: *string or array* - Up to 4 sequences where the API will stop generating tokens
- `stream`: *boolean* - Whether to stream back partial progress. Defaults to false
- `suffix`: *string* - The suffix that comes after a completion of inserted text
- `temperature`: *number* - What sampling temperature to use, between 0 and 2
- `top_p`: *number* - An alternative to sampling with temperature, called nucleus sampling
- `user`: *string* - A unique identifier representing your end-user
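Several of these can be combined in one call. Below is a sketch of a streaming request that also sets `temperature`, `max_tokens`, and `stop` (it assumes an OpenAI key is configured):

```python
from litellm import text_completion

# stream partial results; generation halts at the first blank line
response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="Say this is a test",
    max_tokens=20,
    temperature=0.2,
    stop=["\n\n"],
    stream=True,
)

for chunk in response:
    print(chunk.choices[0].text or "", end="")
```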
## Output Format
Here's the exact JSON output format you can expect from completion calls:

[**Follows OpenAI's output format**](https://platform.openai.com/docs/api-reference/completions/object)

<Tabs>
<TabItem value="non-streaming" label="Non-Streaming Response">

```json
{
  "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
  "object": "text_completion",
  "created": 1589478378,
  "model": "gpt-3.5-turbo-instruct",
  "system_fingerprint": "fp_44709d6fcb",
  "choices": [
    {
      "text": "\n\nThis is indeed a test",
      "index": 0,
      "logprobs": null,
      "finish_reason": "length"
    }
  ],
  "usage": {
    "prompt_tokens": 5,
    "completion_tokens": 7,
    "total_tokens": 12
  }
}
```

</TabItem>
<TabItem value="streaming" label="Streaming Response Chunk">

```json
{
  "id": "cmpl-7iA7iJjj8V2zOkCGvWF2hAkDWBQZe",
  "object": "text_completion",
  "created": 1690759702,
  "choices": [
    {
      "text": "This",
      "index": 0,
      "logprobs": null,
      "finish_reason": null
    }
  ],
  "model": "gpt-3.5-turbo-instruct",
  "system_fingerprint": "fp_44709d6fcb"
}
```

</TabItem>
</Tabs>

## **Supported Providers**

| Provider | Link to Usage |
|-------------|--------------------|
| OpenAI | [Usage](../docs/providers/text_completion_openai) |
| Azure OpenAI| [Usage](../docs/providers/azure) |

diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 49fe33343..3ae914b0e 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -246,6 +246,7 @@ const sidebars = {
         "completion/usage",
       ],
     },
+    "text_completion",
     "embedding/supported_embedding",
     "image_generation",
     {