fix: add missing seed parameter to ollama input
The current Ollama integration does not accept a `seed` parameter, even though Ollama supports it (see https://github.com/ollama/ollama/blob/main/docs/api.md#parameters and https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values). This resolves that by adding handling for the `seed` parameter.
parent 9f189ac91c
commit d3921a3d28
3 changed files with 28 additions and 10 deletions
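As a minimal usage sketch of the change (assuming a running local Ollama server with a pulled model; the model name and prompt are illustrative, not from the commit), a `seed` passed through the OpenAI-style interface is now forwarded to Ollama, so repeated calls with the same prompt and seed should return the same text:

import litellm

# Illustrative only: requires a local Ollama server with this model pulled.
response = litellm.completion(
    model="ollama/llama2",
    messages=[{"role": "user", "content": "Tell me a short joke."}],
    seed=42,  # previously dropped for Ollama; now passed through as an option
)
print(response.choices[0].message.content)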
litellm/llms/ollama.py

@@ -45,6 +45,8 @@ class OllamaConfig:
 
     - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
 
+    - `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
+
     - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
 
     - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1

@@ -69,6 +71,7 @@ class OllamaConfig:
     repeat_last_n: Optional[int] = None
     repeat_penalty: Optional[float] = None
     temperature: Optional[float] = None
+    seed: Optional[int] = None
     stop: Optional[list] = (
         None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
     )

@@ -90,6 +93,7 @@ class OllamaConfig:
         repeat_last_n: Optional[int] = None,
         repeat_penalty: Optional[float] = None,
         temperature: Optional[float] = None,
+        seed: Optional[int] = None,
         stop: Optional[list] = None,
         tfs_z: Optional[float] = None,
         num_predict: Optional[int] = None,

@@ -120,6 +124,19 @@ class OllamaConfig:
             )
             and v is not None
         }
+    def get_supported_openai_params(
+        self,
+    ):
+        return [
+            "max_tokens",
+            "stream",
+            "top_p",
+            "temperature",
+            "seed",
+            "frequency_penalty",
+            "stop",
+            "response_format",
+        ]
 
 # ollama wants plain base64 jpeg/png files as images. strip any leading dataURI
 # and convert to jpeg if necessary.
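The new `get_supported_openai_params` method above makes the supported list introspectable. A quick sketch of calling it (nothing here beyond what the diff shows, assuming `OllamaConfig` is exported at the package level, as the utils.py hunks below suggest):

import litellm

supported = litellm.OllamaConfig().get_supported_openai_params()
# Per the hunk above:
# ["max_tokens", "stream", "top_p", "temperature", "seed",
#  "frequency_penalty", "stop", "response_format"]
assert "seed" in supported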
litellm/llms/ollama_chat.py

@@ -45,6 +45,8 @@ class OllamaChatConfig:
 
     - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
 
+    - `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
+
     - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
 
     - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1

@@ -69,6 +71,7 @@ class OllamaChatConfig:
     repeat_last_n: Optional[int] = None
     repeat_penalty: Optional[float] = None
     temperature: Optional[float] = None
+    seed: Optional[int] = None
     stop: Optional[list] = (
         None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
     )

@@ -90,6 +93,7 @@ class OllamaChatConfig:
         repeat_last_n: Optional[int] = None,
         repeat_penalty: Optional[float] = None,
         temperature: Optional[float] = None,
+        seed: Optional[int] = None,
         stop: Optional[list] = None,
         tfs_z: Optional[float] = None,
         num_predict: Optional[int] = None,

@@ -130,6 +134,7 @@ class OllamaChatConfig:
             "stream",
             "top_p",
             "temperature",
+            "seed",
             "frequency_penalty",
             "stop",
             "tools",

@@ -146,6 +151,8 @@ class OllamaChatConfig:
                 optional_params["stream"] = value
             if param == "temperature":
                 optional_params["temperature"] = value
+            if param == "seed":
+                optional_params["seed"] = value
             if param == "top_p":
                 optional_params["top_p"] = value
             if param == "frequency_penalty":
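The final hunk above extends the per-key translation loop in `map_openai_params`: each recognized OpenAI-style parameter is copied into the provider's options dict, one `if` per key. A self-contained sketch of that pattern in plain Python (illustrative, not litellm's internal API):

# Mirrors the mapping style shown in the diff.
non_default_params = {"temperature": 0.7, "seed": 42, "top_p": 0.9}
optional_params = {}
for param, value in non_default_params.items():
    if param == "temperature":
        optional_params["temperature"] = value
    if param == "seed":
        optional_params["seed"] = value
    if param == "top_p":
        optional_params["top_p"] = value
print(optional_params)  # {'temperature': 0.7, 'seed': 42, 'top_p': 0.9}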
litellm/utils.py

@@ -5752,6 +5752,8 @@ def get_optional_params(
             optional_params["stream"] = stream
         if temperature is not None:
             optional_params["temperature"] = temperature
+        if seed is not None:
+            optional_params["seed"] = seed
         if top_p is not None:
             optional_params["top_p"] = top_p
         if frequency_penalty is not None:

@@ -6404,6 +6406,8 @@ def get_supported_openai_params(
         return ["stream", "temperature", "max_tokens"]
     elif model.startswith("mistral"):
         return ["max_tokens", "temperature", "stop", "top_p", "stream"]
+    elif custom_llm_provider == "ollama":
+        return litellm.OllamaConfig().get_supported_openai_params()
     elif custom_llm_provider == "ollama_chat":
         return litellm.OllamaChatConfig().get_supported_openai_params()
     elif custom_llm_provider == "anthropic":

@@ -6573,16 +6577,6 @@ def get_supported_openai_params(
         ]
     elif custom_llm_provider == "cloudflare":
         return ["max_tokens", "stream"]
-    elif custom_llm_provider == "ollama":
-        return [
-            "max_tokens",
-            "stream",
-            "top_p",
-            "temperature",
-            "frequency_penalty",
-            "stop",
-            "response_format",
-        ]
     elif custom_llm_provider == "nlp_cloud":
         return [
             "max_tokens",
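The net effect in utils.py: `get_optional_params` now forwards `seed` for Ollama, and the hardcoded Ollama list in `get_supported_openai_params` (which predated `seed`) is deleted in favor of delegating to `litellm.OllamaConfig()`, so the supported-params list is defined in one place and cannot drift. A hedged sanity check, assuming only the function signature implied by the hunks above (`model` and `custom_llm_provider` arguments):

from litellm.utils import get_supported_openai_params

# After this change both the generate and chat providers resolve their
# supported params through their config classes, so "seed" should appear.
params = get_supported_openai_params(model="llama2", custom_llm_provider="ollama")
assert "seed" in params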