diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index 2884ff04a..283878056 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -45,6 +45,8 @@ class OllamaConfig:
 
     - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
 
+    - `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
+
     - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
 
     - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
@@ -69,6 +71,7 @@ class OllamaConfig:
     repeat_last_n: Optional[int] = None
     repeat_penalty: Optional[float] = None
     temperature: Optional[float] = None
+    seed: Optional[int] = None
     stop: Optional[list] = (
         None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
     )
@@ -90,6 +93,7 @@ class OllamaConfig:
         repeat_last_n: Optional[int] = None,
         repeat_penalty: Optional[float] = None,
         temperature: Optional[float] = None,
+        seed: Optional[int] = None,
         stop: Optional[list] = None,
         tfs_z: Optional[float] = None,
         num_predict: Optional[int] = None,
@@ -120,6 +124,19 @@ class OllamaConfig:
             )
             and v is not None
         }
+    def get_supported_openai_params(
+        self,
+    ):
+        return [
+            "max_tokens",
+            "stream",
+            "top_p",
+            "temperature",
+            "seed",
+            "frequency_penalty",
+            "stop",
+            "response_format",
+        ]
 
 # ollama wants plain base64 jpeg/png files as images. strip any leading dataURI
 # and convert to jpeg if necessary.
diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py
index d1ff4953f..a05807722 100644
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@@ -45,6 +45,8 @@ class OllamaChatConfig:
 
     - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
 
+    - `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
+
     - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
 
     - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
@@ -69,6 +71,7 @@ class OllamaChatConfig:
     repeat_last_n: Optional[int] = None
     repeat_penalty: Optional[float] = None
     temperature: Optional[float] = None
+    seed: Optional[int] = None
     stop: Optional[list] = (
         None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
     )
@@ -90,6 +93,7 @@ class OllamaChatConfig:
         repeat_last_n: Optional[int] = None,
         repeat_penalty: Optional[float] = None,
         temperature: Optional[float] = None,
+        seed: Optional[int] = None,
         stop: Optional[list] = None,
         tfs_z: Optional[float] = None,
         num_predict: Optional[int] = None,
@@ -130,6 +134,7 @@ class OllamaChatConfig:
             "stream",
             "top_p",
             "temperature",
+            "seed",
             "frequency_penalty",
             "stop",
             "tools",
@@ -146,6 +151,8 @@ class OllamaChatConfig:
                 optional_params["stream"] = value
             if param == "temperature":
                 optional_params["temperature"] = value
+            if param == "seed":
+                optional_params["seed"] = value
             if param == "top_p":
                 optional_params["top_p"] = value
             if param == "frequency_penalty":
diff --git a/litellm/utils.py b/litellm/utils.py
index 95d9160ef..43c04ba9b 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5752,6 +5752,8 @@ def get_optional_params(
             optional_params["stream"] = stream
         if temperature is not None:
             optional_params["temperature"] = temperature
+        if seed is not None:
+            optional_params["seed"] = seed
         if top_p is not None:
             optional_params["top_p"] = top_p
         if frequency_penalty is not None:
@@ -6404,6 +6406,8 @@ def get_supported_openai_params(
             return ["stream", "temperature", "max_tokens"]
         elif model.startswith("mistral"):
             return ["max_tokens", "temperature", "stop", "top_p", "stream"]
+    elif custom_llm_provider == "ollama":
+        return litellm.OllamaConfig().get_supported_openai_params()
     elif custom_llm_provider == "ollama_chat":
         return litellm.OllamaChatConfig().get_supported_openai_params()
     elif custom_llm_provider == "anthropic":
@@ -6573,16 +6577,6 @@ def get_supported_openai_params(
         ]
     elif custom_llm_provider == "cloudflare":
         return ["max_tokens", "stream"]
-    elif custom_llm_provider == "ollama":
-        return [
-            "max_tokens",
-            "stream",
-            "top_p",
-            "temperature",
-            "frequency_penalty",
-            "stop",
-            "response_format",
-        ]
     elif custom_llm_provider == "nlp_cloud":
         return [
            "max_tokens",
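
A minimal usage sketch of what this change enables (not part of the diff): with "seed" now listed by OllamaConfig.get_supported_openai_params() and wired through get_optional_params(), a completion call against an Ollama model should forward the seed to the backend instead of dropping it. The model tag and api_base below are placeholder assumptions for a local Ollama install, and the helper call assumes the get_supported_openai_params(model, custom_llm_provider) signature seen in this diff.

import litellm
from litellm.utils import get_supported_openai_params

# Placeholder model tag and endpoint for a locally running Ollama server.
response = litellm.completion(
    model="ollama/llama2",  # any model tag you have pulled locally
    messages=[{"role": "user", "content": "Say hi in one word."}],
    seed=42,                # same seed + same prompt -> reproducible output
    temperature=0.7,
    api_base="http://localhost:11434",
)
print(response.choices[0].message.content)

# After this patch, "seed" appears in the provider's supported-parameter list.
print(get_supported_openai_params(model="llama2", custom_llm_provider="ollama"))

Note that reproducibility still depends on the Ollama backend honoring the seed option; the patch only maps the OpenAI-style parameter through to the request.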