fix: add missing seed parameter to ollama input

The current Ollama integration does not expose the seed parameter, even though Ollama supports it (see https://github.com/ollama/ollama/blob/main/docs/api.md#parameters and https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values).

This commit resolves that by adding handling for the seed parameter.
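
For illustration, a minimal usage sketch (the model name is hypothetical; assumes a local Ollama server and an installed litellm). With seed forwarded to Ollama, repeated calls with the same prompt and seed should produce the same completion:

import litellm

# seed is now passed through litellm's OpenAI-compatible interface
# down to Ollama's options
response = litellm.completion(
    model="ollama/llama2",  # assumption: any locally pulled Ollama model
    messages=[{"role": "user", "content": "Write a haiku about rivers."}],
    seed=42,
    temperature=0.7,
)
print(response.choices[0].message.content)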
KX 2024-05-31 01:47:56 +08:00
parent 9f189ac91c
commit d3921a3d28
3 changed files with 28 additions and 10 deletions


@@ -45,6 +45,8 @@ class OllamaConfig:
     - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
+    - `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
     - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
     - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
@@ -69,6 +71,7 @@ class OllamaConfig:
     repeat_last_n: Optional[int] = None
     repeat_penalty: Optional[float] = None
     temperature: Optional[float] = None
+    seed: Optional[int] = None
     stop: Optional[list] = (
         None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
     )
@@ -90,6 +93,7 @@ class OllamaConfig:
         repeat_last_n: Optional[int] = None,
         repeat_penalty: Optional[float] = None,
         temperature: Optional[float] = None,
+        seed: Optional[int] = None,
         stop: Optional[list] = None,
         tfs_z: Optional[float] = None,
         num_predict: Optional[int] = None,
@@ -120,6 +124,19 @@ class OllamaConfig:
             )
             and v is not None
         }
+    def get_supported_openai_params(
+        self,
+    ):
+        return [
+            "max_tokens",
+            "stream",
+            "top_p",
+            "temperature",
+            "seed",
+            "frequency_penalty",
+            "stop",
+            "response_format",
+        ]
 
 # ollama wants plain base64 jpeg/png files as images. strip any leading dataURI
 # and convert to jpeg if necessary.
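
A quick way to sanity-check the new method (a sketch; assumes litellm is importable — OllamaConfig is referenced as litellm.OllamaConfig in the utils.py change below):

import litellm

params = litellm.OllamaConfig().get_supported_openai_params()
assert "seed" in params  # present after this change
print(params)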


@@ -45,6 +45,8 @@ class OllamaChatConfig:
     - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
+    - `seed` (int): Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. Example usage: seed 42
     - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
     - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
@@ -69,6 +71,7 @@ class OllamaChatConfig:
     repeat_last_n: Optional[int] = None
     repeat_penalty: Optional[float] = None
     temperature: Optional[float] = None
+    seed: Optional[int] = None
     stop: Optional[list] = (
         None  # stop is a list based on this - https://github.com/ollama/ollama/pull/442
     )
@@ -90,6 +93,7 @@ class OllamaChatConfig:
         repeat_last_n: Optional[int] = None,
         repeat_penalty: Optional[float] = None,
         temperature: Optional[float] = None,
+        seed: Optional[int] = None,
         stop: Optional[list] = None,
         tfs_z: Optional[float] = None,
         num_predict: Optional[int] = None,
@@ -130,6 +134,7 @@ class OllamaChatConfig:
             "stream",
             "top_p",
             "temperature",
+            "seed",
             "frequency_penalty",
             "stop",
             "tools",
@@ -146,6 +151,8 @@ class OllamaChatConfig:
                 optional_params["stream"] = value
             if param == "temperature":
                 optional_params["temperature"] = value
+            if param == "seed":
+                optional_params["seed"] = value
             if param == "top_p":
                 optional_params["top_p"] = value
             if param == "frequency_penalty":
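
For illustration, a sketch of the mapping this hunk enables (assumes litellm is importable and that map_openai_params takes non_default_params and optional_params dicts, matching the surrounding code):

import litellm

config = litellm.OllamaChatConfig()
optional_params = config.map_openai_params(
    non_default_params={"seed": 42, "temperature": 0.7},
    optional_params={},
)
print(optional_params)  # expected to include {"seed": 42, "temperature": 0.7}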


@@ -5752,6 +5752,8 @@ def get_optional_params(
             optional_params["stream"] = stream
         if temperature is not None:
             optional_params["temperature"] = temperature
+        if seed is not None:
+            optional_params["seed"] = seed
         if top_p is not None:
             optional_params["top_p"] = top_p
         if frequency_penalty is not None:
@@ -6404,6 +6406,8 @@ def get_supported_openai_params(
         return ["stream", "temperature", "max_tokens"]
     elif model.startswith("mistral"):
         return ["max_tokens", "temperature", "stop", "top_p", "stream"]
+    elif custom_llm_provider == "ollama":
+        return litellm.OllamaConfig().get_supported_openai_params()
     elif custom_llm_provider == "ollama_chat":
         return litellm.OllamaChatConfig().get_supported_openai_params()
     elif custom_llm_provider == "anthropic":
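
A routing sketch for the relocated branch (hedged; the model name is an assumption). The "ollama" provider now resolves through OllamaConfig rather than the hardcoded list removed in the next hunk:

from litellm import get_supported_openai_params

# expected to include "seed" after this commit
print(get_supported_openai_params(model="llama2", custom_llm_provider="ollama"))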
@@ -6573,16 +6577,6 @@ def get_supported_openai_params(
         ]
     elif custom_llm_provider == "cloudflare":
         return ["max_tokens", "stream"]
-    elif custom_llm_provider == "ollama":
-        return [
-            "max_tokens",
-            "stream",
-            "top_p",
-            "temperature",
-            "frequency_penalty",
-            "stop",
-            "response_format",
-        ]
     elif custom_llm_provider == "nlp_cloud":
         return [
             "max_tokens",