forked from phoenix/litellm-mirror
fix(utils.py): return function name for ollama_chat function calls
This commit is contained in:
parent
b4e12fb8fd
commit
0e7b30bec9
4 changed files with 79 additions and 26 deletions
|
@ -5,6 +5,12 @@ LiteLLM supports all models from [Ollama](https://github.com/jmorganca/ollama)
|
||||||
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
|
:::info
|
||||||
|
|
||||||
|
We recommend using [ollama_chat](#using-ollama-apichat) for better responses.
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
## Pre-requisites
|
## Pre-requisites
|
||||||
Ensure you have your ollama server running
|
Ensure you have your ollama server running
|
||||||
|
|
||||||
|
|
|
@ -588,6 +588,7 @@ from .llms.petals import PetalsConfig
|
||||||
from .llms.vertex_ai import VertexAIConfig
|
from .llms.vertex_ai import VertexAIConfig
|
||||||
from .llms.sagemaker import SagemakerConfig
|
from .llms.sagemaker import SagemakerConfig
|
||||||
from .llms.ollama import OllamaConfig
|
from .llms.ollama import OllamaConfig
|
||||||
|
from .llms.ollama_chat import OllamaChatConfig
|
||||||
from .llms.maritalk import MaritTalkConfig
|
from .llms.maritalk import MaritTalkConfig
|
||||||
from .llms.bedrock import (
|
from .llms.bedrock import (
|
||||||
AmazonTitanConfig,
|
AmazonTitanConfig,
|
||||||
|
|
|
@ -18,7 +18,7 @@ class OllamaError(Exception):
|
||||||
) # Call the base class constructor with the parameters it needs
|
) # Call the base class constructor with the parameters it needs
|
||||||
|
|
||||||
|
|
||||||
class OllamaConfig:
|
class OllamaChatConfig:
|
||||||
"""
|
"""
|
||||||
Reference: https://github.com/jmorganca/ollama/blob/main/docs/api.md#parameters
|
Reference: https://github.com/jmorganca/ollama/blob/main/docs/api.md#parameters
|
||||||
|
|
||||||
|
@ -108,6 +108,7 @@ class OllamaConfig:
|
||||||
k: v
|
k: v
|
||||||
for k, v in cls.__dict__.items()
|
for k, v in cls.__dict__.items()
|
||||||
if not k.startswith("__")
|
if not k.startswith("__")
|
||||||
|
and k != "function_name" # special param for function calling
|
||||||
and not isinstance(
|
and not isinstance(
|
||||||
v,
|
v,
|
||||||
(
|
(
|
||||||
|
@ -120,6 +121,61 @@ class OllamaConfig:
|
||||||
and v is not None
|
and v is not None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def get_supported_openai_params(
    self,
):
    """Return the names of the OpenAI request parameters accepted for ollama_chat."""
    # Plain generation / sampling controls.
    generation_params = [
        "max_tokens",
        "stream",
        "top_p",
        "temperature",
        "frequency_penalty",
        "stop",
    ]
    # Function-calling related parameters.
    function_calling_params = ["tools", "tool_choice", "functions"]
    return generation_params + function_calling_params
|
||||||
|
|
||||||
|
def map_openai_params(self, non_default_params: dict, optional_params: dict):
    """
    Translate OpenAI-style request params into their Ollama chat equivalents.

    Args:
        non_default_params: OpenAI params explicitly set by the caller.
            Mutated in place: "functions" is popped when present and
            "tool_choice" is always removed.
        optional_params: dict that receives the mapped Ollama params.

    Returns:
        The same `optional_params` dict, updated in place.
    """
    # Iterate over a snapshot: the "functions" branch pops from
    # non_default_params, and mutating a dict while iterating its live
    # items() view raises "RuntimeError: dictionary changed size during
    # iteration".
    for param, value in list(non_default_params.items()):
        if param == "max_tokens":
            optional_params["num_predict"] = value
        elif param == "stream":
            optional_params["stream"] = value
        elif param == "temperature":
            optional_params["temperature"] = value
        elif param == "top_p":
            optional_params["top_p"] = value
        elif param == "frequency_penalty":
            # Forward the caller's value; storing `param` here would send
            # the literal string "frequency_penalty" as the penalty.
            optional_params["repeat_penalty"] = value
        elif param == "stop":
            optional_params["stop"] = value
        ### FUNCTION CALLING LOGIC ###
        elif param == "tools":
            # ollama actually supports json output
            optional_params["format"] = "json"
            litellm.add_function_to_prompt = (
                True  # so that main.py adds the function call to the prompt
            )
            optional_params["functions_unsupported_model"] = value

            # With exactly one tool, record its name so it can be returned
            # as the function name of the resulting tool call.
            if len(value) == 1:
                optional_params["function_name"] = value[0]["function"]["name"]
        elif param == "functions":
            # ollama actually supports json output
            optional_params["format"] = "json"
            litellm.add_function_to_prompt = (
                True  # so that main.py adds the function call to the prompt
            )
            optional_params["functions_unsupported_model"] = non_default_params.pop(
                "functions"
            )
    non_default_params.pop("tool_choice", None)  # causes ollama requests to hang
    return optional_params
|
||||||
|
|
||||||
|
|
||||||
# ollama implementation
|
# ollama implementation
|
||||||
def get_ollama_response(
|
def get_ollama_response(
|
||||||
|
@ -138,7 +194,7 @@ def get_ollama_response(
|
||||||
url = f"{api_base}/api/chat"
|
url = f"{api_base}/api/chat"
|
||||||
|
|
||||||
## Load Config
|
## Load Config
|
||||||
config = litellm.OllamaConfig.get_config()
|
config = litellm.OllamaChatConfig.get_config()
|
||||||
for k, v in config.items():
|
for k, v in config.items():
|
||||||
if (
|
if (
|
||||||
k not in optional_params
|
k not in optional_params
|
||||||
|
@ -147,6 +203,7 @@ def get_ollama_response(
|
||||||
|
|
||||||
stream = optional_params.pop("stream", False)
|
stream = optional_params.pop("stream", False)
|
||||||
format = optional_params.pop("format", None)
|
format = optional_params.pop("format", None)
|
||||||
|
function_name = optional_params.pop("function_name", None)
|
||||||
|
|
||||||
for m in messages:
|
for m in messages:
|
||||||
if "role" in m and m["role"] == "tool":
|
if "role" in m and m["role"] == "tool":
|
||||||
|
@ -187,6 +244,7 @@ def get_ollama_response(
|
||||||
model_response=model_response,
|
model_response=model_response,
|
||||||
encoding=encoding,
|
encoding=encoding,
|
||||||
logging_obj=logging_obj,
|
logging_obj=logging_obj,
|
||||||
|
function_name=function_name,
|
||||||
)
|
)
|
||||||
return response
|
return response
|
||||||
elif stream == True:
|
elif stream == True:
|
||||||
|
@ -290,7 +348,9 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
|
|
||||||
async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
|
async def ollama_acompletion(
|
||||||
|
url, data, model_response, encoding, logging_obj, function_name
|
||||||
|
):
|
||||||
data["stream"] = False
|
data["stream"] = False
|
||||||
try:
|
try:
|
||||||
timeout = aiohttp.ClientTimeout(total=litellm.request_timeout) # 10 minutes
|
timeout = aiohttp.ClientTimeout(total=litellm.request_timeout) # 10 minutes
|
||||||
|
@ -324,7 +384,7 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
|
||||||
"id": f"call_{str(uuid.uuid4())}",
|
"id": f"call_{str(uuid.uuid4())}",
|
||||||
"function": {
|
"function": {
|
||||||
"arguments": response_json["message"]["content"],
|
"arguments": response_json["message"]["content"],
|
||||||
"name": "",
|
"name": function_name or "",
|
||||||
},
|
},
|
||||||
"type": "function",
|
"type": "function",
|
||||||
}
|
}
|
||||||
|
|
|
@ -4147,8 +4147,9 @@ def get_optional_params(
|
||||||
and custom_llm_provider != "mistral"
|
and custom_llm_provider != "mistral"
|
||||||
and custom_llm_provider != "anthropic"
|
and custom_llm_provider != "anthropic"
|
||||||
and custom_llm_provider != "bedrock"
|
and custom_llm_provider != "bedrock"
|
||||||
|
and custom_llm_provider != "ollama_chat"
|
||||||
):
|
):
|
||||||
if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
|
if custom_llm_provider == "ollama":
|
||||||
# ollama actually supports json output
|
# ollama actually supports json output
|
||||||
optional_params["format"] = "json"
|
optional_params["format"] = "json"
|
||||||
litellm.add_function_to_prompt = (
|
litellm.add_function_to_prompt = (
|
||||||
|
@ -4174,7 +4175,7 @@ def get_optional_params(
|
||||||
else:
|
else:
|
||||||
raise UnsupportedParamsError(
|
raise UnsupportedParamsError(
|
||||||
status_code=500,
|
status_code=500,
|
||||||
message=f"Function calling is not supported by {custom_llm_provider}. To add it to the prompt, set `litellm.add_function_to_prompt = True`.",
|
message=f"Function calling is not supported by {custom_llm_provider}.",
|
||||||
)
|
)
|
||||||
|
|
||||||
def _check_valid_arg(supported_params):
|
def _check_valid_arg(supported_params):
|
||||||
|
@ -4687,28 +4688,13 @@ def get_optional_params(
|
||||||
if stop is not None:
|
if stop is not None:
|
||||||
optional_params["stop"] = stop
|
optional_params["stop"] = stop
|
||||||
elif custom_llm_provider == "ollama_chat":
|
elif custom_llm_provider == "ollama_chat":
|
||||||
supported_params = [
|
supported_params = litellm.OllamaChatConfig().get_supported_openai_params()
|
||||||
"max_tokens",
|
|
||||||
"stream",
|
|
||||||
"top_p",
|
|
||||||
"temperature",
|
|
||||||
"frequency_penalty",
|
|
||||||
"stop",
|
|
||||||
]
|
|
||||||
_check_valid_arg(supported_params=supported_params)
|
_check_valid_arg(supported_params=supported_params)
|
||||||
|
|
||||||
if max_tokens is not None:
|
optional_params = litellm.OllamaChatConfig().map_openai_params(
|
||||||
optional_params["num_predict"] = max_tokens
|
non_default_params=non_default_params, optional_params=optional_params
|
||||||
if stream:
|
)
|
||||||
optional_params["stream"] = stream
|
|
||||||
if temperature is not None:
|
|
||||||
optional_params["temperature"] = temperature
|
|
||||||
if top_p is not None:
|
|
||||||
optional_params["top_p"] = top_p
|
|
||||||
if frequency_penalty is not None:
|
|
||||||
optional_params["repeat_penalty"] = frequency_penalty
|
|
||||||
if stop is not None:
|
|
||||||
optional_params["stop"] = stop
|
|
||||||
elif custom_llm_provider == "nlp_cloud":
|
elif custom_llm_provider == "nlp_cloud":
|
||||||
supported_params = [
|
supported_params = [
|
||||||
"max_tokens",
|
"max_tokens",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue