diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d8e750fde..4298ed28b 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -714,7 +714,8 @@
         "input_cost_per_token": 0.00000070,
         "output_cost_per_token": 0.00000080,
         "litellm_provider": "groq",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true
     },
     "groq/mixtral-8x7b-32768": {
         "max_tokens": 32768,
@@ -723,7 +724,8 @@
         "input_cost_per_token": 0.00000027,
         "output_cost_per_token": 0.00000027,
         "litellm_provider": "groq",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true
     },
     "groq/gemma-7b-it": {
         "max_tokens": 8192,
@@ -732,7 +734,8 @@
         "input_cost_per_token": 0.00000010,
         "output_cost_per_token": 0.00000010,
         "litellm_provider": "groq",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_function_calling": true
     },
     "claude-instant-1.2": {
         "max_tokens": 8191,
diff --git a/litellm/tests/test_function_calling.py b/litellm/tests/test_function_calling.py
index ffef8f659..2a815edc8 100644
--- a/litellm/tests/test_function_calling.py
+++ b/litellm/tests/test_function_calling.py
@@ -219,3 +219,94 @@ def test_parallel_function_call_stream():
 
 
 # test_parallel_function_call_stream()
+
+
+def test_groq_parallel_function_call():
+    litellm.set_verbose = True
+    try:
+        # Step 1: send the conversation and available functions to the model
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a function calling LLM that uses the data extracted from get_current_weather to answer questions about the weather in San Francisco.",
+            },
+            {
+                "role": "user",
+                "content": "What's the weather like in San Francisco?",
+            },
+        ]
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_current_weather",
+                    "description": "Get the current weather in a given location",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "location": {
+                                "type": "string",
+                                "description": "The city and state, e.g. San Francisco, CA",
San Francisco, CA", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["location"], + }, + }, + } + ] + response = litellm.completion( + model="groq/llama2-70b-4096", + messages=messages, + tools=tools, + tool_choice="auto", # auto is default, but we'll be explicit + ) + print("Response\n", response) + response_message = response.choices[0].message + tool_calls = response_message.tool_calls + + assert isinstance(response.choices[0].message.tool_calls[0].function.name, str) + assert isinstance( + response.choices[0].message.tool_calls[0].function.arguments, str + ) + + print("length of tool calls", len(tool_calls)) + + # Step 2: check if the model wanted to call a function + if tool_calls: + # Step 3: call the function + # Note: the JSON response may not always be valid; be sure to handle errors + available_functions = { + "get_current_weather": get_current_weather, + } # only one function in this example, but you can have multiple + messages.append( + response_message + ) # extend conversation with assistant's reply + print("Response message\n", response_message) + # Step 4: send the info for each function call and function response to the model + for tool_call in tool_calls: + function_name = tool_call.function.name + function_to_call = available_functions[function_name] + function_args = json.loads(tool_call.function.arguments) + function_response = function_to_call( + location=function_args.get("location"), + unit=function_args.get("unit"), + ) + messages.append( + { + "tool_call_id": tool_call.id, + "role": "tool", + "name": function_name, + "content": function_response, + } + ) # extend conversation with function response + print(f"messages: {messages}") + second_response = litellm.completion( + model="groq/llama2-70b-4096", messages=messages + ) # get a new response from the model where it can see the function response + print("second response\n", second_response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") diff --git a/litellm/tests/test_utils.py b/litellm/tests/test_utils.py index 2e5ac889f..0344f2114 100644 --- a/litellm/tests/test_utils.py +++ b/litellm/tests/test_utils.py @@ -219,17 +219,19 @@ def test_validate_environment_empty_model(): @mock.patch.dict(os.environ, {"OLLAMA_API_BASE": "foo"}, clear=True) def test_validate_environment_ollama(): for provider in ["ollama", "ollama_chat"]: - kv = validate_environment(provider+"/mistral") + kv = validate_environment(provider + "/mistral") assert kv["keys_in_environment"] assert kv["missing_keys"] == [] + @mock.patch.dict(os.environ, {}, clear=True) def test_validate_environment_ollama_failed(): for provider in ["ollama", "ollama_chat"]: - kv = validate_environment(provider+"/mistral") + kv = validate_environment(provider + "/mistral") assert not kv["keys_in_environment"] assert kv["missing_keys"] == ["OLLAMA_API_BASE"] + def test_function_to_dict(): print("testing function to dict for get current weather") @@ -338,6 +340,7 @@ def test_supports_function_calling(): assert ( litellm.supports_function_calling(model="azure/gpt-4-1106-preview") == True ) + assert litellm.supports_function_calling(model="groq/gemma-7b-it") == True assert ( litellm.supports_function_calling(model="anthropic.claude-instant-v1") == False diff --git a/litellm/utils.py b/litellm/utils.py index b94c22bc9..8440e361a 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4523,6 +4523,7 @@ def get_optional_params( and custom_llm_provider != "vertex_ai" and custom_llm_provider != "anyscale" and 
custom_llm_provider != "together_ai" + and custom_llm_provider != "groq" and custom_llm_provider != "mistral" and custom_llm_provider != "anthropic" and custom_llm_provider != "cohere_chat" @@ -5222,6 +5223,29 @@ def get_optional_params( optional_params["extra_body"] = ( extra_body # openai client supports `extra_body` param ) + elif custom_llm_provider == "groq": + supported_params = get_supported_openai_params( + model=model, custom_llm_provider=custom_llm_provider + ) + _check_valid_arg(supported_params=supported_params) + + if temperature is not None: + optional_params["temperature"] = temperature + if max_tokens is not None: + optional_params["max_tokens"] = max_tokens + if top_p is not None: + optional_params["top_p"] = top_p + if stream is not None: + optional_params["stream"] = stream + if stop is not None: + optional_params["stop"] = stop + if tools is not None: + optional_params["tools"] = tools + if tool_choice is not None: + optional_params["tool_choice"] = tool_choice + if response_format is not None: + optional_params["response_format"] = tool_choice + elif custom_llm_provider == "openrouter": supported_params = get_supported_openai_params( model=model, custom_llm_provider=custom_llm_provider @@ -5426,6 +5450,17 @@ def get_supported_openai_params(model: str, custom_llm_provider: str): "tools", "tool_choice", ] + elif custom_llm_provider == "groq": + return [ + "temperature", + "max_tokens", + "top_p", + "stream", + "stop", + "tools", + "tool_choice", + "response_format", + ] elif custom_llm_provider == "cohere": return [ "stream", diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index d8e750fde..4298ed28b 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -714,7 +714,8 @@ "input_cost_per_token": 0.00000070, "output_cost_per_token": 0.00000080, "litellm_provider": "groq", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "groq/mixtral-8x7b-32768": { "max_tokens": 32768, @@ -723,7 +724,8 @@ "input_cost_per_token": 0.00000027, "output_cost_per_token": 0.00000027, "litellm_provider": "groq", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "groq/gemma-7b-it": { "max_tokens": 8192, @@ -732,7 +734,8 @@ "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000010, "litellm_provider": "groq", - "mode": "chat" + "mode": "chat", + "supports_function_calling": true }, "claude-instant-1.2": { "max_tokens": 8191,