litellm-mirror/litellm/llms/groq/chat/transformation.py
Krish Dholakia 1a4910f6c0 fix(health.md): add rerank model health check information (#7295)
* fix(health.md): add rerank model health check information

* build(model_prices_and_context_window.json): add gemini 2.0 for google ai studio - pricing + commercial rate limits

* build(model_prices_and_context_window.json): add gemini-2.0 supports audio output = true

* docs(team_model_add.md): clarify allowing teams to add models is an enterprise feature

* fix(o1_transformation.py): add support for 'n', 'response_format' and 'stop' params for o1 and 'stream_options' param for o1-mini

* build(model_prices_and_context_window.json): add 'supports_system_message' to supporting openai models

needed as o1-preview, and o1-mini models don't support 'system message

* fix(o1_transformation.py): translate system message based on if o1 model supports it

* fix(o1_transformation.py): return 'stream' param support if o1-mini/o1-preview

o1 currently doesn't support streaming, but the other model versions do

Fixes https://github.com/BerriAI/litellm/issues/7292

* fix(o1_transformation.py): return tool calling/response_format in supported params if model map says so

Fixes https://github.com/BerriAI/litellm/issues/7292

* fix: fix linting errors

* fix: update '_transform_messages'

* fix(o1_transformation.py): fix provider passed for supported param checks

* test(base_llm_unit_tests.py): skip test if api takes >5s to respond

* fix(utils.py): return false in 'supports_factory' if can't find value

* fix(o1_transformation.py): always return stream + stream_options as supported params + handle stream options being passed in for azure o1

* feat(openai.py): support stream faking natively in openai handler

Allows o1 calls to be faked for just the "o1" model, allows native streaming for o1-mini, o1-preview

 Fixes https://github.com/BerriAI/litellm/issues/7292

* fix(openai.py): use inference param instead of original optional param
2024-12-18 19:18:10 -08:00

159 lines
5.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Translate from OpenAI's `/v1/chat/completions` to Groq's `/v1/chat/completions`
"""
import json
import types
from typing import List, Optional, Tuple, Union
from pydantic import BaseModel
import litellm
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionAssistantMessage,
ChatCompletionToolParam,
ChatCompletionToolParamFunctionChunk,
)
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
class GroqChatConfig(OpenAIGPTConfig):
frequency_penalty: Optional[int] = None
function_call: Optional[Union[str, dict]] = None
functions: Optional[list] = None
logit_bias: Optional[dict] = None
max_tokens: Optional[int] = None
n: Optional[int] = None
presence_penalty: Optional[int] = None
stop: Optional[Union[str, list]] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
response_format: Optional[dict] = None
tools: Optional[list] = None
tool_choice: Optional[Union[str, dict]] = None
def __init__(
self,
frequency_penalty: Optional[int] = None,
function_call: Optional[Union[str, dict]] = None,
functions: Optional[list] = None,
logit_bias: Optional[dict] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[int] = None,
stop: Optional[Union[str, list]] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
response_format: Optional[dict] = None,
tools: Optional[list] = None,
tool_choice: Optional[Union[str, dict]] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return super().get_config()
def _transform_messages(self, messages: List[AllMessageValues], model: str) -> List:
for idx, message in enumerate(messages):
"""
1. Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839
"""
if isinstance(message, BaseModel):
_message = message.model_dump()
else:
_message = message
assistant_message = _message.get("role") == "assistant"
if assistant_message:
new_message = ChatCompletionAssistantMessage(role="assistant")
for k, v in _message.items():
if v is not None:
new_message[k] = v # type: ignore
messages[idx] = new_message
return messages
def _get_openai_compatible_provider_info(
self, api_base: Optional[str], api_key: Optional[str]
) -> Tuple[Optional[str], Optional[str]]:
# groq is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.groq.com/openai/v1
api_base = (
api_base
or get_secret_str("GROQ_API_BASE")
or "https://api.groq.com/openai/v1"
) # type: ignore
dynamic_api_key = api_key or get_secret_str("GROQ_API_KEY")
return api_base, dynamic_api_key
def _should_fake_stream(self, optional_params: dict) -> bool:
"""
Groq doesn't support 'response_format' while streaming
"""
if optional_params.get("response_format") is not None:
return True
return False
def _create_json_tool_call_for_response_format(
self,
json_schema: dict,
):
"""
Handles creating a tool call for getting responses in JSON format.
Args:
json_schema (Optional[dict]): The JSON schema the response should be in
Returns:
AnthropicMessagesTool: The tool call to send to Anthropic API to get responses in JSON format
"""
return ChatCompletionToolParam(
type="function",
function=ChatCompletionToolParamFunctionChunk(
name="json_tool_call",
parameters=json_schema,
),
)
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool = False,
) -> dict:
_response_format = non_default_params.get("response_format")
if _response_format is not None and isinstance(_response_format, dict):
json_schema: Optional[dict] = None
if "response_schema" in _response_format:
json_schema = _response_format["response_schema"]
elif "json_schema" in _response_format:
json_schema = _response_format["json_schema"]["schema"]
"""
When using tools in this way: - https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode
- You usually want to provide a single tool
- You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
- Remember that the model will pass the input to the tool, so the name of the tool and description should be from the models perspective.
"""
if json_schema is not None:
_tool_choice = {
"type": "function",
"function": {"name": "json_tool_call"},
}
_tool = self._create_json_tool_call_for_response_format(
json_schema=json_schema,
)
optional_params["tools"] = [_tool]
optional_params["tool_choice"] = _tool_choice
optional_params["json_mode"] = True
non_default_params.pop("response_format", None)
return super().map_openai_params(
non_default_params, optional_params, model, drop_params
)