diff --git a/litellm/__init__.py b/litellm/__init__.py
index cc67cd00c..d018e7e6c 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -829,6 +829,7 @@ from .llms.openai import (
     MistralConfig,
     MistralEmbeddingConfig,
     DeepInfraConfig,
+    GroqConfig,
     AzureAIStudioConfig,
 )
 from .llms.nvidia_nim import NvidiaNimConfig
diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index da8189c4a..f905d17a3 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -19,6 +19,7 @@ from typing import (
 import httpx  # type: ignore
 import requests
 from openai import AsyncAzureOpenAI, AzureOpenAI
+from pydantic import BaseModel
 from typing_extensions import overload
 
 import litellm
@@ -1534,7 +1535,12 @@ class AzureChatCompletion(BaseLLM):
         response = azure_client.audio.transcriptions.create(
             **data, timeout=timeout  # type: ignore
         )
-        stringified_response = response.model_dump()
+
+        if isinstance(response, BaseModel):
+            stringified_response = response.model_dump()
+        else:
+            stringified_response = TranscriptionResponse(text=response).model_dump()
+
         ## LOGGING
         logging_obj.post_call(
             input=audio_file.name,
@@ -1587,7 +1593,10 @@ class AzureChatCompletion(BaseLLM):
                 **data, timeout=timeout
             )  # type: ignore
 
-            stringified_response = response.model_dump()
+            if isinstance(response, BaseModel):
+                stringified_response = response.model_dump()
+            else:
+                stringified_response = TranscriptionResponse(text=response).model_dump()
 
             ## LOGGING
             logging_obj.post_call(
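Note on the two azure.py hunks above: the OpenAI SDK only returns a pydantic object for the `json` / `verbose_json` transcription formats; text-style formats come back as a plain string, on which `.model_dump()` would fail. Below is a minimal, self-contained sketch of the same normalization pattern. It uses a stand-in pydantic model rather than litellm's own `TranscriptionResponse`, and assumes pydantic v2 for `model_dump()`.

```python
from typing import Union

from pydantic import BaseModel


class TranscriptionResult(BaseModel):
    """Stand-in for litellm's TranscriptionResponse (illustrative only)."""

    text: str


def normalize_transcription(response: Union[BaseModel, str]) -> dict:
    # json / verbose_json responses are pydantic objects and expose model_dump();
    # text-style responses arrive as a bare string, so wrap them first.
    if isinstance(response, BaseModel):
        return response.model_dump()
    return TranscriptionResult(text=response).model_dump()


print(normalize_transcription("hello world"))  # {'text': 'hello world'}
```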
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 25641c4b8..6325c77dc 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -348,6 +348,104 @@ class DeepInfraConfig:
         return optional_params
 
 
+class GroqConfig:
+    """
+    Reference: https://console.groq.com/docs/openai
+
+    The class `GroqConfig` provides configuration for Groq's OpenAI-compatible API interface. Below are the parameters:
+    """
+
+    frequency_penalty: Optional[int] = None
+    function_call: Optional[Union[str, dict]] = None
+    functions: Optional[list] = None
+    logit_bias: Optional[dict] = None
+    max_tokens: Optional[int] = None
+    n: Optional[int] = None
+    presence_penalty: Optional[int] = None
+    stop: Optional[Union[str, list]] = None
+    temperature: Optional[int] = None
+    top_p: Optional[int] = None
+    response_format: Optional[dict] = None
+    tools: Optional[list] = None
+    tool_choice: Optional[Union[str, dict]] = None
+
+    def __init__(
+        self,
+        frequency_penalty: Optional[int] = None,
+        function_call: Optional[Union[str, dict]] = None,
+        functions: Optional[list] = None,
+        logit_bias: Optional[dict] = None,
+        max_tokens: Optional[int] = None,
+        n: Optional[int] = None,
+        presence_penalty: Optional[int] = None,
+        stop: Optional[Union[str, list]] = None,
+        temperature: Optional[int] = None,
+        top_p: Optional[int] = None,
+        response_format: Optional[dict] = None,
+        tools: Optional[list] = None,
+        tool_choice: Optional[Union[str, dict]] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return {
+            k: v
+            for k, v in cls.__dict__.items()
+            if not k.startswith("__")
+            and not isinstance(
+                v,
+                (
+                    types.FunctionType,
+                    types.BuiltinFunctionType,
+                    classmethod,
+                    staticmethod,
+                ),
+            )
+            and v is not None
+        }
+
+    def get_supported_openai_params_stt(self):
+        return [
+            "prompt",
+            "response_format",
+            "temperature",
+            "language",
+        ]
+
+    def get_supported_openai_response_formats_stt(self) -> List[str]:
+        return ["json", "verbose_json", "text"]
+
+    def map_openai_params_stt(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        response_formats = self.get_supported_openai_response_formats_stt()
+        for param, value in non_default_params.items():
+            if param == "response_format":
+                if value in response_formats:
+                    optional_params[param] = value
+                else:
+                    if litellm.drop_params is True or drop_params is True:
+                        pass
+                    else:
+                        raise litellm.utils.UnsupportedParamsError(
+                            message="Groq doesn't support response_format={}. To drop unsupported openai params from the call, set `litellm.drop_params = True`".format(
+                                value
+                            ),
+                            status_code=400,
+                        )
+            else:
+                optional_params[param] = value
+        return optional_params
+
+
 class OpenAIConfig:
     """
     Reference: https://platform.openai.com/docs/api-reference/chat/create
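A quick usage sketch of the mapping logic added in the hunk above: supported speech-to-text response formats pass through untouched, while unsupported ones are either dropped or raise, depending on `drop_params`. The model string here is illustrative and is not inspected by the method.

```python
from litellm.llms.openai import GroqConfig

config = GroqConfig()

# "verbose_json" is in get_supported_openai_response_formats_stt(), so it is forwarded.
mapped = config.map_openai_params_stt(
    non_default_params={"response_format": "verbose_json", "temperature": 0.2},
    optional_params={},
    model="whisper-large-v3",
    drop_params=False,
)
print(mapped)  # {'response_format': 'verbose_json', 'temperature': 0.2}

# An unsupported format such as "srt" is silently dropped when drop_params=True;
# with drop_params=False (and litellm.drop_params left at its default) it raises
# litellm.utils.UnsupportedParamsError instead.
mapped = config.map_openai_params_stt(
    non_default_params={"response_format": "srt"},
    optional_params={},
    model="whisper-large-v3",
    drop_params=True,
)
print(mapped)  # {}
```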
@@ -1360,7 +1458,11 @@ class OpenAIChatCompletion(BaseLLM):
             **data, timeout=timeout  # type: ignore
         )
-        stringified_response = response.model_dump()
+        if isinstance(response, BaseModel):
+            stringified_response = response.model_dump()
+        else:
+            stringified_response = TranscriptionResponse(text=response).model_dump()
+
         ## LOGGING
         logging_obj.post_call(
             input=audio_file.name,
@@ -1400,7 +1502,10 @@ class OpenAIChatCompletion(BaseLLM):
                 timeout=timeout,
             )
             logging_obj.model_call_details["response_headers"] = headers
-            stringified_response = response.model_dump()
+            if isinstance(response, BaseModel):
+                stringified_response = response.model_dump()
+            else:
+                stringified_response = TranscriptionResponse(text=response).model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=audio_file.name,
diff --git a/litellm/main.py b/litellm/main.py
index 097c86707..1d949e3bd 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -61,6 +61,7 @@ from litellm.utils import (
     get_llm_provider,
     get_optional_params_embeddings,
     get_optional_params_image_gen,
+    get_optional_params_transcription,
     get_secret,
     mock_completion_streaming_obj,
     read_config_args,
@@ -4279,7 +4280,7 @@ def image_generation(
 
 
 @client
-async def atranscription(*args, **kwargs):
+async def atranscription(*args, **kwargs) -> TranscriptionResponse:
     """
     Calls openai + azure whisper endpoints.
 
@@ -4304,9 +4305,9 @@ async def atranscription(*args, **kwargs):
 
         # Await normally
         init_response = await loop.run_in_executor(None, func_with_context)
-        if isinstance(init_response, dict) or isinstance(
-            init_response, TranscriptionResponse
-        ):  ## CACHING SCENARIO
+        if isinstance(init_response, dict):
+            response = TranscriptionResponse(**init_response)
+        elif isinstance(init_response, TranscriptionResponse):  ## CACHING SCENARIO
             response = init_response
         elif asyncio.iscoroutine(init_response):
             response = await init_response
@@ -4346,7 +4347,7 @@ def transcription(
     litellm_logging_obj: Optional[LiteLLMLoggingObj] = None,
     custom_llm_provider=None,
     **kwargs,
-):
+) -> TranscriptionResponse:
     """
     Calls openai + azure whisper endpoints.
 
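With `atranscription` now annotated to return `TranscriptionResponse` (and cached dict hits rehydrated via `TranscriptionResponse(**init_response)`), callers get a typed object either way. A rough usage sketch, assuming an OpenAI API key is configured in the environment and a local `sample.wav` exists:

```python
import asyncio

import litellm


async def main() -> None:
    with open("sample.wav", "rb") as audio_file:
        # Returns a TranscriptionResponse even when the result was served from cache.
        response = await litellm.atranscription(model="whisper-1", file=audio_file)
    print(response.text)


if __name__ == "__main__":
    asyncio.run(main())
```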
@@ -4358,6 +4359,7 @@ def transcription(
     proxy_server_request = kwargs.get("proxy_server_request", None)
     model_info = kwargs.get("model_info", None)
     metadata = kwargs.get("metadata", {})
+    drop_params = kwargs.get("drop_params", None)
     client: Optional[
         Union[
             openai.AsyncOpenAI,
@@ -4379,12 +4381,22 @@ def transcription(
     if dynamic_api_key is not None:
         api_key = dynamic_api_key
-    optional_params = {
-        "language": language,
-        "prompt": prompt,
-        "response_format": response_format,
-        "temperature": None,  # openai defaults this to 0
-    }
+
+    optional_params = get_optional_params_transcription(
+        model=model,
+        language=language,
+        prompt=prompt,
+        response_format=response_format,
+        temperature=temperature,
+        custom_llm_provider=custom_llm_provider,
+        drop_params=drop_params,
+    )
+    # optional_params = {
+    #     "language": language,
+    #     "prompt": prompt,
+    #     "response_format": response_format,
+    #     "temperature": None,  # openai defaults this to 0
+    # }
 
     if custom_llm_provider == "azure":
         # azure configs
diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html
deleted file mode 100644
index dc2d75de1..000000000
--- a/litellm/proxy/_experimental/out/404.html
+++ /dev/null
@@ -1 +0,0 @@
-
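To close, a sketch of how the new parameter handling in `transcription()` is exercised end to end. Names here are illustrative: a provider-prefixed Groq model id, the `drop_params` kwarg this diff starts reading from `kwargs`, and a `GROQ_API_KEY` in the environment are all assumed.

```python
import litellm

with open("sample.wav", "rb") as audio_file:
    transcript = litellm.transcription(
        model="groq/whisper-large-v3",   # provider prefix routes the call to Groq
        file=audio_file,
        response_format="verbose_json",  # supported format, forwarded as-is
        temperature=0.0,
        drop_params=True,                # unsupported params are dropped instead of raising
    )

print(transcript.text)
```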