diff --git a/docs/my-website/docs/providers/groq.md b/docs/my-website/docs/providers/groq.md
index 70b9f00d6..bcca20b5d 100644
--- a/docs/my-website/docs/providers/groq.md
+++ b/docs/my-website/docs/providers/groq.md
@@ -157,4 +157,21 @@ if tool_calls:
         model="groq/llama2-70b-4096", messages=messages
     ) # get a new response from the model where it can see the function response
     print("second response\n", second_response)
+```
+
+## Speech to Text - Whisper
+
+```python
+os.environ["GROQ_API_KEY"] = ""
+audio_file = open("/path/to/audio.mp3", "rb")
+
+transcript = litellm.transcription(
+    model="groq/whisper-large-v3",
+    file=audio_file,
+    prompt="Specify context or spelling",
+    temperature=0,
+    response_format="json"
+)
+
+print("response=", transcript)
 ```
\ No newline at end of file
diff --git a/litellm/main.py b/litellm/main.py
index 1da31abd7..ecd03f1b6 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -4367,6 +4367,8 @@ def transcription(
 
     model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore
 
+    if dynamic_api_key is not None:
+        api_key = dynamic_api_key
     optional_params = {
         "language": language,
         "prompt": prompt,
@@ -4408,7 +4410,7 @@ def transcription(
             azure_ad_token=azure_ad_token,
             max_retries=max_retries,
         )
-    elif custom_llm_provider == "openai":
+    elif custom_llm_provider == "openai" or custom_llm_provider == "groq":
         api_base = (
             api_base
             or litellm.api_base
diff --git a/litellm/tests/gettysburg.wav b/litellm/tests/gettysburg.wav
new file mode 100644
index 000000000..9690f521e
Binary files /dev/null and b/litellm/tests/gettysburg.wav differ
diff --git a/tests/test_whisper.py b/litellm/tests/test_whisper.py
similarity index 92%
rename from tests/test_whisper.py
rename to litellm/tests/test_whisper.py
index 09819f796..d0f1ed9eb 100644
--- a/tests/test_whisper.py
+++ b/litellm/tests/test_whisper.py
@@ -1,16 +1,22 @@
 # What is this?
 ## Tests `litellm.transcription` endpoint. Outside litellm module b/c of audio file used in testing (it's ~700kb).
 
-import pytest
-import asyncio, time
-import aiohttp, traceback
-from openai import AsyncOpenAI
-import sys, os, dotenv
-from typing import Optional
-from dotenv import load_dotenv
-from litellm.integrations.custom_logger import CustomLogger
-import litellm
+import asyncio
 import logging
+import os
+import sys
+import time
+import traceback
+from typing import Optional
+
+import aiohttp
+import dotenv
+import pytest
+from dotenv import load_dotenv
+from openai import AsyncOpenAI
+
+import litellm
+from litellm.integrations.custom_logger import CustomLogger
 
 # Get the current directory of the file being run
 pwd = os.path.dirname(os.path.realpath(__file__))
@@ -41,6 +47,19 @@ def test_transcription():
 # test_transcription()
 
 
+def test_transcription_groq():
+    litellm.set_verbose = True
+    transcript = litellm.transcription(
+        model="groq/whisper-large-v3",
+        file=audio_file,
+    )
+    print(f"response=: {transcript.model_dump()}")
+    print(f"hidden_params: {transcript._hidden_params}")
+
+
+# test_transcription()
+
+
 def test_transcription_azure():
     litellm.set_verbose = True
     transcript = litellm.transcription(
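
Note for reviewers: a self-contained sketch of the usage the new docs section describes, with the imports the `groq.md` snippet assumes from earlier on that page. The audio path and the empty `GROQ_API_KEY` value are placeholders.

```python
import os

import litellm

# Placeholder: set a real Groq key here or in the environment; with the
# main.py change above, get_llm_provider() resolves it as dynamic_api_key
# for "groq/" models.
os.environ["GROQ_API_KEY"] = ""

# Placeholder path: any audio file Whisper accepts (mp3, wav, ...) works,
# e.g. the gettysburg.wav fixture added under litellm/tests/.
with open("/path/to/audio.mp3", "rb") as audio_file:
    transcript = litellm.transcription(
        model="groq/whisper-large-v3",  # "groq/" prefix routes through the OpenAI-compatible branch
        file=audio_file,
        prompt="Specify context or spelling",  # optional hint passed via optional_params
        temperature=0,
        response_format="json",
    )

print("response=", transcript.model_dump())
```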
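
litellm also exposes an async counterpart, `litellm.atranscription`, in the same module (untouched by this diff); assuming it accepts the same arguments as `transcription`, the new Groq route should be callable from async code as well. A sketch under that assumption:

```python
import asyncio
import os

import litellm

os.environ["GROQ_API_KEY"] = ""  # placeholder: set a real key


async def main() -> None:
    # Placeholder path; same arguments as the sync example above.
    with open("/path/to/audio.mp3", "rb") as audio_file:
        # Assumption: litellm.atranscription mirrors litellm.transcription's signature.
        transcript = await litellm.atranscription(
            model="groq/whisper-large-v3",
            file=audio_file,
        )
    print(transcript.model_dump())


if __name__ == "__main__":
    asyncio.run(main())
```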