Merge pull request #4549 from BerriAI/litellm_add_groq_whisper

[Feat] Add Groq/whisper-large-v3
2024-07-04 17:30:48 -07:00 · 2024-07-04 17:30:48 -07:00 · 340f7317e8
commit 340f7317e8
parent 1807c49f18 82067072a7
4 changed files with 48 additions and 10 deletions
--- a/docs/my-website/docs/providers/groq.md
+++ b/docs/my-website/docs/providers/groq.md
@@ -158,3 +158,20 @@ if tool_calls:
    )  # get a new response from the model where it can see the function response
    print("second response\n", second_response)
 ```
+
+## Speech to Text - Whisper
+
+```python
+os.environ["GROQ_API_KEY"] = ""
+audio_file = open("/path/to/audio.mp3", "rb")
+
+transcript = litellm.transcription(
+    model="groq/whisper-large-v3",
+    file=audio_file,
+    prompt="Specify context or spelling",
+    temperature=0,
+    response_format="json"
+)
+
+print("response=", transcript)
+```
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -4367,6 +4367,8 @@ def transcription(

    model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base)  # type: ignore

+    if dynamic_api_key is not None:
+        api_key = dynamic_api_key
    optional_params = {
        "language": language,
        "prompt": prompt,
@@ -4408,7 +4410,7 @@ def transcription(
            azure_ad_token=azure_ad_token,
            max_retries=max_retries,
        )
-    elif custom_llm_provider == "openai":
+    elif custom_llm_provider == "openai" or custom_llm_provider == "groq":
        api_base = (
            api_base
            or litellm.api_base
--- a/litellm/tests/gettysburg.wav
+++ b/litellm/tests/gettysburg.wav
--- a/litellm/tests/test_whisper.py
+++ b/litellm/tests/test_whisper.py
@@ -1,16 +1,22 @@
 # What is this?
 ## Tests `litellm.transcription` endpoint. Outside litellm module b/c of audio file used in testing (it's ~700kb).

-import pytest
-import asyncio, time
-import aiohttp, traceback
-from openai import AsyncOpenAI
-import sys, os, dotenv
-from typing import Optional
-from dotenv import load_dotenv
-from litellm.integrations.custom_logger import CustomLogger
-import litellm
+import asyncio
 import logging
+import os
+import sys
+import time
+import traceback
+from typing import Optional
+
+import aiohttp
+import dotenv
+import pytest
+from dotenv import load_dotenv
+from openai import AsyncOpenAI
+
+import litellm
+from litellm.integrations.custom_logger import CustomLogger

 # Get the current directory of the file being run
 pwd = os.path.dirname(os.path.realpath(__file__))
@@ -41,6 +47,19 @@ def test_transcription():
 # test_transcription()


+def test_transcription_groq():  # smoke test: no assertions, so it fails only if the call or the prints raise
+    litellm.set_verbose = True
+    transcript = litellm.transcription(
+        model="groq/whisper-large-v3",
+        file=audio_file,  # audio_file is not defined in this hunk; presumably a module-level handle set up earlier in the test file
+    )
+    print(f"response=: {transcript.model_dump()}")
+    print(f"hidden_params: {transcript._hidden_params}")
+
+
+# test_transcription()
+
+
 def test_transcription_azure():
    litellm.set_verbose = True
    transcript = litellm.transcription(