Merge pull request #4549 from BerriAI/litellm_add_groq_whisper

[Feat] Add Groq/whisper-large-v3
This commit is contained in:
Ishaan Jaff 2024-07-04 17:30:48 -07:00 committed by GitHub
commit 340f7317e8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 48 additions and 10 deletions

View file

@ -158,3 +158,20 @@ if tool_calls:
) # get a new response from the model where it can see the function response
print("second response\n", second_response)
```
## Speech to Text - Whisper
```python
import os

import litellm

# Set your Groq API key before making the request.
os.environ["GROQ_API_KEY"] = ""

# Open the audio in binary mode; the `with` block closes the file handle
# once the transcription call has returned (or raised).
with open("/path/to/audio.mp3", "rb") as audio_file:
    transcript = litellm.transcription(
        model="groq/whisper-large-v3",
        file=audio_file,
        prompt="Specify context or spelling",
        temperature=0,
        response_format="json",
    )

print("response=", transcript)
```

View file

@ -4367,6 +4367,8 @@ def transcription(
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore
if dynamic_api_key is not None:
api_key = dynamic_api_key
optional_params = {
"language": language,
"prompt": prompt,
@ -4408,7 +4410,7 @@ def transcription(
azure_ad_token=azure_ad_token,
max_retries=max_retries,
)
elif custom_llm_provider == "openai":
elif custom_llm_provider == "openai" or custom_llm_provider == "groq":
api_base = (
api_base
or litellm.api_base

Binary file not shown.

View file

@ -1,16 +1,22 @@
# What is this?
## Tests the `litellm.transcription` endpoint. Lives outside the litellm module because the audio file used in testing is large (~700 KB).
import pytest
import asyncio, time
import aiohttp, traceback
from openai import AsyncOpenAI
import sys, os, dotenv
from typing import Optional
from dotenv import load_dotenv
from litellm.integrations.custom_logger import CustomLogger
import litellm
import asyncio
import logging
import os
import sys
import time
import traceback
from typing import Optional
import aiohttp
import dotenv
import pytest
from dotenv import load_dotenv
from openai import AsyncOpenAI
import litellm
from litellm.integrations.custom_logger import CustomLogger
# Get the current directory of the file being run
pwd = os.path.dirname(os.path.realpath(__file__))
@ -41,6 +47,19 @@ def test_transcription():
# test_transcription()
def test_transcription_groq():
    """Smoke-test `litellm.transcription` with the `groq/whisper-large-v3` model.

    Sends the shared `audio_file` handle, prints the response and its hidden
    params for debugging, and checks that a text transcript came back.
    """
    litellm.set_verbose = True
    transcript = litellm.transcription(
        model="groq/whisper-large-v3",
        file=audio_file,
    )
    print(f"response=: {transcript.model_dump()}")
    print(f"hidden_params: {transcript._hidden_params}")

    # Printing alone would let an empty or malformed response pass silently.
    assert transcript.text is not None
    assert isinstance(transcript.text, str)
# test_transcription()
def test_transcription_azure():
litellm.set_verbose = True
transcript = litellm.transcription(