(Refactor) - Reuse litellm.completion/litellm.embedding etc. for health checks (#7455)

* add mode: realtime

* add _realtime_health_check

* test_realtime_health_check

* azure _realtime_health_check

* _realtime_health_check

* Realtime Models

* fix code quality

* delete OAI / Azure custom health check code

* simplest version of ahealth check

* update tests

* working health check post refactor

* working aspeech health check

* fix realtime health checks

* test_audio_transcription_health_check

* use get_audio_file_for_health_check

* test_text_completion_health_check

* ahealth_check

* simplify health check code

* update ahealth_check

* fix import

* fix unused imports

* fix ahealth_check

* fix local testing

* test_async_realtime_health_check
Ishaan Jaff, 2024-12-28 18:38:54 -08:00, committed by GitHub
parent 4e65722a00, commit 1e06ee3162
9 changed files with 188 additions and 373 deletions
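
The deleted hunk below shows what this refactor removes: a provider-specific
ahealth_check on OpenAIChatCompletion that hand-built raw AsyncOpenAI calls for
every mode. After the refactor, a single top-level health check can dispatch to
litellm's own async entrypoints instead. Below is a minimal sketch of that
dispatch pattern, assuming litellm's public acompletion / aembedding /
aimage_generation / aspeech APIs; the helper name and return shape are
illustrative, not the code this commit adds.

# Illustrative sketch only -- the commit's actual implementation lives in
# litellm's health-check code, which this hunk does not show.
import litellm

async def simple_ahealth_check(model: str, mode: str, **kwargs) -> dict:
    """Probe a model by reusing the same litellm entrypoints real traffic uses."""
    if mode == "chat":
        await litellm.acompletion(
            model=model,
            messages=[{"role": "user", "content": "ping"}],
            **kwargs,
        )
    elif mode == "embedding":
        await litellm.aembedding(model=model, input=["ping"], **kwargs)
    elif mode == "image_generation":
        await litellm.aimage_generation(model=model, prompt="ping", **kwargs)
    elif mode == "audio_speech":
        await litellm.aspeech(model=model, input="ping", voice="alloy", **kwargs)
    else:
        raise ValueError(f"unsupported mode: {mode}")
    return {"status": "healthy", "mode": mode}

Routing probes through litellm.* means provider quirks (Azure auth, api_base
handling, custom providers) are resolved once, in the same code path production
requests take, which is what lets the per-provider health-check code below be
deleted.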


@@ -1,5 +1,4 @@
import hashlib
import os
import types
from typing import (
    Any,
@@ -1306,105 +1305,6 @@ class OpenAIChatCompletion(BaseLLM):
        return HttpxBinaryResponseContent(response=response.response)

    async def ahealth_check(
        self,
        model: Optional[str],
        api_key: Optional[str],
        timeout: float,
        mode: str,
        messages: Optional[list] = None,
        input: Optional[list] = None,
        prompt: Optional[str] = None,
        organization: Optional[str] = None,
        api_base: Optional[str] = None,
    ):
        client = AsyncOpenAI(
            api_key=api_key,
            timeout=timeout,
            organization=organization,
            base_url=api_base,
        )
        if model is None and mode != "image_generation":
            raise Exception("model is not set")

        completion = None
        if mode == "completion":
            completion = await client.completions.with_raw_response.create(
                model=model,  # type: ignore
                prompt=prompt,  # type: ignore
            )
        elif mode == "chat":
            if messages is None:
                raise Exception("messages is not set")
            completion = await client.chat.completions.with_raw_response.create(
                model=model,  # type: ignore
                messages=messages,  # type: ignore
            )
        elif mode == "embedding":
            if input is None:
                raise Exception("input is not set")
            completion = await client.embeddings.with_raw_response.create(
                model=model,  # type: ignore
                input=input,  # type: ignore
            )
        elif mode == "image_generation":
            if prompt is None:
                raise Exception("prompt is not set")
            completion = await client.images.with_raw_response.generate(
                model=model,  # type: ignore
                prompt=prompt,  # type: ignore
            )
        elif mode == "audio_transcription":
            # Get the current directory of the file being run
            pwd = os.path.dirname(os.path.realpath(__file__))
            file_path = os.path.join(
                pwd, "../../../tests/gettysburg.wav"
            )  # proxy address
            audio_file = open(file_path, "rb")
            completion = await client.audio.transcriptions.with_raw_response.create(
                file=audio_file,
                model=model,  # type: ignore
                prompt=prompt,  # type: ignore
            )
        elif mode == "audio_speech":
            # Get the current directory of the file being run
            completion = await client.audio.speech.with_raw_response.create(
                model=model,  # type: ignore
                input=prompt,  # type: ignore
                voice="alloy",
            )
        elif mode == "realtime":
            from litellm.realtime_api.main import _realtime_health_check

            # create a websocket connection
            await _realtime_health_check(
                model=model or "",
                api_key=api_key,
                api_base=api_base or "https://api.openai.com/",
                custom_llm_provider="openai",
            )
            return {}
        else:
            raise ValueError("mode not set, passed in mode: " + mode)

        response = {}
        if completion is None or not hasattr(completion, "headers"):
            raise Exception("invalid completion response")
        if (
            completion.headers.get("x-ratelimit-remaining-requests", None) is not None
        ):  # not provided for dall-e requests
            response["x-ratelimit-remaining-requests"] = completion.headers[
                "x-ratelimit-remaining-requests"
            ]
        if completion.headers.get("x-ratelimit-remaining-tokens", None) is not None:
            response["x-ratelimit-remaining-tokens"] = completion.headers[
                "x-ratelimit-remaining-tokens"
            ]
        return response

class OpenAIFilesAPI(BaseLLM):
    """