forked from phoenix/litellm-mirror
LiteLLM Minor Fixes & Improvements (10/10/2024) (#6158)
* refactor(vertex_ai_partner_models/anthropic): refactor anthropic to use partner model logic * fix(vertex_ai/): support passing custom api base to partner models Fixes https://github.com/BerriAI/litellm/issues/4317 * fix(proxy_server.py): Fix prometheus premium user check logic * docs(prometheus.md): update quick start docs * fix(custom_llm.py): support passing dynamic api key + api base * fix(realtime_api/main.py): Add request/response logging for realtime api endpoints Closes https://github.com/BerriAI/litellm/issues/6081 * feat(openai/realtime): add openai realtime api logging Closes https://github.com/BerriAI/litellm/issues/6081 * fix(realtime_streaming.py): fix linting errors * fix(realtime_streaming.py): fix linting errors * fix: fix linting errors * fix pattern match router * Add literalai in the sidebar observability category (#6163) * fix: add literalai in the sidebar * fix: typo * update (#6160) * Feat: Add Langtrace integration (#5341) * Feat: Add Langtrace integration * add langtrace service name * fix timestamps for traces * add tests * Discard Callback + use existing otel logger * cleanup * remove print statments * remove callback * add docs * docs * add logging docs * format logging * remove emoji and add litellm proxy example * format logging * format `logging.md` * add langtrace docs to logging.md * sync conflict * docs fix * (perf) move s3 logging to Batch logging + async [94% faster perf under 100 RPS on 1 litellm instance] (#6165) * fix move s3 to use customLogger * add basic s3 logging test * add s3 to custom logger compatible * use batch logger for s3 * s3 set flush interval and batch size * fix s3 logging * add notes on s3 logging * fix s3 logging * add basic s3 logging test * fix s3 type errors * add test for sync logging on s3 * fix: fix to debug log --------- Co-authored-by: Ishaan Jaff <ishaanjaffer0324@gmail.com> Co-authored-by: Willy Douhard <willy.douhard@gmail.com> Co-authored-by: yujonglee <yujonglee.dev@gmail.com> Co-authored-by: 
Ali Waleed <ali@scale3labs.com>
This commit is contained in:
parent
9db4ccca9f
commit
11f9df923a
28 changed files with 966 additions and 760 deletions
|
@ -116,7 +116,6 @@ lagoLogger = None
|
|||
dataDogLogger = None
|
||||
prometheusLogger = None
|
||||
dynamoLogger = None
|
||||
s3Logger = None
|
||||
genericAPILogger = None
|
||||
clickHouseLogger = None
|
||||
greenscaleLogger = None
|
||||
|
@ -1346,36 +1345,6 @@ class Logging:
|
|||
user_id=kwargs.get("user", None),
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
if callback == "s3":
|
||||
global s3Logger
|
||||
if s3Logger is None:
|
||||
s3Logger = S3Logger()
|
||||
if self.stream:
|
||||
if "complete_streaming_response" in self.model_call_details:
|
||||
print_verbose(
|
||||
"S3Logger Logger: Got Stream Event - Completed Stream Response"
|
||||
)
|
||||
s3Logger.log_event(
|
||||
kwargs=self.model_call_details,
|
||||
response_obj=self.model_call_details[
|
||||
"complete_streaming_response"
|
||||
],
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
else:
|
||||
print_verbose(
|
||||
"S3Logger Logger: Got Stream Event - No complete stream response as yet"
|
||||
)
|
||||
else:
|
||||
s3Logger.log_event(
|
||||
kwargs=self.model_call_details,
|
||||
response_obj=result,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
print_verbose=print_verbose,
|
||||
)
|
||||
if (
|
||||
callback == "openmeter"
|
||||
and self.model_call_details.get("litellm_params", {}).get(
|
||||
|
@ -2245,7 +2214,7 @@ def set_callbacks(callback_list, function_id=None):
|
|||
"""
|
||||
Globally sets the callback client
|
||||
"""
|
||||
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
|
||||
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, logfireLogger, dynamoLogger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
|
||||
|
||||
try:
|
||||
for callback in callback_list:
|
||||
|
@ -2319,8 +2288,6 @@ def set_callbacks(callback_list, function_id=None):
|
|||
dataDogLogger = DataDogLogger()
|
||||
elif callback == "dynamodb":
|
||||
dynamoLogger = DyanmoDBLogger()
|
||||
elif callback == "s3":
|
||||
s3Logger = S3Logger()
|
||||
elif callback == "wandb":
|
||||
weightsBiasesLogger = WeightsBiasesLogger()
|
||||
elif callback == "logfire":
|
||||
|
@ -2357,7 +2324,6 @@ def _init_custom_logger_compatible_class(
|
|||
llm_router: Optional[
|
||||
Any
|
||||
], # expect litellm.Router, but typing errors due to circular import
|
||||
premium_user: Optional[bool] = None,
|
||||
) -> Optional[CustomLogger]:
|
||||
if logging_integration == "lago":
|
||||
for callback in _in_memory_loggers:
|
||||
|
@ -2404,17 +2370,9 @@ def _init_custom_logger_compatible_class(
|
|||
if isinstance(callback, PrometheusLogger):
|
||||
return callback # type: ignore
|
||||
|
||||
if premium_user:
|
||||
_prometheus_logger = PrometheusLogger()
|
||||
_in_memory_loggers.append(_prometheus_logger)
|
||||
return _prometheus_logger # type: ignore
|
||||
elif premium_user is False:
|
||||
verbose_logger.warning(
|
||||
f"🚨🚨🚨 Prometheus Metrics is on LiteLLM Enterprise\n🚨 {CommonProxyErrors.not_premium_user.value}"
|
||||
)
|
||||
return None
|
||||
else:
|
||||
return None
|
||||
_prometheus_logger = PrometheusLogger()
|
||||
_in_memory_loggers.append(_prometheus_logger)
|
||||
return _prometheus_logger # type: ignore
|
||||
elif logging_integration == "datadog":
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, DataDogLogger):
|
||||
|
@ -2423,6 +2381,14 @@ def _init_custom_logger_compatible_class(
|
|||
_datadog_logger = DataDogLogger()
|
||||
_in_memory_loggers.append(_datadog_logger)
|
||||
return _datadog_logger # type: ignore
|
||||
elif logging_integration == "s3":
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, S3Logger):
|
||||
return callback # type: ignore
|
||||
|
||||
_s3_logger = S3Logger()
|
||||
_in_memory_loggers.append(_s3_logger)
|
||||
return _s3_logger # type: ignore
|
||||
elif logging_integration == "gcs_bucket":
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, GCSBucketLogger):
|
||||
|
@ -2589,6 +2555,10 @@ def get_custom_logger_compatible_class(
|
|||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, PrometheusLogger):
|
||||
return callback
|
||||
elif logging_integration == "s3":
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, S3Logger):
|
||||
return callback
|
||||
elif logging_integration == "datadog":
|
||||
for callback in _in_memory_loggers:
|
||||
if isinstance(callback, DataDogLogger):
|
||||
|
|
112
litellm/litellm_core_utils/realtime_streaming.py
Normal file
112
litellm/litellm_core_utils/realtime_streaming.py
Normal file
|
@ -0,0 +1,112 @@
|
|||
"""
|
||||
async with websockets.connect( # type: ignore
|
||||
url,
|
||||
extra_headers={
|
||||
"api-key": api_key, # type: ignore
|
||||
},
|
||||
) as backend_ws:
|
||||
forward_task = asyncio.create_task(
|
||||
forward_messages(websocket, backend_ws)
|
||||
)
|
||||
|
||||
try:
|
||||
while True:
|
||||
message = await websocket.receive_text()
|
||||
await backend_ws.send(message)
|
||||
except websockets.exceptions.ConnectionClosed: # type: ignore
|
||||
forward_task.cancel()
|
||||
finally:
|
||||
if not forward_task.done():
|
||||
forward_task.cancel()
|
||||
try:
|
||||
await forward_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import traceback
|
||||
from asyncio import Task
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from .litellm_logging import Logging as LiteLLMLogging
|
||||
|
||||
# Thread pool (at most 10 workers) used to run the synchronous success handler
# off the event loop.
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)

# Strong references to in-flight async logging tasks. The event loop only keeps
# weak references to tasks, so a fire-and-forget task could otherwise be
# garbage-collected before it finishes.
_pending_log_tasks: set = set()


class RealTimeStreaming:
    """Relay messages between a client websocket and a backend websocket.

    Messages received from the backend are forwarded to the client and
    collected in ``self.messages``; messages received from the client are
    recorded as the latest input and forwarded to the backend. When the
    backend-to-client loop ends, the collected messages are handed to the
    logging object's success handlers (if a logging object was supplied).
    """

    def __init__(
        self,
        websocket: Any,
        backend_ws: Any,
        logging_obj: Optional["LiteLLMLogging"] = None,
    ):
        """Store both websocket endpoints and the optional logging object.

        Args:
            websocket: Client-side connection; must provide ``receive_text()``
                and ``send_text()`` coroutines.
            backend_ws: Backend connection; must provide ``recv()`` and
                ``send()`` coroutines.
            logging_obj: Optional logging object. When falsy, no logging
                calls are made.
        """
        self.websocket = websocket
        self.backend_ws = backend_ws
        self.logging_obj = logging_obj
        self.messages: List = []
        self.input_message: Union[str, Dict] = {}

    def store_message(self, message: Union[str, bytes]):
        """Append a backend message to the list that is logged later."""
        self.messages.append(message)

    def store_input(self, message: Union[str, dict]):
        """Remember the latest client message and report it via ``pre_call``.

        ``client_ack_messages`` passes the raw text returned by
        ``receive_text()``, so ``message`` is normally a ``str``.
        """
        self.input_message = message
        if self.logging_obj:
            self.logging_obj.pre_call(input=message, api_key="")

    async def log_messages(self):
        """Hand the collected messages to the async and sync success handlers.

        Neither handler is awaited: the async handler is scheduled as a task
        on the running loop and the sync handler is queued on the module-level
        thread pool.
        """
        if not self.logging_obj:
            return

        ## ASYNC LOGGING
        log_task = asyncio.create_task(
            self.logging_obj.async_success_handler(self.messages)
        )
        # Keep the task alive until it completes, then drop the reference.
        _pending_log_tasks.add(log_task)
        log_task.add_done_callback(_pending_log_tasks.discard)

        ## SYNC LOGGING
        # Pass the callable and its argument separately: calling the handler
        # here would run it on the event loop and submit its return value.
        executor.submit(self.logging_obj.success_handler, self.messages)

    async def backend_to_client_send_messages(self):
        """Forward backend messages to the client until the stream ends.

        Every forwarded message is stored for logging. ``log_messages`` is
        always invoked on exit, including when this coroutine is cancelled.
        """
        import logging

        import websockets

        try:
            while True:
                message = await self.backend_ws.recv()
                await self.websocket.send_text(message)

                ## LOGGING
                self.store_message(message)
        except websockets.exceptions.ConnectionClosed:  # type: ignore
            # Backend closed the connection: normal end of the stream.
            pass
        except Exception:
            # Best effort: relay errors must not escape this task, but leave a
            # trace instead of discarding them silently.
            logging.getLogger(__name__).debug(
                "realtime streaming: error while relaying backend messages",
                exc_info=True,
            )
        finally:
            await self.log_messages()

    async def client_ack_messages(self):
        """Forward client messages to the backend until the connection closes.

        Each client message is recorded via ``store_input`` before it is sent
        to the backend.
        """
        import websockets

        try:
            while True:
                message = await self.websocket.receive_text()
                ## LOGGING
                self.store_input(message=message)
                ## FORWARD TO BACKEND
                await self.backend_ws.send(message)
        except websockets.exceptions.ConnectionClosed:  # type: ignore
            pass

    async def bidirectional_forward(self):
        """Run both relay directions until the client side finishes.

        The backend-to-client relay runs as a background task while this
        coroutine drives the client-to-backend relay. On exit the background
        task is cancelled (if still running) and awaited so that its cleanup
        completes.
        """
        import websockets

        forward_task = asyncio.create_task(self.backend_to_client_send_messages())
        try:
            await self.client_ack_messages()
        except websockets.exceptions.ConnectionClosed:  # type: ignore
            forward_task.cancel()
        finally:
            if not forward_task.done():
                forward_task.cancel()
                try:
                    await forward_task
                except asyncio.CancelledError:
                    pass
|
Loading…
Add table
Add a link
Reference in a new issue