mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
93 lines
3.3 KiB
Python
93 lines
3.3 KiB
Python
# Start tracing memory allocations
|
|
import os
|
|
import tracemalloc
|
|
|
|
from fastapi import APIRouter
|
|
|
|
from litellm._logging import verbose_proxy_logger
|
|
|
|
# Router that the debugging endpoints in this module are registered on.
router = APIRouter()

# Allocation tracing is opt-in: it starts only when the LITELLM_PROFILE
# environment variable equals "true" (compared case-insensitively). The
# argument 10 is the number of frames tracemalloc stores per traceback.
if os.getenv("LITELLM_PROFILE", "false").lower() == "true":
    tracemalloc.start(10)
|
|
|
|
@router.get("/memory-usage", include_in_schema=False)
async def memory_usage():
    """Report the first 50 ``tracemalloc`` statistics, grouped by source line.

    Returns:
        A dict whose ``"top_50_memory_usage"`` key holds one string per
        statistic: the traceback formatted with ``limit=10`` followed by the
        allocated size in KiB. When tracemalloc is not tracing, the list is
        empty and an additional ``"error"`` key explains how to enable it.
    """
    # take_snapshot() raises RuntimeError unless tracing is active, and this
    # module only starts tracing when LITELLM_PROFILE is "true". Answer with
    # an explanatory payload instead of letting the request fail.
    if not tracemalloc.is_tracing():
        return {
            "top_50_memory_usage": [],
            "error": (
                "tracemalloc is not tracing memory allocations; "
                "set LITELLM_PROFILE=true to enable memory profiling"
            ),
        }

    snapshot = tracemalloc.take_snapshot()
    top_stats = snapshot.statistics("lineno")
    verbose_proxy_logger.debug("TOP STATS: %s", top_stats)

    # Keep only the first 50 entries so the response stays bounded.
    return {
        "top_50_memory_usage": [
            f"{stat.traceback.format(limit=10)}: {stat.size / 1024} KiB"
            for stat in top_stats[:50]
        ]
    }
|
|
|
|
@router.get("/memory-usage-in-mem-cache", include_in_schema=False)
async def memory_usage_in_mem_cache():
    """Return the number of entries held by the proxy's in-memory caches.

    For each cache below, the reported figure is the length of its in-memory
    ``cache_dict`` plus the length of its ``ttl_dict``:

    1. user_api_key_cache
    2. llm_router.cache
    3. proxy_logging_obj.internal_usage_cache

    Returns:
        A dict with the keys ``num_items_in_user_api_key_cache``,
        ``num_items_in_llm_router_cache`` and
        ``num_items_in_proxy_logging_obj_cache``. The router figure is 0 when
        ``llm_router`` is None.
    """
    # Imported lazily, inside the handler, as in the other endpoints that read
    # proxy_server state.
    from litellm.proxy.proxy_server import (
        llm_router,
        proxy_logging_obj,
        user_api_key_cache,
    )

    def _entry_count(in_memory_cache) -> int:
        # Both dictionaries are counted, so a key present in each adds two.
        return len(in_memory_cache.cache_dict) + len(in_memory_cache.ttl_dict)

    num_items_in_user_api_key_cache = _entry_count(user_api_key_cache.in_memory_cache)

    # NOTE(review): llm_router is presumably None when the proxy runs without
    # a router configured; report an empty cache instead of failing with an
    # AttributeError in that case.
    if llm_router is None:
        num_items_in_llm_router_cache = 0
    else:
        num_items_in_llm_router_cache = _entry_count(llm_router.cache.in_memory_cache)

    num_items_in_proxy_logging_obj_cache = _entry_count(
        proxy_logging_obj.internal_usage_cache.in_memory_cache
    )

    return {
        "num_items_in_user_api_key_cache": num_items_in_user_api_key_cache,
        "num_items_in_llm_router_cache": num_items_in_llm_router_cache,
        "num_items_in_proxy_logging_obj_cache": num_items_in_proxy_logging_obj_cache,
    }
|
|
|
|
|
|
@router.get("/otel-spans", include_in_schema=False)
async def get_otel_spans():
    """Summarise the finished spans held by the proxy's OpenTelemetry exporter.

    Returns:
        A dict with three keys:

        * ``"otel_spans"``: the names of all finished spans.
        * ``"spans_grouped_by_parent"``: span names keyed by
          ``span.parent.trace_id``; spans without a parent are not grouped.
        * ``"most_recent_parent"``: the grouping key of the parented span with
          the latest ``start_time``, or None if no span qualified.

        All three are empty/None when no OpenTelemetry logger is set or its
        exporter does not provide ``get_finished_spans``.
    """
    from litellm.proxy.proxy_server import open_telemetry_logger

    # Without a logger there is nothing to inspect; answer with an empty
    # summary rather than failing on an attribute lookup against None.
    if open_telemetry_logger is None:
        return {
            "otel_spans": [],
            "spans_grouped_by_parent": {},
            "most_recent_parent": None,
        }

    otel_exporter = open_telemetry_logger.OTEL_EXPORTER
    # Not every exporter object is guaranteed to offer get_finished_spans();
    # treat a missing method as "no recorded spans".
    fetch_finished_spans = getattr(otel_exporter, "get_finished_spans", None)
    recorded_spans = fetch_finished_spans() if callable(fetch_finished_spans) else []

    # Stdout dump kept from the original implementation (lint-suppressed there).
    print("Spans: ", recorded_spans)  # noqa

    most_recent_parent = None
    # Seed threshold: a span must start strictly after this value to be
    # considered the most recent one.
    most_recent_start_time = 1000000
    spans_grouped_by_parent = {}
    for span in recorded_spans:
        if span.parent is None:
            continue

        parent_trace_id = span.parent.trace_id
        spans_grouped_by_parent.setdefault(parent_trace_id, []).append(span.name)

        # Track the parent of the latest-starting span; a span with no
        # start_time cannot be compared and is skipped for this purpose.
        if span.start_time is not None and span.start_time > most_recent_start_time:
            most_recent_parent = parent_trace_id
            most_recent_start_time = span.start_time

    return {
        "otel_spans": [span.name for span in recorded_spans],
        "spans_grouped_by_parent": spans_grouped_by_parent,
        "most_recent_parent": most_recent_parent,
    }
|