(perf) Fix memory leak on /completions route (#8551)

* initial mem util test

* fix _cached_get_model_info_helper

* test memory usage

* fix tests

* fix mem usage
This commit is contained in:
Ishaan Jaff 2025-02-14 18:58:16 -08:00 committed by GitHub
parent 8f3d62180e
commit ef0db202d6
2 changed files with 244 additions and 1 deletions

View file

@@ -4173,7 +4173,6 @@ def _get_max_position_embeddings(model_name: str) -> Optional[int]:
return None
@lru_cache_wrapper(maxsize=16)
def _cached_get_model_info_helper(
model: str, custom_llm_provider: Optional[str]
) -> ModelInfoBase: