(perf) Fix memory leak on /completions route (#8551)

* initial mem util test

* fix _cached_get_model_info_helper

* test memory usage

* fix tests

* fix mem usage
This commit is contained in:
Ishaan Jaff 2025-02-14 18:58:16 -08:00 committed by GitHub
parent 6ef6588d93
commit 753290e5b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 244 additions and 1 deletions

View file

@@ -4173,7 +4173,6 @@ def _get_max_position_embeddings(model_name: str) -> Optional[int]:
return None
@lru_cache_wrapper(maxsize=16)
def _cached_get_model_info_helper(
model: str, custom_llm_provider: Optional[str]
) -> ModelInfoBase: