fix(caching.py): dual cache async_batch_get_cache fix + testing

This fixes a bug in usage-based-routing-v2 that was caused by how the result was being returned from the dual cache's async_batch_get_cache. It also adds unit tests for that function (and its sync equivalent).
2024-04-19 15:03:25 -07:00 · 2024-04-19 15:03:25 -07:00 · 01a1a8f731
commit 01a1a8f731
parent 3c6b6355c7
8 changed files with 149 additions and 10 deletions
--- a/proxy_server_config.yaml
+++ b/proxy_server_config.yaml
@@ -55,6 +55,20 @@ model_list:
      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
      stream_timeout: 0.001
      rpm: 1
+  - model_name: fake-openai-endpoint-3
+    litellm_params:
+      model: openai/my-fake-model
+      api_key: my-fake-key
+      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
+      stream_timeout: 0.001
+      rpm: 10
+  - model_name: fake-openai-endpoint-3
+    litellm_params:
+      model: openai/my-fake-model-2
+      api_key: my-fake-key
+      api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
+      stream_timeout: 0.001
+      rpm: 10
  - model_name: "*"
    litellm_params:
      model: openai/*