fix(proxy_server.py): fix get model info when litellm_model_id is set + move model analytics to free (#7886)

* fix(proxy_server.py): fix get model info when litellm_model_id is set

Fixes https://github.com/BerriAI/litellm/issues/7873
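
A minimal sketch of the behavior this fix targets, assuming a hypothetical list of deployments where each entry carries a unique `model_info.id`; the names below are illustrative, not litellm's actual proxy_server.py internals:

from typing import Optional

# Hypothetical helper: `model_list` and its shape are assumptions for
# illustration, not litellm's real data structures.
def get_model_info(
    model_list: list[dict], litellm_model_id: Optional[str] = None
) -> list[dict]:
    """Return info for every deployment, or only the deployment whose
    unique id matches `litellm_model_id` when it is set."""
    if litellm_model_id is None:
        return model_list
    # Filter on the deployment's unique id so the single-deployment
    # response matches the corresponding entry in the full listing.
    return [
        d for d in model_list
        if d.get("model_info", {}).get("id") == litellm_model_id
    ]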

* test(test_models.py): add test to ensure get model info for a specific deployment returns the same values as get model info for all models

Fixes https://github.com/BerriAI/litellm/issues/7873
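
A hedged sketch of such a parity test, assuming the proxy's `/model/info` endpoint wraps results in a `data` list and accepts a `litellm_model_id` query parameter; the base URL and key below are placeholders:

import httpx

BASE_URL = "http://localhost:4000"  # placeholder proxy address
HEADERS = {"Authorization": "Bearer sk-1234"}  # placeholder virtual key

def test_deployment_info_matches_all_model_info():
    all_info = httpx.get(f"{BASE_URL}/model/info", headers=HEADERS).json()["data"]
    for entry in all_info:
        model_id = entry["model_info"]["id"]
        single = httpx.get(
            f"{BASE_URL}/model/info",
            params={"litellm_model_id": model_id},
            headers=HEADERS,
        ).json()["data"]
        # The per-deployment response should be the same entry, not a
        # differently populated record (the bug in #7873).
        assert single == [entry]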

* fix(usage.tsx): make model analytics free

Addresses @iqballx's feedback

* fix(invoke_handler.py): fix bedrock error chunk parsing, returning the correct bedrock status code and error message when a stream chunk contains an error

Improves bedrock stream error handling
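
A rough sketch of the idea, assuming a hypothetical error envelope inside the stream chunk; the real invoke_handler.py parsing and litellm's exception mapping are more involved:

import json

class BedrockStreamError(Exception):
    """Carries the provider's status code instead of a generic 500."""
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        super().__init__(f"BedrockException {status_code}: {message}")

def parse_bedrock_stream_chunk(raw_chunk: bytes) -> dict:
    """Raise with bedrock's own status code and message when a stream
    chunk is an error; otherwise return the decoded chunk."""
    chunk = json.loads(raw_chunk)
    if "error" in chunk:  # hypothetical error envelope
        err = chunk["error"]
        raise BedrockStreamError(
            status_code=err.get("status_code", 500),
            message=err.get("message", "unknown bedrock error"),
        )
    return chunk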

* fix(proxy_server.py): fix linting errors

* test(test_auth_checks.py): remove redundant test

* fix(proxy_server.py): fix linting errors

* test: fix flaky test

* test: fix test

Krish Dholakia, 2025-01-21 08:19:07 -08:00, committed by GitHub
parent 0295f494b6
commit c8aa876785
8 changed files with 146 additions and 131 deletions

@@ -642,19 +642,27 @@ def tgi_mock_post(*args, **kwargs):
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 
 
-@pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
-async def test_hf_embedding_sentence_sim(sync_mode):
+@patch("litellm.llms.huggingface.chat.handler.async_get_hf_task_embedding_for_model")
+@patch("litellm.llms.huggingface.chat.handler.get_hf_task_embedding_for_model")
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_hf_embedding_sentence_sim(
+    mock_async_get_hf_task_embedding_for_model,
+    mock_get_hf_task_embedding_for_model,
+    sync_mode,
+):
     try:
         # huggingface/microsoft/codebert-base
         # huggingface/facebook/bart-large
+        mock_get_hf_task_embedding_for_model.return_value = "sentence-similarity"
+        mock_async_get_hf_task_embedding_for_model.return_value = "sentence-similarity"
         if sync_mode is True:
             client = HTTPHandler(concurrent_limit=1)
         else:
             client = AsyncHTTPHandler(concurrent_limit=1)
         with patch.object(client, "post", side_effect=tgi_mock_post) as mock_client:
             data = {
-                "model": "huggingface/TaylorAI/bge-micro-v2",
+                "model": "huggingface/sentence-transformers/TaylorAI/bge-micro-v2",
                 "input": ["good morning from litellm", "this is another item"],
                 "client": client,
             }
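
A note on the hunk above: the updated test stubs both the sync and async `get_hf_task_embedding_for_model` helpers to return "sentence-similarity", so the test no longer depends on a live Hugging Face metadata lookup, and the embedding model path is corrected to include the `sentence-transformers/` prefix.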