fix(proxy_server.py): fix get model info when litellm_model_id is set + move model analytics to free (#7886)

* fix(proxy_server.py): fix get model info when litellm_model_id is set

Fixes https://github.com/BerriAI/litellm/issues/7873
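
A minimal sketch of the behavior this fix targets, assuming a hypothetical list of deployments where each entry carries a unique `model_info.id`; the names below are illustrative, not litellm's actual proxy_server.py internals:

from typing import Optional

# Hypothetical helper: `model_list` and its shape are assumptions for
# illustration, not litellm's real data structures.
def get_model_info(
    model_list: list[dict], litellm_model_id: Optional[str] = None
) -> list[dict]:
    """Return info for every deployment, or only the deployment whose
    unique id matches `litellm_model_id` when it is set."""
    if litellm_model_id is None:
        return model_list
    # Filter on the deployment's unique id so the single-deployment
    # response matches the corresponding entry in the full listing.
    return [
        d for d in model_list
        if d.get("model_info", {}).get("id") == litellm_model_id
    ]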

* test(test_models.py): add test to ensure get model info for a specific deployment returns the same values as get model info for all models

Fixes https://github.com/BerriAI/litellm/issues/7873
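
A hedged sketch of such a parity test, assuming the proxy's `/model/info` endpoint wraps results in a `data` list and accepts a `litellm_model_id` query parameter; the base URL and key below are placeholders:

import httpx

BASE_URL = "http://localhost:4000"  # placeholder proxy address
HEADERS = {"Authorization": "Bearer sk-1234"}  # placeholder virtual key

def test_deployment_info_matches_all_model_info():
    all_info = httpx.get(f"{BASE_URL}/model/info", headers=HEADERS).json()["data"]
    for entry in all_info:
        model_id = entry["model_info"]["id"]
        single = httpx.get(
            f"{BASE_URL}/model/info",
            params={"litellm_model_id": model_id},
            headers=HEADERS,
        ).json()["data"]
        # The per-deployment response should be the same entry, not a
        # differently populated record (the bug in #7873).
        assert single == [entry]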

* fix(usage.tsx): make model analytics free

Addresses @iqballx's feedback

* fix(invoke_handler.py): fix bedrock error chunk parsing, returning the correct bedrock status code and error message when a stream chunk contains an error

Improves bedrock stream error handling
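
A rough sketch of the idea, assuming a hypothetical error envelope inside the stream chunk; the real invoke_handler.py parsing and litellm's exception mapping are more involved:

import json

class BedrockStreamError(Exception):
    """Carries the provider's status code instead of a generic 500."""
    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        super().__init__(f"BedrockException {status_code}: {message}")

def parse_bedrock_stream_chunk(raw_chunk: bytes) -> dict:
    """Raise with bedrock's own status code and message when a stream
    chunk is an error; otherwise return the decoded chunk."""
    chunk = json.loads(raw_chunk)
    if "error" in chunk:  # hypothetical error envelope
        err = chunk["error"]
        raise BedrockStreamError(
            status_code=err.get("status_code", 500),
            message=err.get("message", "unknown bedrock error"),
        )
    return chunk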

* fix(proxy_server.py): fix linting errors

* test(test_auth_checks.py): remove redundant test

* fix(proxy_server.py): fix linting errors

* test: fix flaky test

* test: fix test

Krish Dholakia, 2025-01-21 08:19:07 -08:00, committed by GitHub
parent 0295f494b6
commit c8aa876785
8 changed files with 146 additions and 131 deletions

@@ -642,19 +642,27 @@ def tgi_mock_post(*args, **kwargs):
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 
 
-@pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
-async def test_hf_embedding_sentence_sim(sync_mode):
+@patch("litellm.llms.huggingface.chat.handler.async_get_hf_task_embedding_for_model")
+@patch("litellm.llms.huggingface.chat.handler.get_hf_task_embedding_for_model")
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_hf_embedding_sentence_sim(
+    mock_async_get_hf_task_embedding_for_model,
+    mock_get_hf_task_embedding_for_model,
+    sync_mode,
+):
     try:
         # huggingface/microsoft/codebert-base
         # huggingface/facebook/bart-large
+        mock_get_hf_task_embedding_for_model.return_value = "sentence-similarity"
+        mock_async_get_hf_task_embedding_for_model.return_value = "sentence-similarity"
         if sync_mode is True:
             client = HTTPHandler(concurrent_limit=1)
         else:
             client = AsyncHTTPHandler(concurrent_limit=1)
         with patch.object(client, "post", side_effect=tgi_mock_post) as mock_client:
             data = {
-                "model": "huggingface/TaylorAI/bge-micro-v2",
+                "model": "huggingface/sentence-transformers/TaylorAI/bge-micro-v2",
                 "input": ["good morning from litellm", "this is another item"],
                 "client": client,
             }
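
A note on the hunk above: the updated test stubs both the sync and async `get_hf_task_embedding_for_model` helpers to return "sentence-similarity", so the test no longer depends on a live Hugging Face metadata lookup, and the embedding model path is corrected to include the `sentence-transformers/` prefix.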