mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-24 18:24:20 +00:00
add bedrock llama vision support + cohere / infinity rerank - 'return_documents' support (#8684)
* build(model_prices_and_context_window.json): mark bedrock llama as supporting vision based on docs
* Add price for Cerebras llama3.3-70b (#8676)
* docs(readme.md): fix contributing docs point people to new mock directory testing structure s/o @vibhavbhat
* build: update contributing readme
* docs(readme.md): improve docs
* docs(readme.md): cleanup readme on tests/
* docs(README.md): cleanup doc
* feat(infinity/): support returning documents when return_documents=True
* test(test_rerank.py): add e2e testing for cohere rerank
* fix: fix linting errors
* fix(together_ai/): fix together ai transformation
* fix: fix linting error
* fix: fix linting errors
* fix: fix linting errors
* test: mark cohere as flaky
* build: fix model supports check
* test: fix test
* test: mark flaky test
* fix: fix test
* test: fix test

---------

Co-authored-by: Yury Koleda <fut.wrk@gmail.com>
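At a glance, the new `return_documents` flag surfaces the matched text on each rerank result. A minimal sketch based on the e2e test added in this commit (assumes a valid `COHERE_API_KEY` in the environment):

```python
import litellm

response = litellm.rerank(
    model="cohere/rerank-english-v3.0",
    query="hello",
    documents=["hello", "world"],
    top_n=3,
    return_documents=True,  # new: include document text in each result
)

# Each result now carries a {"text": ...} document payload
assert response.results[0]["document"]["text"] == "hello"
```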
parent b682dc4ec8
commit 251467a525
13 changed files with 206 additions and 31 deletions
README.md (19 changed lines)
@@ -343,25 +343,32 @@ curl 'http://0.0.0.0:4000/key/generate' \
 To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.
 
 Here's how to modify the repo locally:
 
 Step 1: Clone the repo
 
 ```
 git clone https://github.com/BerriAI/litellm.git
 ```
 
-Step 2: Navigate into the project, and install dependencies:
+Step 2: Install dependencies:
 
 ```
-cd litellm
-poetry install -E extra_proxy -E proxy
+pip install -r requirements.txt
 ```
 
 Step 3: Test your change:
 
+a. Add a pytest test within `tests/litellm/`
+
+This folder follows the same directory structure as `litellm/`.
+
+If a corresponding test file does not exist, create one.
+
+b. Run the test
+
 ```
-cd tests # pwd: Documents/litellm/litellm/tests
-poetry run flake8
-poetry run pytest .
+cd tests/litellm # pwd: Documents/litellm/litellm/tests/litellm
+pytest /path/to/test_file.py
 ```
 
 Step 4: Submit a PR with your changes! 🚀
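For example, a mock test following the new `tests/litellm/` convention might look like the sketch below. The file path and test body are illustrative assumptions, not from the diff; only the "mock tests, mirrored directory structure" rule comes from the README change above:

```python
# tests/litellm/test_example.py (hypothetical file mirroring litellm/)
import litellm


def test_mock_completion(monkeypatch):
    # Mock-only folder: stub the provider call rather than hitting a real API.
    def fake_completion(*args, **kwargs):
        return {"choices": [{"message": {"role": "assistant", "content": "hi"}}]}

    monkeypatch.setattr(litellm, "completion", fake_completion)
    resp = litellm.completion(
        model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hey"}]
    )
    assert resp["choices"][0]["message"]["content"] == "hi"
```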
@@ -13,8 +13,14 @@ import litellm
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.cohere.rerank.transformation import CohereRerankConfig
 from litellm.secret_managers.main import get_secret_str
-from litellm.types.rerank import RerankBilledUnits, RerankResponseMeta, RerankTokens
-from litellm.types.utils import RerankResponse
+from litellm.types.rerank import (
+    RerankBilledUnits,
+    RerankResponse,
+    RerankResponseDocument,
+    RerankResponseMeta,
+    RerankResponseResult,
+    RerankTokens,
+)
 
 from .common_utils import InfinityError
 
@@ -88,13 +94,23 @@ class InfinityRerankConfig(CohereRerankConfig):
         )
         rerank_meta = RerankResponseMeta(billed_units=_billed_units, tokens=_tokens)
 
-        _results: Optional[List[dict]] = raw_response_json.get("results")
-
-        if _results is None:
+        cohere_results: List[RerankResponseResult] = []
+        if raw_response_json.get("results"):
+            for result in raw_response_json.get("results"):
+                _rerank_response = RerankResponseResult(
+                    index=result.get("index"),
+                    relevance_score=result.get("relevance_score"),
+                )
+                if result.get("document"):
+                    _rerank_response["document"] = RerankResponseDocument(
+                        text=result.get("document")
+                    )
+                cohere_results.append(_rerank_response)
+        if cohere_results is None:
             raise ValueError(f"No results found in the response={raw_response_json}")
 
         return RerankResponse(
             id=raw_response_json.get("id") or str(uuid.uuid4()),
-            results=_results,  # type: ignore
+            results=cohere_results,
             meta=rerank_meta,
         )  # Return response
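In isolation, the new Infinity parsing step works like the sketch below. The payload is a made-up Infinity-style response; note the committed code's `if cohere_results is None` guard can never fire on a list, so this sketch checks emptiness with `if not results` instead:

```python
from typing import List

from litellm.types.rerank import RerankResponseDocument, RerankResponseResult

raw_response_json = {  # made-up Infinity-style payload
    "results": [{"index": 0, "relevance_score": 0.95, "document": "hello"}]
}

results: List[RerankResponseResult] = []
for result in raw_response_json.get("results") or []:
    item = RerankResponseResult(
        index=result["index"],
        relevance_score=result["relevance_score"],
    )
    if result.get("document"):
        # Infinity returns the document as a plain string; wrap it in the
        # typed {"text": ...} shape the unified RerankResponse expects.
        item["document"] = RerankResponseDocument(text=result["document"])
    results.append(item)

if not results:  # a list is never None, so check emptiness instead
    raise ValueError(f"No results found in the response={raw_response_json}")
```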
@@ -10,7 +10,9 @@ from typing import List, Optional
 from litellm.types.rerank import (
     RerankBilledUnits,
     RerankResponse,
+    RerankResponseDocument,
     RerankResponseMeta,
+    RerankResponseResult,
     RerankTokens,
 )
@@ -27,8 +29,35 @@ class TogetherAIRerankConfig:
         if _results is None:
             raise ValueError(f"No results found in the response={response}")
 
+        rerank_results: List[RerankResponseResult] = []
+
+        for result in _results:
+            # Validate required fields exist
+            if not all(key in result for key in ["index", "relevance_score"]):
+                raise ValueError(f"Missing required fields in the result={result}")
+
+            # Get document data if it exists
+            document_data = result.get("document", {})
+            document = (
+                RerankResponseDocument(text=str(document_data.get("text", "")))
+                if document_data
+                else None
+            )
+
+            # Create typed result
+            rerank_result = RerankResponseResult(
+                index=int(result["index"]),
+                relevance_score=float(result["relevance_score"]),
+            )
+
+            # Only add document if it exists
+            if document:
+                rerank_result["document"] = document
+
+            rerank_results.append(rerank_result)
+
         return RerankResponse(
             id=response.get("id") or str(uuid.uuid4()),
-            results=_results,  # type: ignore
+            results=rerank_results,
             meta=rerank_meta,
         )  # Return response
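The Together AI payload shape differs from Infinity's: `document` arrives as a dict rather than a bare string. A minimal sketch under that assumption (the result entry is made up):

```python
from litellm.types.rerank import RerankResponseDocument

# Made-up Together AI-style result entry; note document is a dict here,
# unlike Infinity's bare string.
result = {"index": 0, "relevance_score": 0.91, "document": {"text": "hello"}}

document_data = result.get("document", {})
if document_data:
    document = RerankResponseDocument(text=str(document_data.get("text", "")))
    print(document)  # {'text': 'hello'}
```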
@@ -2643,6 +2643,17 @@
         "supports_function_calling": true,
         "supports_tool_choice": true
     },
+    "cerebras/llama3.3-70b": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000085,
+        "output_cost_per_token": 0.0000012,
+        "litellm_provider": "cerebras",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": true
+    },
     "friendliai/meta-llama-3.1-8b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
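As a quick sanity check on the Cerebras rates above (arithmetic only, not from the diff), the per-token prices correspond to $0.85 per million input tokens and $1.20 per million output tokens:

```python
input_cost_per_token = 0.00000085   # from the entry above
output_cost_per_token = 0.0000012

print(input_cost_per_token * 1_000_000)   # -> 0.85, i.e. $0.85 / 1M input tokens
print(output_cost_per_token * 1_000_000)  # -> 1.2,  i.e. $1.20 / 1M output tokens

# Example request: 10k prompt tokens + 2k completion tokens
cost = 10_000 * input_cost_per_token + 2_000 * output_cost_per_token
print(f"${cost:.4f}")  # $0.0109
```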
@@ -7450,7 +7461,8 @@
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_tool_choice": false
+        "supports_tool_choice": false,
+        "supports_vision": true
     },
     "us.meta.llama3-2-11b-instruct-v1:0": {
         "max_tokens": 128000,
@@ -7461,7 +7473,8 @@
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_tool_choice": false
+        "supports_tool_choice": false,
+        "supports_vision": true
     },
     "meta.llama3-2-90b-instruct-v1:0": {
         "max_tokens": 128000,
@@ -7472,7 +7485,8 @@
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_tool_choice": false
+        "supports_tool_choice": false,
+        "supports_vision": true
     },
     "us.meta.llama3-2-90b-instruct-v1:0": {
         "max_tokens": 128000,
@@ -7483,7 +7497,8 @@
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_tool_choice": false
+        "supports_tool_choice": false,
+        "supports_vision": true
     },
     "us.meta.llama3-3-70b-instruct-v1:0": {
         "max_tokens": 4096,
litellm/types/llms/rerank.py (new file, 19 lines)
@@ -0,0 +1,19 @@
+import json
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
+
+from typing_extensions import (
+    Protocol,
+    Required,
+    Self,
+    TypeGuard,
+    get_origin,
+    override,
+    runtime_checkable,
+)
+
+
+class InfinityRerankResult(TypedDict):
+    index: int
+    relevance_score: float
+    document: Optional[str]
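A quick illustration of the new `InfinityRerankResult` type; the values are made up, and note `document` is a bare string here (Infinity's wire shape), unlike the `{"text": ...}` dict used in the unified response type:

```python
from litellm.types.llms.rerank import InfinityRerankResult

# Made-up values; Infinity returns the matched document as a plain string.
result: InfinityRerankResult = {
    "index": 0,
    "relevance_score": 0.95,
    "document": "hello",
}
print(result["document"])  # "hello"
```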
@@ -7,7 +7,7 @@ https://docs.cohere.com/reference/rerank
 from typing import List, Optional, Union
 
 from pydantic import BaseModel, PrivateAttr
-from typing_extensions import TypedDict
+from typing_extensions import Required, TypedDict
 
 
 class RerankRequest(BaseModel):
@@ -45,9 +45,14 @@ class RerankResponseMeta(TypedDict, total=False):
     tokens: Optional[RerankTokens]
 
 
-class RerankResponseResult(TypedDict):
-    index: int
-    relevance_score: float
+class RerankResponseDocument(TypedDict):
+    text: str
+
+
+class RerankResponseResult(TypedDict, total=False):
+    index: Required[int]
+    relevance_score: Required[float]
+    document: RerankResponseDocument
 
 
 class RerankResponse(BaseModel):
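The switch to `total=False` with `Required[...]` keys makes `document` the only optional field of `RerankResponseResult`. A standalone sketch of the same pattern (types redeclared locally so it runs on its own):

```python
from typing_extensions import Required, TypedDict


class RerankResponseDocument(TypedDict):
    text: str


class RerankResponseResult(TypedDict, total=False):
    index: Required[int]              # required despite total=False
    relevance_score: Required[float]  # required despite total=False
    document: RerankResponseDocument  # optional; set when return_documents=True


# Both shapes type-check: without a document...
r1: RerankResponseResult = {"index": 0, "relevance_score": 0.9}
# ...and with one.
r2: RerankResponseResult = {"index": 1, "relevance_score": 0.8, "document": {"text": "hi"}}
```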
@@ -1 +1,9 @@
-**In total litellm runs 500+ tests** Most tests are in [/litellm/tests](https://github.com/BerriAI/litellm/tree/main/litellm/tests). These are just the tests for the proxy docker image, used for circle ci.
+**In total litellm runs 1000+ tests**
+
+[02/20/2025] Update:
+
+To make it easier to contribute and map what behavior is tested,
+
+we've started mapping the litellm directory in `tests/litellm`
+
+This folder can only run mock tests.
@@ -1165,6 +1165,9 @@ def test_models_by_provider():
     """
     Make sure all providers from model map are in the valid providers list
     """
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
     from litellm import models_by_provider
 
     providers = set()
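As context for this setup pattern (which also appears in the bedrock cost test further down): setting `LITELLM_LOCAL_MODEL_COST_MAP` makes litellm load its bundled cost map instead of fetching the hosted copy, and `url=""` triggers the local fallback. A minimal sketch outside pytest, assuming the env var behaves as these tests rely on:

```python
import os

import litellm

# Use the packaged model-cost map rather than fetching the remote JSON;
# url="" makes get_model_cost_map fall back to the local copy.
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

# The entries added in this commit should now be visible locally.
print("cerebras/llama3.3-70b" in litellm.model_cost)
```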
@@ -87,6 +87,39 @@ async def test_infinity_rerank():
         assert_response_shape(response, custom_llm_provider="infinity")
 
 
+@pytest.mark.asyncio()
+async def test_infinity_rerank_with_return_documents():
+    mock_response = AsyncMock()
+
+    mock_response = AsyncMock()
+
+    def return_val():
+        return {
+            "id": "cmpl-mockid",
+            "results": [{"index": 0, "relevance_score": 0.95, "document": "hello"}],
+            "usage": {"prompt_tokens": 100, "total_tokens": 150},
+        }
+
+    mock_response.json = return_val
+    mock_response.headers = {"key": "value"}
+    mock_response.status_code = 200
+
+    with patch(
+        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+        return_value=mock_response,
+    ) as mock_post:
+        response = await litellm.arerank(
+            model="infinity/rerank-model",
+            query="hello",
+            documents=["hello", "world"],
+            top_n=3,
+            return_documents=True,
+            api_base="https://api.infinity.ai",
+        )
+        assert response.results[0]["document"] == {"text": "hello"}
+        assert_response_shape(response, custom_llm_provider="infinity")
+
+
 @pytest.mark.asyncio()
 async def test_infinity_rerank_with_env(monkeypatch):
     # Set up mock response
@@ -9,6 +9,7 @@ from dotenv import load_dotenv
 load_dotenv()
 import io
 import os
+from typing import Optional, Dict
 
 sys.path.insert(
     0, os.path.abspath("../..")
@@ -29,7 +30,11 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 def assert_response_shape(response, custom_llm_provider):
     expected_response_shape = {"id": str, "results": list, "meta": dict}
 
-    expected_results_shape = {"index": int, "relevance_score": float}
+    expected_results_shape = {
+        "index": int,
+        "relevance_score": float,
+        "document": Optional[Dict[str, str]],
+    }
 
     expected_meta_shape = {"api_version": dict, "billed_units": dict}
 
@@ -44,6 +49,9 @@ def assert_response_shape(response, custom_llm_provider):
         assert isinstance(
             result["relevance_score"], expected_results_shape["relevance_score"]
         )
+        if "document" in result:
+            assert isinstance(result["document"], Dict)
+            assert isinstance(result["document"]["text"], str)
     assert isinstance(response.meta, expected_response_shape["meta"])
 
     if custom_llm_provider == "cohere":
@@ -364,17 +372,15 @@ def test_rerank_response_assertions():
         **{
             "id": "ab0fcca0-b617-11ef-b292-0242ac110002",
             "results": [
-                {"index": 2, "relevance_score": 0.9958819150924683, "document": None},
-                {"index": 0, "relevance_score": 0.001293411129154265, "document": None},
+                {"index": 2, "relevance_score": 0.9958819150924683},
+                {"index": 0, "relevance_score": 0.001293411129154265},
                 {
                     "index": 1,
                     "relevance_score": 7.641685078851879e-05,
-                    "document": None,
                 },
                 {
                     "index": 3,
                     "relevance_score": 7.621097756782547e-05,
-                    "document": None,
                 },
             ],
             "meta": {
@@ -387,3 +393,19 @@ def test_rerank_response_assertions():
     )
 
     assert_response_shape(r, custom_llm_provider="custom")
+
+
+@pytest.mark.flaky(retries=3, delay=1)
+def test_rerank_cohere_api():
+    response = litellm.rerank(
+        model="cohere/rerank-english-v3.0",
+        query="hello",
+        documents=["hello", "world"],
+        return_documents=True,
+        top_n=3,
+    )
+    print("rerank response", response)
+    assert response.results[0]["document"] is not None
+    assert response.results[0]["document"]["text"] is not None
+    assert response.results[0]["document"]["text"] == "hello"
+    assert response.results[1]["document"]["text"] == "world"
@@ -2775,6 +2775,8 @@ def test_bedrock_cost_calc_with_region():
     os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
     litellm.model_cost = litellm.get_model_cost_map(url="")
 
+    litellm.add_known_models()
+
     hidden_params = {
        "custom_llm_provider": "bedrock",
        "region_name": "us-east-1",
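The added `litellm.add_known_models()` call presumably refreshes the provider model lists derived from `model_cost`, so entries reloaded above (e.g. the bedrock llama variants newly marked `supports_vision` in this commit) are registered before cost calculation. A sketch of the pattern:

```python
import os

import litellm

os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

# Re-derive provider model lists (litellm.bedrock_models, etc.) from the
# freshly loaded cost map before running cost calculations against it.
litellm.add_known_models()
```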
@@ -961,6 +961,7 @@ async def test_gemini_embeddings(sync_mode, input):
 
 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
+@pytest.mark.flaky(retries=3, delay=1)
 async def test_hf_embedddings_with_optional_params(sync_mode):
     litellm.set_verbose = True
 