add bedrock llama vision support + cohere / infinity rerank - 'return_documents' support (#8684)

* build(model_prices_and_context_window.json): mark bedrock llama as supporting vision based on docs

* Add price for Cerebras llama3.3-70b (#8676)

* docs(readme.md): fix contributing docs

Point people to the new mock directory testing structure (s/o @vibhavbhat)

* build: update contributing readme

* docs(readme.md): improve docs

* docs(readme.md): cleanup readme on tests/

* docs(README.md): cleanup doc

* feat(infinity/): support returning documents when return_documents=True

* test(test_rerank.py): add e2e testing for cohere rerank

* fix: fix linting errors

* fix(together_ai/): fix together ai transformation

* fix: fix linting error

* fix: fix linting errors

* fix: fix linting errors

* test: mark cohere as flaky

* build: fix model supports check

* test: fix test

* test: mark flaky test

* fix: fix test

* test: fix test

---------

Co-authored-by: Yury Koleda <fut.wrk@gmail.com>
Krish Dholakia 2025-02-20 21:23:54 -08:00 committed by GitHub
parent b682dc4ec8
commit 251467a525
13 changed files with 206 additions and 31 deletions
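Illustration (not part of the commit): the `return_documents` support described above is exercised end-to-end by the new Cohere test in this diff; a minimal call looks roughly like the sketch below, assuming a Cohere API key is configured and reusing the same placeholder inputs as that test.

```
import litellm

response = litellm.rerank(
    model="cohere/rerank-english-v3.0",
    query="hello",
    documents=["hello", "world"],
    top_n=3,
    return_documents=True,  # new: results now carry the matched document text
)

# With return_documents=True, each result includes a {"text": ...} document.
print(response.results[0]["document"]["text"])  # -> "hello"
```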


@@ -343,25 +343,32 @@ curl 'http://0.0.0.0:4000/key/generate' \
To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.
Here's how to modify the repo locally:
Step 1: Clone the repo
```
git clone https://github.com/BerriAI/litellm.git
```
Step 2: Navigate into the project, and install dependencies:
Step 2: Install dependencies:
```
cd litellm
poetry install -E extra_proxy -E proxy
pip install -r requirements.txt
```
Step 3: Test your change:
a. Add a pytest test within `tests/litellm/`
This folder follows the same directory structure as `litellm/`.
If a corresponding test file does not exist, create one.
b. Run the test
```
cd tests # pwd: Documents/litellm/litellm/tests
poetry run flake8
poetry run pytest .
cd tests/litellm # pwd: Documents/litellm/litellm/tests/litellm
pytest /path/to/test_file.py
```
Step 4: Submit a PR with your changes! 🚀
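Illustration (not part of the commit): a minimal mock test for the `tests/litellm/` structure described in step 3, following the same `AsyncHTTPHandler.post` patching pattern used by the rerank tests in this commit (the file name, model, and api_base are placeholders).

```
from unittest.mock import AsyncMock, patch

import pytest

import litellm


@pytest.mark.asyncio
async def test_arerank_mock():
    # Stub the shared async HTTP handler so no real network call is made.
    mock_response = AsyncMock()
    mock_response.status_code = 200
    mock_response.headers = {"key": "value"}
    mock_response.json = lambda: {
        "id": "mock-id",
        "results": [{"index": 0, "relevance_score": 0.9}],
        "usage": {"prompt_tokens": 10, "total_tokens": 12},
    }

    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        return_value=mock_response,
    ):
        response = await litellm.arerank(
            model="infinity/rerank-model",
            query="hello",
            documents=["hello", "world"],
            api_base="https://api.infinity.ai",
        )

    assert response.results[0]["index"] == 0
```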


@@ -13,8 +13,14 @@ import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.cohere.rerank.transformation import CohereRerankConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.rerank import RerankBilledUnits, RerankResponseMeta, RerankTokens
from litellm.types.utils import RerankResponse
from litellm.types.rerank import (
RerankBilledUnits,
RerankResponse,
RerankResponseDocument,
RerankResponseMeta,
RerankResponseResult,
RerankTokens,
)
from .common_utils import InfinityError
@@ -88,13 +94,23 @@ class InfinityRerankConfig(CohereRerankConfig):
)
rerank_meta = RerankResponseMeta(billed_units=_billed_units, tokens=_tokens)
_results: Optional[List[dict]] = raw_response_json.get("results")
if _results is None:
cohere_results: List[RerankResponseResult] = []
if raw_response_json.get("results"):
for result in raw_response_json.get("results"):
_rerank_response = RerankResponseResult(
index=result.get("index"),
relevance_score=result.get("relevance_score"),
)
if result.get("document"):
_rerank_response["document"] = RerankResponseDocument(
text=result.get("document")
)
cohere_results.append(_rerank_response)
if cohere_results is None:
raise ValueError(f"No results found in the response={raw_response_json}")
return RerankResponse(
id=raw_response_json.get("id") or str(uuid.uuid4()),
results=_results, # type: ignore
results=cohere_results,
meta=rerank_meta,
) # Return response
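Illustration (not part of the commit): a worked example of the wrapping performed above. Infinity returns the document as a bare string, and the transformation emits the normalized `{"text": ...}` shape; values are made up, and the types come from `litellm.types.rerank` as imported in this diff.

```
from typing import List

from litellm.types.rerank import RerankResponseDocument, RerankResponseResult

# Made-up raw payload in the shape Infinity returns when return_documents=True.
raw_response_json = {
    "results": [{"index": 0, "relevance_score": 0.95, "document": "hello"}]
}

results: List[RerankResponseResult] = []
for result in raw_response_json["results"]:
    typed_result = RerankResponseResult(
        index=result["index"], relevance_score=result["relevance_score"]
    )
    if result.get("document"):
        # The bare string is wrapped into the normalized document shape.
        typed_result["document"] = RerankResponseDocument(text=result["document"])
    results.append(typed_result)

assert results[0]["document"] == {"text": "hello"}
```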


@@ -10,7 +10,9 @@ from typing import List, Optional
from litellm.types.rerank import (
RerankBilledUnits,
RerankResponse,
RerankResponseDocument,
RerankResponseMeta,
RerankResponseResult,
RerankTokens,
)
@@ -27,8 +29,35 @@ class TogetherAIRerankConfig:
if _results is None:
raise ValueError(f"No results found in the response={response}")
rerank_results: List[RerankResponseResult] = []
for result in _results:
# Validate required fields exist
if not all(key in result for key in ["index", "relevance_score"]):
raise ValueError(f"Missing required fields in the result={result}")
# Get document data if it exists
document_data = result.get("document", {})
document = (
RerankResponseDocument(text=str(document_data.get("text", "")))
if document_data
else None
)
# Create typed result
rerank_result = RerankResponseResult(
index=int(result["index"]),
relevance_score=float(result["relevance_score"]),
)
# Only add document if it exists
if document:
rerank_result["document"] = document
rerank_results.append(rerank_result)
return RerankResponse(
id=response.get("id") or str(uuid.uuid4()),
results=_results, # type: ignore
results=rerank_results,
meta=rerank_meta,
) # Return response


@@ -2643,6 +2643,17 @@
"supports_function_calling": true,
"supports_tool_choice": true
},
"cerebras/llama3.3-70b": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.00000085,
"output_cost_per_token": 0.0000012,
"litellm_provider": "cerebras",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true
},
"friendliai/meta-llama-3.1-8b-instruct": {
"max_tokens": 8192,
"max_input_tokens": 8192,
@@ -7450,7 +7461,8 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
"supports_tool_choice": false,
"supports_vision": true
},
"us.meta.llama3-2-11b-instruct-v1:0": {
"max_tokens": 128000,
@@ -7461,7 +7473,8 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
"supports_tool_choice": false,
"supports_vision": true
},
"meta.llama3-2-90b-instruct-v1:0": {
"max_tokens": 128000,
@@ -7472,7 +7485,8 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
"supports_tool_choice": false,
"supports_vision": true
},
"us.meta.llama3-2-90b-instruct-v1:0": {
"max_tokens": 128000,
@@ -7483,7 +7497,8 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
"supports_tool_choice": false,
"supports_vision": true
},
"us.meta.llama3-3-70b-instruct-v1:0": {
"max_tokens": 4096,


@@ -0,0 +1,19 @@
import json
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
from typing_extensions import (
Protocol,
Required,
Self,
TypeGuard,
get_origin,
override,
runtime_checkable,
)
class InfinityRerankResult(TypedDict):
index: int
relevance_score: float
document: Optional[str]


@@ -7,7 +7,7 @@ https://docs.cohere.com/reference/rerank
from typing import List, Optional, Union
from pydantic import BaseModel, PrivateAttr
from typing_extensions import TypedDict
from typing_extensions import Required, TypedDict
class RerankRequest(BaseModel):
@@ -45,9 +45,14 @@ class RerankResponseMeta(TypedDict, total=False):
tokens: Optional[RerankTokens]
class RerankResponseResult(TypedDict):
index: int
relevance_score: float
class RerankResponseDocument(TypedDict):
text: str
class RerankResponseResult(TypedDict, total=False):
index: Required[int]
relevance_score: Required[float]
document: RerankResponseDocument
class RerankResponse(BaseModel):
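Illustration (not part of the commit): with `total=False` plus `Required`, `index` and `relevance_score` stay mandatory while `document` becomes an optional key, so both of the values below are valid `RerankResponseResult` instances.

```
from litellm.types.rerank import RerankResponseDocument, RerankResponseResult

# Required keys only -- valid because only index/relevance_score are Required.
minimal: RerankResponseResult = {"index": 0, "relevance_score": 0.99}

# When return_documents=True, the optional "document" key is populated.
with_doc: RerankResponseResult = {
    "index": 1,
    "relevance_score": 0.42,
    "document": RerankResponseDocument(text="hello"),
}
```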


@@ -2643,6 +2643,17 @@
"supports_function_calling": true,
"supports_tool_choice": true
},
"cerebras/llama3.3-70b": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.00000085,
"output_cost_per_token": 0.0000012,
"litellm_provider": "cerebras",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true
},
"friendliai/meta-llama-3.1-8b-instruct": {
"max_tokens": 8192,
"max_input_tokens": 8192,
@@ -7450,7 +7461,8 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
"supports_tool_choice": false,
"supports_vision": true
},
"us.meta.llama3-2-11b-instruct-v1:0": {
"max_tokens": 128000,
@@ -7461,7 +7473,8 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
"supports_tool_choice": false,
"supports_vision": true
},
"meta.llama3-2-90b-instruct-v1:0": {
"max_tokens": 128000,
@@ -7472,7 +7485,8 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
"supports_tool_choice": false,
"supports_vision": true
},
"us.meta.llama3-2-90b-instruct-v1:0": {
"max_tokens": 128000,
@@ -7483,7 +7497,8 @@
"litellm_provider": "bedrock",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": false
"supports_tool_choice": false,
"supports_vision": true
},
"us.meta.llama3-3-70b-instruct-v1:0": {
"max_tokens": 4096,


@@ -1 +1,9 @@
**In total litellm runs 500+ tests** Most tests are in [/litellm/tests](https://github.com/BerriAI/litellm/tree/main/litellm/tests). These are just the tests for the proxy docker image, used for circle ci.
**In total litellm runs 1000+ tests**
[02/20/2025] Update:
To make it easier to contribute and map what behavior is tested,
we've started mapping the litellm directory in `tests/litellm`
This folder can only run mock tests.


@@ -1165,6 +1165,9 @@ def test_models_by_provider():
"""
Make sure all providers from model map are in the valid providers list
"""
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
from litellm import models_by_provider
providers = set()


@@ -87,6 +87,39 @@ async def test_infinity_rerank():
assert_response_shape(response, custom_llm_provider="infinity")
@pytest.mark.asyncio()
async def test_infinity_rerank_with_return_documents():
mock_response = AsyncMock()
def return_val():
return {
"id": "cmpl-mockid",
"results": [{"index": 0, "relevance_score": 0.95, "document": "hello"}],
"usage": {"prompt_tokens": 100, "total_tokens": 150},
}
mock_response.json = return_val
mock_response.headers = {"key": "value"}
mock_response.status_code = 200
with patch(
"litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
return_value=mock_response,
) as mock_post:
response = await litellm.arerank(
model="infinity/rerank-model",
query="hello",
documents=["hello", "world"],
top_n=3,
return_documents=True,
api_base="https://api.infinity.ai",
)
assert response.results[0]["document"] == {"text": "hello"}
assert_response_shape(response, custom_llm_provider="infinity")
@pytest.mark.asyncio()
async def test_infinity_rerank_with_env(monkeypatch):
# Set up mock response


@@ -9,6 +9,7 @@ from dotenv import load_dotenv
load_dotenv()
import io
import os
from typing import Optional, Dict
sys.path.insert(
0, os.path.abspath("../..")
@@ -29,7 +30,11 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
def assert_response_shape(response, custom_llm_provider):
expected_response_shape = {"id": str, "results": list, "meta": dict}
expected_results_shape = {"index": int, "relevance_score": float}
expected_results_shape = {
"index": int,
"relevance_score": float,
"document": Optional[Dict[str, str]],
}
expected_meta_shape = {"api_version": dict, "billed_units": dict}
@@ -44,6 +49,9 @@ def assert_response_shape(response, custom_llm_provider):
assert isinstance(
result["relevance_score"], expected_results_shape["relevance_score"]
)
if "document" in result:
assert isinstance(result["document"], Dict)
assert isinstance(result["document"]["text"], str)
assert isinstance(response.meta, expected_response_shape["meta"])
if custom_llm_provider == "cohere":
@@ -364,17 +372,15 @@ def test_rerank_response_assertions():
**{
"id": "ab0fcca0-b617-11ef-b292-0242ac110002",
"results": [
{"index": 2, "relevance_score": 0.9958819150924683, "document": None},
{"index": 0, "relevance_score": 0.001293411129154265, "document": None},
{"index": 2, "relevance_score": 0.9958819150924683},
{"index": 0, "relevance_score": 0.001293411129154265},
{
"index": 1,
"relevance_score": 7.641685078851879e-05,
"document": None,
},
{
"index": 3,
"relevance_score": 7.621097756782547e-05,
"document": None,
},
],
"meta": {
@@ -387,3 +393,19 @@
)
assert_response_shape(r, custom_llm_provider="custom")
@pytest.mark.flaky(retries=3, delay=1)
def test_rerank_cohere_api():
response = litellm.rerank(
model="cohere/rerank-english-v3.0",
query="hello",
documents=["hello", "world"],
return_documents=True,
top_n=3,
)
print("rerank response", response)
assert response.results[0]["document"] is not None
assert response.results[0]["document"]["text"] is not None
assert response.results[0]["document"]["text"] == "hello"
assert response.results[1]["document"]["text"] == "world"


@@ -2775,6 +2775,8 @@ def test_bedrock_cost_calc_with_region():
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
litellm.add_known_models()
hidden_params = {
"custom_llm_provider": "bedrock",
"region_name": "us-east-1",


@@ -961,6 +961,7 @@ async def test_gemini_embeddings(sync_mode, input):
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
@pytest.mark.flaky(retries=3, delay=1)
async def test_hf_embedddings_with_optional_params(sync_mode):
litellm.set_verbose = True