Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)
add bedrock llama vision support + cohere / infinity rerank - 'return_documents' support (#8684)
* build(model_prices_and_context_window.json): mark bedrock llama as supporting vision based on docs
* Add price for Cerebras llama3.3-70b (#8676)
* docs(readme.md): fix contributing docs - point people to new mock directory testing structure (s/o @vibhavbhat)
* build: update contributing readme
* docs(readme.md): improve docs
* docs(readme.md): cleanup readme on tests/
* docs(README.md): cleanup doc
* feat(infinity/): support returning documents when return_documents=True
* test(test_rerank.py): add e2e testing for cohere rerank
* fix: fix linting errors
* fix(together_ai/): fix together ai transformation
* fix: fix linting error
* fix: fix linting errors
* fix: fix linting errors
* test: mark cohere as flaky
* build: fix model supports check
* test: fix test
* test: mark flaky test
* fix: fix test
* test: fix test

---------

Co-authored-by: Yury Koleda <fut.wrk@gmail.com>
Parent: b682dc4ec8
Commit: 251467a525
13 changed files with 206 additions and 31 deletions
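In practice, the new rerank flag looks like this (a sketch adapted from the `test_rerank_cohere_api` e2e test added below; assumes a valid `COHERE_API_KEY` in the environment):

```python
import litellm

# 'return_documents=True' asks the provider to echo each ranked document
# back in the response; each result then carries a {"text": ...} payload.
response = litellm.rerank(
    model="cohere/rerank-english-v3.0",
    query="hello",
    documents=["hello", "world"],
    top_n=3,
    return_documents=True,
)

print(response.results[0]["document"]["text"])  # "hello" ranks first for "hello"
```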
README.md (19 changed lines)
@@ -343,25 +343,32 @@ curl 'http://0.0.0.0:4000/key/generate' \
 To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.

 Here's how to modify the repo locally:

 Step 1: Clone the repo

 ```
 git clone https://github.com/BerriAI/litellm.git
 ```

-Step 2: Navigate into the project, and install dependencies:
+Step 2: Install dependencies:

 ```
-cd litellm
-poetry install -E extra_proxy -E proxy
+pip install -r requirements.txt
 ```

 Step 3: Test your change:

+a. Add a pytest test within `tests/litellm/`
+
+This folder follows the same directory structure as `litellm/`.
+
+If a corresponding test file does not exist, create one.
+
+b. Run the test
+
 ```
-cd tests # pwd: Documents/litellm/litellm/tests
-poetry run flake8
-poetry run pytest .
+cd tests/litellm # pwd: Documents/litellm/litellm/tests/litellm
+pytest /path/to/test_file.py
 ```

 Step 4: Submit a PR with your changes! 🚀
litellm/llms/infinity/rerank/transformation.py

@@ -13,8 +13,14 @@ import litellm
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.cohere.rerank.transformation import CohereRerankConfig
 from litellm.secret_managers.main import get_secret_str
-from litellm.types.rerank import RerankBilledUnits, RerankResponseMeta, RerankTokens
-from litellm.types.utils import RerankResponse
+from litellm.types.rerank import (
+    RerankBilledUnits,
+    RerankResponse,
+    RerankResponseDocument,
+    RerankResponseMeta,
+    RerankResponseResult,
+    RerankTokens,
+)

 from .common_utils import InfinityError

@@ -88,13 +94,23 @@ class InfinityRerankConfig(CohereRerankConfig):
         )
         rerank_meta = RerankResponseMeta(billed_units=_billed_units, tokens=_tokens)

-        _results: Optional[List[dict]] = raw_response_json.get("results")
-
-        if _results is None:
+        cohere_results: List[RerankResponseResult] = []
+        if raw_response_json.get("results"):
+            for result in raw_response_json.get("results"):
+                _rerank_response = RerankResponseResult(
+                    index=result.get("index"),
+                    relevance_score=result.get("relevance_score"),
+                )
+                if result.get("document"):
+                    _rerank_response["document"] = RerankResponseDocument(
+                        text=result.get("document")
+                    )
+                cohere_results.append(_rerank_response)
+        if cohere_results is None:
             raise ValueError(f"No results found in the response={raw_response_json}")

         return RerankResponse(
             id=raw_response_json.get("id") or str(uuid.uuid4()),
-            results=_results,  # type: ignore
+            results=cohere_results,
             meta=rerank_meta,
         )  # Return response
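A minimal sketch of the mapping the Infinity hunk above performs, using the typed dicts added to `litellm/types/rerank.py` later in this commit. (One quirk preserved from the hunk: `cohere_results` starts as `[]`, which is never `None`, so the `ValueError` branch is effectively dead and an empty `results` payload produces an empty response rather than an error.)

```python
from litellm.types.rerank import RerankResponseDocument, RerankResponseResult

raw_result = {"index": 0, "relevance_score": 0.95, "document": "hello"}

# Required fields first; 'document' is attached only when present,
# wrapped as {"text": ...} to match the cohere-style response shape.
typed = RerankResponseResult(
    index=raw_result["index"],
    relevance_score=raw_result["relevance_score"],
)
if raw_result.get("document"):
    typed["document"] = RerankResponseDocument(text=raw_result["document"])

print(typed)  # {'index': 0, 'relevance_score': 0.95, 'document': {'text': 'hello'}}
```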
litellm/llms/together_ai/rerank/transformation.py

@@ -10,7 +10,9 @@ from typing import List, Optional
 from litellm.types.rerank import (
     RerankBilledUnits,
     RerankResponse,
+    RerankResponseDocument,
     RerankResponseMeta,
+    RerankResponseResult,
     RerankTokens,
 )

@@ -27,8 +29,35 @@ class TogetherAIRerankConfig:
         if _results is None:
             raise ValueError(f"No results found in the response={response}")

+        rerank_results: List[RerankResponseResult] = []
+
+        for result in _results:
+            # Validate required fields exist
+            if not all(key in result for key in ["index", "relevance_score"]):
+                raise ValueError(f"Missing required fields in the result={result}")
+
+            # Get document data if it exists
+            document_data = result.get("document", {})
+            document = (
+                RerankResponseDocument(text=str(document_data.get("text", "")))
+                if document_data
+                else None
+            )
+
+            # Create typed result
+            rerank_result = RerankResponseResult(
+                index=int(result["index"]),
+                relevance_score=float(result["relevance_score"]),
+            )
+
+            # Only add document if it exists
+            if document:
+                rerank_result["document"] = document
+
+            rerank_results.append(rerank_result)
+
         return RerankResponse(
             id=response.get("id") or str(uuid.uuid4()),
-            results=_results,  # type: ignore
+            results=rerank_results,
             meta=rerank_meta,
         )  # Return response
litellm/model_prices_and_context_window_backup.json

@@ -2643,6 +2643,17 @@
         "supports_function_calling": true,
         "supports_tool_choice": true
     },
+    "cerebras/llama3.3-70b": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000085,
+        "output_cost_per_token": 0.0000012,
+        "litellm_provider": "cerebras",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": true
+    },
     "friendliai/meta-llama-3.1-8b-instruct": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,

@@ -7450,7 +7461,8 @@
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_tool_choice": false
+        "supports_tool_choice": false,
+        "supports_vision": true
     },
     "us.meta.llama3-2-11b-instruct-v1:0": {
         "max_tokens": 128000,

@@ -7461,7 +7473,8 @@
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_tool_choice": false
+        "supports_tool_choice": false,
+        "supports_vision": true
     },
     "meta.llama3-2-90b-instruct-v1:0": {
         "max_tokens": 128000,

@@ -7472,7 +7485,8 @@
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_tool_choice": false
+        "supports_tool_choice": false,
+        "supports_vision": true
     },
     "us.meta.llama3-2-90b-instruct-v1:0": {
         "max_tokens": 128000,

@@ -7483,7 +7497,8 @@
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_tool_choice": false
+        "supports_tool_choice": false,
+        "supports_vision": true
     },
     "us.meta.llama3-3-70b-instruct-v1:0": {
         "max_tokens": 4096,
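These map entries feed litellm's capability checks (the "build: fix model supports check" item in the commit message). As a rough sketch of how a caller would consult the new flags, assuming litellm's `supports_vision` helper reads this map (call shape assumed here):

```python
import litellm

# With the updated model map loaded, the bedrock llama 3.2 vision models
# marked above should report vision support.
print(
    litellm.supports_vision(
        model="meta.llama3-2-90b-instruct-v1:0",
        custom_llm_provider="bedrock",
    )
)  # expected: True
```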
litellm/types/llms/rerank.py (new file, 19 lines)

@@ -0,0 +1,19 @@
+import json
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Tuple, TypedDict, Union
+
+from typing_extensions import (
+    Protocol,
+    Required,
+    Self,
+    TypeGuard,
+    get_origin,
+    override,
+    runtime_checkable,
+)
+
+
+class InfinityRerankResult(TypedDict):
+    index: int
+    relevance_score: float
+    document: Optional[str]
litellm/types/rerank.py

@@ -7,7 +7,7 @@ https://docs.cohere.com/reference/rerank
 from typing import List, Optional, Union

 from pydantic import BaseModel, PrivateAttr
-from typing_extensions import TypedDict
+from typing_extensions import Required, TypedDict


 class RerankRequest(BaseModel):

@@ -45,9 +45,14 @@ class RerankResponseMeta(TypedDict, total=False):
     tokens: Optional[RerankTokens]


-class RerankResponseResult(TypedDict):
-    index: int
-    relevance_score: float
+class RerankResponseDocument(TypedDict):
+    text: str
+
+
+class RerankResponseResult(TypedDict, total=False):
+    index: Required[int]
+    relevance_score: Required[float]
+    document: RerankResponseDocument


 class RerankResponse(BaseModel):
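The switch to `total=False` with `Required[...]` keys is what lets `document` be omitted from a result entirely (rather than carried as `document: None`, which the updated `test_rerank_response_assertions` below drops), while keeping `index` and `relevance_score` mandatory. A quick illustration of the semantics:

```python
from typing_extensions import Required, TypedDict


class RerankResponseDocument(TypedDict):
    text: str


class RerankResponseResult(TypedDict, total=False):
    index: Required[int]
    relevance_score: Required[float]
    document: RerankResponseDocument


# Valid: 'document' may be absent under total=False...
minimal: RerankResponseResult = {"index": 0, "relevance_score": 0.99}

# ...but Required keys may not be dropped; a type checker rejects this:
# bad: RerankResponseResult = {"relevance_score": 0.99}

with_doc: RerankResponseResult = {
    "index": 1,
    "relevance_score": 0.42,
    "document": {"text": "world"},
}
```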
model_prices_and_context_window.json

@@ -2643,6 +2643,17 @@
@@ -7450,7 +7461,8 @@
@@ -7461,7 +7473,8 @@
@@ -7472,7 +7485,8 @@
@@ -7483,7 +7497,8 @@

The hunks are identical to litellm/model_prices_and_context_window_backup.json above: the "cerebras/llama3.3-70b" pricing entry is added, and "supports_vision": true is set on meta.llama3-2-11b-instruct-v1:0, us.meta.llama3-2-11b-instruct-v1:0, meta.llama3-2-90b-instruct-v1:0, and us.meta.llama3-2-90b-instruct-v1:0.
tests/README.MD

@@ -1 +1,9 @@
-**In total litellm runs 500+ tests** Most tests are in [/litellm/tests](https://github.com/BerriAI/litellm/tree/main/litellm/tests). These are just the tests for the proxy docker image, used for circle ci.
+**In total litellm runs 1000+ tests**
+
+[02/20/2025] Update:
+
+To make it easier to contribute and map what behavior is tested,
+
+we've started mapping the litellm directory in `tests/litellm`
+
+This folder can only run mock tests.
@@ -1165,6 +1165,9 @@ def test_models_by_provider():
     """
     Make sure all providers from model map are in the valid providers list
     """
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
     from litellm import models_by_provider

     providers = set()
@@ -87,6 +87,39 @@ async def test_infinity_rerank():
     assert_response_shape(response, custom_llm_provider="infinity")


+@pytest.mark.asyncio()
+async def test_infinity_rerank_with_return_documents():
+    mock_response = AsyncMock()
+
+    mock_response = AsyncMock()
+
+    def return_val():
+        return {
+            "id": "cmpl-mockid",
+            "results": [{"index": 0, "relevance_score": 0.95, "document": "hello"}],
+            "usage": {"prompt_tokens": 100, "total_tokens": 150},
+        }
+
+    mock_response.json = return_val
+    mock_response.headers = {"key": "value"}
+    mock_response.status_code = 200
+
+    with patch(
+        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+        return_value=mock_response,
+    ) as mock_post:
+        response = await litellm.arerank(
+            model="infinity/rerank-model",
+            query="hello",
+            documents=["hello", "world"],
+            top_n=3,
+            return_documents=True,
+            api_base="https://api.infinity.ai",
+        )
+        assert response.results[0]["document"] == {"text": "hello"}
+        assert_response_shape(response, custom_llm_provider="infinity")
+
+
 @pytest.mark.asyncio()
 async def test_infinity_rerank_with_env(monkeypatch):
     # Set up mock response
test_rerank.py

@@ -9,6 +9,7 @@ from dotenv import load_dotenv
 load_dotenv()
 import io
 import os
+from typing import Optional, Dict

 sys.path.insert(
     0, os.path.abspath("../..")

@@ -29,7 +30,11 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 def assert_response_shape(response, custom_llm_provider):
     expected_response_shape = {"id": str, "results": list, "meta": dict}

-    expected_results_shape = {"index": int, "relevance_score": float}
+    expected_results_shape = {
+        "index": int,
+        "relevance_score": float,
+        "document": Optional[Dict[str, str]],
+    }

     expected_meta_shape = {"api_version": dict, "billed_units": dict}

@@ -44,6 +49,9 @@ def assert_response_shape(response, custom_llm_provider):
         assert isinstance(
             result["relevance_score"], expected_results_shape["relevance_score"]
         )
+        if "document" in result:
+            assert isinstance(result["document"], Dict)
+            assert isinstance(result["document"]["text"], str)
     assert isinstance(response.meta, expected_response_shape["meta"])

     if custom_llm_provider == "cohere":

@@ -364,17 +372,15 @@ def test_rerank_response_assertions():
         **{
             "id": "ab0fcca0-b617-11ef-b292-0242ac110002",
             "results": [
-                {"index": 2, "relevance_score": 0.9958819150924683, "document": None},
-                {"index": 0, "relevance_score": 0.001293411129154265, "document": None},
+                {"index": 2, "relevance_score": 0.9958819150924683},
+                {"index": 0, "relevance_score": 0.001293411129154265},
                 {
                     "index": 1,
                     "relevance_score": 7.641685078851879e-05,
-                    "document": None,
                 },
                 {
                     "index": 3,
                     "relevance_score": 7.621097756782547e-05,
-                    "document": None,
                 },
             ],
             "meta": {

@@ -387,3 +393,19 @@ def test_rerank_response_assertions():
     )

     assert_response_shape(r, custom_llm_provider="custom")
+
+
+@pytest.mark.flaky(retries=3, delay=1)
+def test_rerank_cohere_api():
+    response = litellm.rerank(
+        model="cohere/rerank-english-v3.0",
+        query="hello",
+        documents=["hello", "world"],
+        return_documents=True,
+        top_n=3,
+    )
+    print("rerank response", response)
+    assert response.results[0]["document"] is not None
+    assert response.results[0]["document"]["text"] is not None
+    assert response.results[0]["document"]["text"] == "hello"
+    assert response.results[1]["document"]["text"] == "world"
@@ -2775,6 +2775,8 @@ def test_bedrock_cost_calc_with_region():
     os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
     litellm.model_cost = litellm.get_model_cost_map(url="")

+    litellm.add_known_models()
+
     hidden_params = {
         "custom_llm_provider": "bedrock",
         "region_name": "us-east-1",
@@ -961,6 +961,7 @@ async def test_gemini_embeddings(sync_mode, input):

 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
+@pytest.mark.flaky(retries=3, delay=1)
 async def test_hf_embedddings_with_optional_params(sync_mode):
     litellm.set_verbose = True