mirror of https://github.com/BerriAI/litellm.git — synced 2025-04-25 02:34:29 +00:00

LiteLLM Minor Fixes & Improvements (12/05/2024) (#7037)

* fix(together_ai/chat): only return `response_format` + `tools` for supported models. Fixes https://github.com/BerriAI/litellm/issues/6972
* feat(bedrock/rerank): initial working commit for Bedrock rerank API support. Closes https://github.com/BerriAI/litellm/issues/7021
* feat(bedrock/rerank): async Bedrock rerank API support. Addresses https://github.com/BerriAI/litellm/issues/7021
* build(model_prices_and_context_window.json): add `supports_prompt_caching` for Bedrock models + clean up cross-region entries from the model list (duplicate information that led to inconsistencies)
* docs(json_mode.md): clarify model support for JSON schema. Closes https://github.com/BerriAI/litellm/issues/6998
* fix(_service_logger.py): handle a DataDog callback instance in the callback list, ensuring failed spend tracking is logged to Datadog
* feat(converse_transformation.py): translate prompt-caching blocks from the Anthropic format to the Bedrock format. Closes https://github.com/BerriAI/litellm/issues/7030
* fix: fix linting errors
* test: fix test

This commit is contained in: parent 12dfd14b52 · commit 61b35c12bb
24 changed files with 858 additions and 400 deletions
docs/json_mode.md

@@ -51,6 +51,9 @@ curl http://0.0.0.0:4000/v1/chat/completions \

## Check Model Support

### 1. Check if model supports `response_format`

Call `litellm.get_supported_openai_params` to check if a model/provider supports `response_format`.

```python
from litellm import get_supported_openai_params

params = get_supported_openai_params(model="anthropic.claude-3", custom_llm_provider="bedrock")

assert "response_format" in params
```

### 2. Check if model supports `json_schema`

This is used to check if you can pass
- `response_format={ "type": "json_schema", "json_schema": … , "strict": true }`
- `response_format=<Pydantic Model>`

```python
from litellm import supports_response_schema

assert supports_response_schema(model="gemini-1.5-pro-preview-0215", custom_llm_provider="bedrock")
```

Check out [model_prices_and_context_window.json](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) for a full list of models and their support for `response_schema`.

## Pass in 'json_schema'

To use Structured Outputs, simply specify (doc text truncated in this diff; see the sketch below for the call it introduces)
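For context, a minimal sketch of the Structured Outputs call the truncated section introduces, assuming a model with `json_schema` support; the model name and event schema are illustrative, not from the diff:

```python
from pydantic import BaseModel

import litellm


class CalendarEvent(BaseModel):  # illustrative schema
    name: str
    date: str
    participants: list[str]


# Per the doc above, `response_format` may be a Pydantic model, or the raw
# {"type": "json_schema", "json_schema": ..., "strict": true} dict.
response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "Alice and Bob meet on 2024-12-05."}],
    response_format=CalendarEvent,
)
print(response.choices[0].message.content)
```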
litellm/_service_logger.py

```diff
@@ -7,6 +7,7 @@ from litellm._logging import verbose_logger
 from litellm.proxy._types import UserAPIKeyAuth

 from .integrations.custom_logger import CustomLogger
+from .integrations.datadog.datadog import DataDogLogger
 from .integrations.prometheus_services import PrometheusServicesLogger
 from .types.services import ServiceLoggerPayload, ServiceTypes

@@ -134,9 +135,7 @@ class ServiceLogging(CustomLogger):
                 await self.prometheusServicesLogger.async_service_success_hook(
                     payload=payload
                 )
-            elif callback == "datadog":
-                from litellm.integrations.datadog.datadog import DataDogLogger
-
+            elif callback == "datadog" or isinstance(callback, DataDogLogger):
                 await self.init_datadog_logger_if_none()
                 await self.dd_logger.async_service_success_hook(
                     payload=payload,

@@ -237,6 +236,7 @@ class ServiceLogging(CustomLogger):
             duration=duration,
             call_type=call_type,
         )

         for callback in litellm.service_callback:
             if callback == "prometheus_system":
                 await self.init_prometheus_services_logger_if_none()

@@ -244,7 +244,7 @@ class ServiceLogging(CustomLogger):
                     payload=payload,
                     error=error,
                 )
-            elif callback == "datadog":
+            elif callback == "datadog" or isinstance(callback, DataDogLogger):
                 await self.init_datadog_logger_if_none()
                 await self.dd_logger.async_service_failure_hook(
                     payload=payload,
```
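The `isinstance(callback, DataDogLogger)` check means an already-constructed logger instance in `litellm.service_callback` now reaches the DataDog hooks, not just the literal string `"datadog"`. A minimal sketch, mirroring the regression test added at the end of this commit (assumes Datadog env vars such as `DD_API_KEY` are configured):

```python
import asyncio

import litellm
from litellm._service_logger import ServiceLogging
from litellm.integrations.datadog.datadog import DataDogLogger
from litellm.types.services import ServiceTypes

# Register a logger *instance*, not the "datadog" string.
litellm.service_callback = [DataDogLogger()]

# A failed DB spend-tracking write now routes to Datadog via the isinstance() branch.
asyncio.run(
    ServiceLogging().async_service_failure_hook(
        service=ServiceTypes.DB,
        call_type="spend_tracking",
        error="db write failed",
        duration=1.0,
    )
)
```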
litellm/llms/bedrock/chat/converse_transformation.py

```diff
@@ -5,7 +5,7 @@ Translating between OpenAI's `/chat/completion` format and Amazon's `/converse` format
 import copy
 import time
 import types
-from typing import List, Optional, Union
+from typing import List, Literal, Optional, Tuple, Union, cast, overload

 import httpx
```
```diff
@@ -255,6 +255,59 @@ class AmazonConverseConfig:
         )
         return optional_params

+    @overload
+    def _get_cache_point_block(
+        self, message_block: dict, block_type: Literal["system"]
+    ) -> Optional[SystemContentBlock]:
+        pass
+
+    @overload
+    def _get_cache_point_block(
+        self, message_block: dict, block_type: Literal["content_block"]
+    ) -> Optional[ContentBlock]:
+        pass
+
+    def _get_cache_point_block(
+        self, message_block: dict, block_type: Literal["system", "content_block"]
+    ) -> Optional[Union[SystemContentBlock, ContentBlock]]:
+        if message_block.get("cache_control", None) is None:
+            return None
+        if block_type == "system":
+            return SystemContentBlock(cachePoint=CachePointBlock(type="default"))
+        else:
+            return ContentBlock(cachePoint=CachePointBlock(type="default"))
+
+    def _transform_system_message(
+        self, messages: List[AllMessageValues]
+    ) -> Tuple[List[AllMessageValues], List[SystemContentBlock]]:
+        system_prompt_indices = []
+        system_content_blocks: List[SystemContentBlock] = []
+        for idx, message in enumerate(messages):
+            if message["role"] == "system":
+                _system_content_block: Optional[SystemContentBlock] = None
+                _cache_point_block: Optional[SystemContentBlock] = None
+                if isinstance(message["content"], str) and len(message["content"]) > 0:
+                    _system_content_block = SystemContentBlock(text=message["content"])
+                    _cache_point_block = self._get_cache_point_block(
+                        cast(dict, message), block_type="system"
+                    )
+                elif isinstance(message["content"], list):
+                    for m in message["content"]:
+                        if m.get("type", "") == "text" and len(m["text"]) > 0:
+                            _system_content_block = SystemContentBlock(text=m["text"])
+                            _cache_point_block = self._get_cache_point_block(
+                                m, block_type="system"
+                            )
+                if _system_content_block is not None:
+                    system_content_blocks.append(_system_content_block)
+                if _cache_point_block is not None:
+                    system_content_blocks.append(_cache_point_block)
+                system_prompt_indices.append(idx)
+        if len(system_prompt_indices) > 0:
+            for idx in reversed(system_prompt_indices):
+                messages.pop(idx)
+        return messages, system_content_blocks
+
     def _transform_request(
         self,
         model: str,
```
```diff
@@ -262,24 +315,7 @@ class AmazonConverseConfig:
         optional_params: dict,
         litellm_params: dict,
     ) -> RequestObject:
-        system_prompt_indices = []
-        system_content_blocks: List[SystemContentBlock] = []
-        for idx, message in enumerate(messages):
-            if message["role"] == "system":
-                _system_content_block: Optional[SystemContentBlock] = None
-                if isinstance(message["content"], str) and len(message["content"]) > 0:
-                    _system_content_block = SystemContentBlock(text=message["content"])
-                elif isinstance(message["content"], list):
-                    for m in message["content"]:
-                        if m.get("type", "") == "text" and len(m["text"]) > 0:
-                            _system_content_block = SystemContentBlock(text=m["text"])
-                if _system_content_block is not None:
-                    system_content_blocks.append(_system_content_block)
-                    system_prompt_indices.append(idx)
-        if len(system_prompt_indices) > 0:
-            for idx in reversed(system_prompt_indices):
-                messages.pop(idx)
-
+        messages, system_content_blocks = self._transform_system_message(messages)
         inference_params = copy.deepcopy(optional_params)
         additional_request_keys = []
         additional_request_params = {}
```
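The net effect: an Anthropic-style `cache_control` marker on a system message now becomes a Bedrock `cachePoint` block next to the text block. A minimal sketch, mirroring the `test_bedrock_prompt_caching_message` test added later in this commit (the model name is from that test):

```python
import json

import litellm

# An Anthropic-format prompt-caching marker on the system message...
messages = [
    {
        "role": "system",
        "content": [
            {
                "type": "text",
                "text": "Here is the full text of a complex legal agreement",
                "cache_control": {"type": "ephemeral"},
            }
        ],
    },
    {"role": "user", "content": "What are the key terms?"},
]

request = litellm.AmazonConverseConfig()._transform_request(
    model="bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
    messages=messages,
    optional_params={},
    litellm_params={},
)
# ...is translated into a Bedrock `cachePoint` block in the request body.
assert "cachePoint" in json.dumps(request)
```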
litellm/llms/bedrock/rerank/handler.py (new file, 159 lines)

```python
import copy
import json
import os
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast

import httpx
from openai.types.image import Image
from pydantic import BaseModel

import litellm
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.litellm_logging import Logging as LitellmLogging
from litellm.llms.custom_httpx.http_handler import (
    _get_httpx_client,
    get_async_httpx_client,
)
from litellm.types.llms.bedrock import BedrockPreparedRequest, BedrockRerankRequest
from litellm.types.rerank import RerankRequest
from litellm.types.utils import RerankResponse

from ...base_aws_llm import BaseAWSLLM
from ..common_utils import BedrockError
from .transformation import BedrockRerankConfig

if TYPE_CHECKING:
    from botocore.awsrequest import AWSPreparedRequest
else:
    AWSPreparedRequest = Any


class BedrockRerankHandler(BaseAWSLLM):
    async def arerank(
        self,
        prepared_request: BedrockPreparedRequest,
    ):
        client = get_async_httpx_client(llm_provider=litellm.LlmProviders.BEDROCK)
        try:
            response = await client.post(url=prepared_request["endpoint_url"], headers=prepared_request["prepped"].headers, data=prepared_request["body"])  # type: ignore
            response.raise_for_status()
        except httpx.HTTPStatusError as err:
            error_code = err.response.status_code
            raise BedrockError(status_code=error_code, message=err.response.text)
        except httpx.TimeoutException:
            raise BedrockError(status_code=408, message="Timeout error occurred.")

        return BedrockRerankConfig()._transform_response(response.json())

    def rerank(
        self,
        model: str,
        query: str,
        documents: List[Union[str, Dict[str, Any]]],
        optional_params: dict,
        logging_obj: LitellmLogging,
        top_n: Optional[int] = None,
        rank_fields: Optional[List[str]] = None,
        return_documents: Optional[bool] = True,
        max_chunks_per_doc: Optional[int] = None,
        _is_async: Optional[bool] = False,
        api_base: Optional[str] = None,
        extra_headers: Optional[dict] = None,
    ) -> RerankResponse:
        request_data = RerankRequest(
            model=model,
            query=query,
            documents=documents,
            top_n=top_n,
            rank_fields=rank_fields,
            return_documents=return_documents,
        )
        data = BedrockRerankConfig()._transform_request(request_data)

        prepared_request = self._prepare_request(
            optional_params=optional_params,
            api_base=api_base,
            extra_headers=extra_headers,
            data=cast(dict, data),
        )

        logging_obj.pre_call(
            input=data,
            api_key="",
            additional_args={
                "complete_input_dict": data,
                "api_base": prepared_request["endpoint_url"],
                "headers": prepared_request["prepped"].headers,
            },
        )

        if _is_async:
            return self.arerank(prepared_request)  # type: ignore

        client = _get_httpx_client()
        try:
            response = client.post(url=prepared_request["endpoint_url"], headers=prepared_request["prepped"].headers, data=prepared_request["body"])  # type: ignore
            response.raise_for_status()
        except httpx.HTTPStatusError as err:
            error_code = err.response.status_code
            raise BedrockError(status_code=error_code, message=err.response.text)
        except httpx.TimeoutException:
            raise BedrockError(status_code=408, message="Timeout error occurred.")

        return BedrockRerankConfig()._transform_response(response.json())

    def _prepare_request(
        self,
        api_base: Optional[str],
        extra_headers: Optional[dict],
        data: dict,
        optional_params: dict,
    ) -> BedrockPreparedRequest:
        try:
            import boto3
            from botocore.auth import SigV4Auth
            from botocore.awsrequest import AWSRequest
            from botocore.credentials import Credentials
        except ImportError:
            raise ImportError("Missing boto3 to call bedrock. Run 'pip install boto3'.")
        boto3_credentials_info = self._get_boto_credentials_from_optional_params(
            optional_params
        )

        ### SET RUNTIME ENDPOINT ###
        _, proxy_endpoint_url = self.get_runtime_endpoint(
            api_base=api_base,
            aws_bedrock_runtime_endpoint=boto3_credentials_info.aws_bedrock_runtime_endpoint,
            aws_region_name=boto3_credentials_info.aws_region_name,
        )
        proxy_endpoint_url = proxy_endpoint_url.replace(
            "bedrock-runtime", "bedrock-agent-runtime"
        )
        proxy_endpoint_url = f"{proxy_endpoint_url}/rerank"
        sigv4 = SigV4Auth(
            boto3_credentials_info.credentials,
            "bedrock",
            boto3_credentials_info.aws_region_name,
        )
        # Make POST Request
        body = json.dumps(data).encode("utf-8")

        headers = {"Content-Type": "application/json"}
        if extra_headers is not None:
            headers = {"Content-Type": "application/json", **extra_headers}
        request = AWSRequest(
            method="POST", url=proxy_endpoint_url, data=body, headers=headers
        )
        sigv4.add_auth(request)
        if (
            extra_headers is not None and "Authorization" in extra_headers
        ):  # prevent sigv4 from overwriting the auth header
            request.headers["Authorization"] = extra_headers["Authorization"]
        prepped = request.prepare()

        return BedrockPreparedRequest(
            endpoint_url=proxy_endpoint_url,
            prepped=prepped,
            body=body,
            data=data,
        )
```
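With the handler registered in the rerank entrypoint (see below), the new provider is reachable through the top-level API. A minimal sketch, assuming AWS credentials are configured via the usual boto3 environment and using the model ARN from this commit's tests:

```python
import litellm

response = litellm.rerank(
    model="bedrock/arn:aws:bedrock:us-west-2::foundation-model/amazon.rerank-v1:0",
    query="hello",
    documents=["hello", "world"],
    top_n=2,
)
for result in response.results:
    print(result["index"], result["relevance_score"])
```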
litellm/llms/bedrock/rerank/transformation.py (new file, 117 lines)

```python
"""
Translates from Cohere's `/v1/rerank` input format to Bedrock's `/rerank` input format.

Why separate file? Make it easy to see how transformation works
"""

import uuid
from typing import List, Optional, Union

from litellm.types.llms.bedrock import (
    BedrockRerankBedrockRerankingConfiguration,
    BedrockRerankConfiguration,
    BedrockRerankInlineDocumentSource,
    BedrockRerankModelConfiguration,
    BedrockRerankQuery,
    BedrockRerankRequest,
    BedrockRerankSource,
    BedrockRerankTextDocument,
    BedrockRerankTextQuery,
)
from litellm.types.rerank import (
    RerankBilledUnits,
    RerankRequest,
    RerankResponse,
    RerankResponseMeta,
    RerankResponseResult,
    RerankTokens,
)


class BedrockRerankConfig:

    def _transform_sources(
        self, documents: List[Union[str, dict]]
    ) -> List[BedrockRerankSource]:
        """
        Transform the sources from RerankRequest format to Bedrock format.
        """
        _sources = []
        for document in documents:
            if isinstance(document, str):
                _sources.append(
                    BedrockRerankSource(
                        inlineDocumentSource=BedrockRerankInlineDocumentSource(
                            textDocument=BedrockRerankTextDocument(text=document),
                            type="TEXT",
                        ),
                        type="INLINE",
                    )
                )
            else:
                _sources.append(
                    BedrockRerankSource(
                        inlineDocumentSource=BedrockRerankInlineDocumentSource(
                            jsonDocument=document, type="JSON"
                        ),
                        type="INLINE",
                    )
                )
        return _sources

    def _transform_request(self, request_data: RerankRequest) -> BedrockRerankRequest:
        """
        Transform the request from RerankRequest format to Bedrock format.
        """
        _sources = self._transform_sources(request_data.documents)

        return BedrockRerankRequest(
            queries=[
                BedrockRerankQuery(
                    textQuery=BedrockRerankTextQuery(text=request_data.query),
                    type="TEXT",
                )
            ],
            rerankingConfiguration=BedrockRerankConfiguration(
                bedrockRerankingConfiguration=BedrockRerankBedrockRerankingConfiguration(
                    modelConfiguration=BedrockRerankModelConfiguration(
                        modelArn=request_data.model
                    ),
                    numberOfResults=request_data.top_n or len(request_data.documents),
                ),
                type="BEDROCK_RERANKING_MODEL",
            ),
            sources=_sources,
        )

    def _transform_response(self, response: dict) -> RerankResponse:
        """
        Transform the response from Bedrock into the RerankResponse format.

        example input:
        {"results":[{"index":0,"relevanceScore":0.6847912669181824},{"index":1,"relevanceScore":0.5980774760246277}]}
        """
        _billed_units = RerankBilledUnits(**response.get("usage", {}))
        _tokens = RerankTokens(**response.get("usage", {}))
        rerank_meta = RerankResponseMeta(billed_units=_billed_units, tokens=_tokens)

        _results: Optional[List[RerankResponseResult]] = None

        bedrock_results = response.get("results")
        if bedrock_results:
            _results = [
                RerankResponseResult(
                    index=result.get("index"),
                    relevance_score=result.get("relevanceScore"),
                )
                for result in bedrock_results
            ]

        if _results is None:
            raise ValueError(f"No results found in the response={response}")

        return RerankResponse(
            id=response.get("id") or str(uuid.uuid4()),
            results=_results,
            meta=rerank_meta,
        )  # Return response
```
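A quick sketch of what the transformation produces (output shape abbreviated in comments; assumes `RerankRequest`'s remaining fields are optional, and reuses the model ARN from this commit's tests):

```python
from litellm.llms.bedrock.rerank.transformation import BedrockRerankConfig
from litellm.types.rerank import RerankRequest

request = RerankRequest(
    model="arn:aws:bedrock:us-west-2::foundation-model/amazon.rerank-v1:0",
    query="hello",
    documents=["hello", "world"],
    top_n=2,
)
bedrock_request = BedrockRerankConfig()._transform_request(request)
# -> {
#   "queries": [{"textQuery": {"text": "hello"}, "type": "TEXT"}],
#   "rerankingConfiguration": {
#     "bedrockRerankingConfiguration": {
#       "modelConfiguration": {"modelArn": "arn:aws:bedrock:...rerank-v1:0"},
#       "numberOfResults": 2,
#     },
#     "type": "BEDROCK_RERANKING_MODEL",
#   },
#   "sources": [<two INLINE/TEXT document sources>],
# }
```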
Jina AI rerank transformation

```diff
@@ -31,6 +31,6 @@ class JinaAIRerankConfig:

         return RerankResponse(
             id=response.get("id") or str(uuid.uuid4()),
-            results=_results,
+            results=_results,  # type: ignore
             meta=rerank_meta,
         )  # Return response
```
litellm/llms/prompt_templates/factory.py

```diff
@@ -2485,10 +2485,24 @@ def _bedrock_converse_messages_pt(  # noqa: PLR0915
                             image_url=image_url
                         )
                         _parts.append(_part)  # type: ignore
+                    _cache_point_block = (
+                        litellm.AmazonConverseConfig()._get_cache_point_block(
+                            element, block_type="content_block"
+                        )
+                    )
+                    if _cache_point_block is not None:
+                        _parts.append(_cache_point_block)
                 user_content.extend(_parts)
             else:
                 _part = BedrockContentBlock(text=messages[msg_i]["content"])
+                _cache_point_block = (
+                    litellm.AmazonConverseConfig()._get_cache_point_block(
+                        messages[msg_i], block_type="content_block"
+                    )
+                )
                 user_content.append(_part)
+                if _cache_point_block is not None:
+                    user_content.append(_cache_point_block)

             msg_i += 1
         if user_content:
```
Together AI chat transformation

```diff
@@ -6,8 +6,54 @@ Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.
 Docs: https://docs.together.ai/reference/completions-1
 """

+from typing import Optional
+
+from litellm import get_model_info, verbose_logger
+
 from ..OpenAI.chat.gpt_transformation import OpenAIGPTConfig


 class TogetherAIConfig(OpenAIGPTConfig):
-    pass
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Only some together models support response_format / tool calling
+
+        Docs: https://docs.together.ai/docs/json-mode
+        """
+        supports_function_calling: Optional[bool] = None
+        try:
+            model_info = get_model_info(model, custom_llm_provider="together_ai")
+            supports_function_calling = model_info.get(
+                "supports_function_calling", False
+            )
+        except Exception as e:
+            verbose_logger.debug(f"Error getting supported openai params: {e}")
+            pass
+
+        optional_params = super().get_supported_openai_params(model)
+        if supports_function_calling is not True:
+            verbose_logger.warning(
+                "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling"
+            )
+            optional_params.remove("tools")
+            optional_params.remove("tool_choice")
+            optional_params.remove("function_call")
+            optional_params.remove("response_format")
+        return optional_params
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        mapped_openai_params = super().map_openai_params(
+            non_default_params, optional_params, model, drop_params
+        )
+
+        if "response_format" in mapped_openai_params and mapped_openai_params[
+            "response_format"
+        ] == {"type": "text"}:
+            mapped_openai_params.pop("response_format")
+        return mapped_openai_params
```
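A quick sketch of the resulting behavior, using the two models from the `test_together_ai.py` test added in this commit:

```python
import os

import litellm

# Load the local model cost map, as the tests do.
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

# Llama 3.1 Turbo is marked supports_function_calling in the cost map, so
# response_format/tools survive...
params = litellm.get_supported_openai_params(
    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", custom_llm_provider="together_ai"
)
assert "response_format" in params and "tools" in params

# ...while for unmapped models they are stripped.
params = litellm.get_supported_openai_params(
    "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", custom_llm_provider="together_ai"
)
assert "response_format" not in params and "tools" not in params
```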
Together AI rerank transformation

```diff
@@ -29,6 +29,6 @@ class TogetherAIRerankConfig:

         return RerankResponse(
             id=response.get("id") or str(uuid.uuid4()),
-            results=_results,
+            results=_results,  # type: ignore
             meta=rerank_meta,
         )  # Return response
```
model_prices_and_context_window.json

```diff
@@ -12,7 +12,8 @@
         "supports_vision": true,
         "supports_audio_input": true,
         "supports_audio_output": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_response_schema": true
     },
     "gpt-4": {
         "max_tokens": 4096,
@@ -4818,7 +4819,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_vision": true,
-        "supports_pdf_input": true
+        "supports_pdf_input": true,
+        "supports_prompt_caching": true
     },
     "amazon.nova-lite-v1:0": {
         "max_tokens": 4096,
@@ -4830,7 +4832,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_vision": true,
-        "supports_pdf_input": true
+        "supports_pdf_input": true,
+        "supports_prompt_caching": true
     },
     "amazon.nova-pro-v1:0": {
         "max_tokens": 4096,
@@ -4842,7 +4845,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_vision": true,
-        "supports_pdf_input": true
+        "supports_pdf_input": true,
+        "supports_prompt_caching": true
     },
     "anthropic.claude-3-sonnet-20240229-v1:0": {
         "max_tokens": 4096,
@@ -4876,7 +4880,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_vision": true,
-        "supports_assistant_prefill": true
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true
     },
     "anthropic.claude-3-haiku-20240307-v1:0": {
         "max_tokens": 4096,
@@ -4898,7 +4903,8 @@
         "litellm_provider": "bedrock",
         "mode": "chat",
         "supports_assistant_prefill": true,
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_prompt_caching": true
     },
     "anthropic.claude-3-opus-20240229-v1:0": {
         "max_tokens": 4096,
@@ -4911,139 +4917,6 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
-    "us.anthropic.claude-3-sonnet-20240229-v1:0": {
-        "max_tokens": 4096,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000015,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_vision": true
-    },
-    "us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
-        "max_tokens": 4096,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000015,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_vision": true
-    },
-    "us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
-        "max_tokens": 8192,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000015,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_vision": true,
-        "supports_assistant_prefill": true
-    },
-    "us.anthropic.claude-3-haiku-20240307-v1:0": {
-        "max_tokens": 4096,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 4096,
-        "input_cost_per_token": 0.00000025,
-        "output_cost_per_token": 0.00000125,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_vision": true
-    },
-    "us.anthropic.claude-3-5-haiku-20241022-v1:0": {
-        "max_tokens": 4096,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000005,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_assistant_prefill": true,
-        "supports_function_calling": true
-    },
-    "us.anthropic.claude-3-opus-20240229-v1:0": {
-        "max_tokens": 4096,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000015,
-        "output_cost_per_token": 0.000075,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_vision": true
-    },
-    "eu.anthropic.claude-3-sonnet-20240229-v1:0": {
-        "max_tokens": 4096,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000015,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_vision": true
-    },
-    "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
-        "max_tokens": 4096,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000015,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_vision": true
-    },
-    "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": {
-        "max_tokens": 8192,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 8192,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000015,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_vision": true,
-        "supports_assistant_prefill": true
-    },
-    "eu.anthropic.claude-3-haiku-20240307-v1:0": {
-        "max_tokens": 4096,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 4096,
-        "input_cost_per_token": 0.00000025,
-        "output_cost_per_token": 0.00000125,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_vision": true
-    },
-    "eu.anthropic.claude-3-5-haiku-20241022-v1:0": {
-        "max_tokens": 4096,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000001,
-        "output_cost_per_token": 0.000005,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_function_calling": true
-    },
-    "eu.anthropic.claude-3-opus-20240229-v1:0": {
-        "max_tokens": 4096,
-        "max_input_tokens": 200000,
-        "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000015,
-        "output_cost_per_token": 0.000075,
-        "litellm_provider": "bedrock",
-        "mode": "chat",
-        "supports_function_calling": true,
-        "supports_vision": true
-    },
     "anthropic.claude-v1": {
         "max_tokens": 8191,
         "max_input_tokens": 100000,
@@ -6097,6 +5970,30 @@
         "litellm_provider": "together_ai",
         "mode": "embedding"
     },
+    "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
+        "input_cost_per_token": 0.00000018,
+        "output_cost_per_token": 0.00000018,
+        "litellm_provider": "together_ai",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "mode": "chat"
+    },
+    "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
+        "input_cost_per_token": 0.00000088,
+        "output_cost_per_token": 0.00000088,
+        "litellm_provider": "together_ai",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "mode": "chat"
+    },
+    "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
+        "input_cost_per_token": 0.0000035,
+        "output_cost_per_token": 0.0000035,
+        "litellm_provider": "together_ai",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "mode": "chat"
+    },
     "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": {
         "input_cost_per_token": 0.0000006,
         "output_cost_per_token": 0.0000006,
```
proxy config

```diff
@@ -39,10 +39,10 @@ model_list:
       access_groups: ["private-openai-models"]

 router_settings:
-  routing_strategy: usage-based-routing-v2
+  # routing_strategy: usage-based-routing-v2
   #redis_url: "os.environ/REDIS_URL"
   redis_host: "os.environ/REDIS_HOST"
   redis_port: "os.environ/REDIS_PORT"

 litellm_settings:
-  success_callback: ["langsmith"]
+  callbacks: ["datadog"]
```
Rerank entrypoint (litellm/rerank_api/main.py)

```diff
@@ -7,6 +7,7 @@ import litellm
 from litellm._logging import verbose_logger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.azure_ai.rerank import AzureAIRerank
+from litellm.llms.bedrock.rerank.handler import BedrockRerankHandler
 from litellm.llms.cohere.rerank import CohereRerank
 from litellm.llms.jina_ai.rerank.handler import JinaAIRerank
 from litellm.llms.together_ai.rerank.handler import TogetherAIRerank

@@ -21,6 +22,7 @@ cohere_rerank = CohereRerank()
 together_rerank = TogetherAIRerank()
 azure_ai_rerank = AzureAIRerank()
 jina_ai_rerank = JinaAIRerank()
+bedrock_rerank = BedrockRerankHandler()
 #################################################

@@ -70,7 +72,7 @@ async def arerank(

 @client
-def rerank(
+def rerank(  # noqa: PLR0915
     model: str,
     query: str,
     documents: List[Union[str, Dict[str, Any]]],

@@ -268,6 +270,27 @@ def rerank(
             max_chunks_per_doc=max_chunks_per_doc,
             _is_async=_is_async,
         )
+    elif _custom_llm_provider == "bedrock":
+        api_base = (
+            dynamic_api_base
+            or optional_params.api_base
+            or litellm.api_base
+            or get_secret("BEDROCK_API_BASE")  # type: ignore
+        )
+
+        response = bedrock_rerank.rerank(
+            model=model,
+            query=query,
+            documents=documents,
+            top_n=top_n,
+            rank_fields=rank_fields,
+            return_documents=return_documents,
+            max_chunks_per_doc=max_chunks_per_doc,
+            _is_async=_is_async,
+            optional_params=optional_params.model_dump(exclude_unset=True),
+            api_base=api_base,
+            logging_obj=litellm_logging_obj,
+        )
     else:
         raise ValueError(f"Unsupported provider: {_custom_llm_provider}")
```
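Because the branch forwards `_is_async`, the same code path serves `litellm.arerank`. A quick sketch (same assumptions as the sync example above):

```python
import asyncio

import litellm


async def main():
    response = await litellm.arerank(
        model="bedrock/arn:aws:bedrock:us-west-2::foundation-model/amazon.rerank-v1:0",
        query="hello",
        documents=["hello", "world"],
        top_n=2,
    )
    print(response.results)


asyncio.run(main())
```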
litellm/types/llms/bedrock.py

```diff
@@ -2,6 +2,7 @@ import json
 from typing import Any, List, Literal, Optional, TypedDict, Union

 from typing_extensions import (
     TYPE_CHECKING,
     Protocol,
+    Required,
     Self,

@@ -14,8 +15,13 @@ from typing_extensions import (
 from .openai import ChatCompletionToolCallChunk


-class SystemContentBlock(TypedDict):
+class CachePointBlock(TypedDict, total=False):
+    type: Literal["default"]
+
+
+class SystemContentBlock(TypedDict, total=False):
     text: str
+    cachePoint: CachePointBlock


 class SourceBlock(TypedDict):

@@ -58,6 +64,7 @@ class ContentBlock(TypedDict, total=False):
     document: DocumentBlock
     toolResult: ToolResultBlock
     toolUse: ToolUseBlock
+    cachePoint: CachePointBlock


 class MessageBlock(TypedDict):

@@ -312,3 +319,71 @@ class AmazonStability3TextToImageResponse(TypedDict, total=False):
     images: List[str]
     seeds: List[str]
     finish_reasons: List[str]
+
+
+if TYPE_CHECKING:
+    from botocore.awsrequest import AWSPreparedRequest
+else:
+    AWSPreparedRequest = Any
+
+from pydantic import BaseModel
+
+
+class BedrockPreparedRequest(TypedDict):
+    """
+    Internal/Helper class for preparing the request for bedrock image generation
+    """
+
+    endpoint_url: str
+    prepped: AWSPreparedRequest
+    body: bytes
+    data: dict
+
+
+class BedrockRerankTextQuery(TypedDict):
+    text: str
+
+
+class BedrockRerankQuery(TypedDict):
+    textQuery: BedrockRerankTextQuery
+    type: Literal["TEXT"]
+
+
+class BedrockRerankModelConfiguration(TypedDict, total=False):
+    modelArn: Required[str]
+    modelConfiguration: dict
+
+
+class BedrockRerankBedrockRerankingConfiguration(TypedDict):
+    modelConfiguration: BedrockRerankModelConfiguration
+    numberOfResults: int
+
+
+class BedrockRerankConfiguration(TypedDict):
+    bedrockRerankingConfiguration: BedrockRerankBedrockRerankingConfiguration
+    type: Literal["BEDROCK_RERANKING_MODEL"]
+
+
+class BedrockRerankTextDocument(TypedDict, total=False):
+    text: str
+
+
+class BedrockRerankInlineDocumentSource(TypedDict, total=False):
+    jsonDocument: dict
+    textDocument: BedrockRerankTextDocument
+    type: Literal["TEXT", "JSON"]
+
+
+class BedrockRerankSource(TypedDict):
+    inlineDocumentSource: BedrockRerankInlineDocumentSource
+    type: Literal["INLINE"]
+
+
+class BedrockRerankRequest(TypedDict):
+    """
+    Request for Bedrock Rerank API
+    """
+
+    queries: List[BedrockRerankQuery]
+    rerankingConfiguration: BedrockRerankConfiguration
+    sources: List[BedrockRerankSource]
```
litellm/types/rerank.py

```diff
@@ -36,9 +36,14 @@ class RerankResponseMeta(TypedDict, total=False):
     tokens: RerankTokens


+class RerankResponseResult(TypedDict):
+    index: int
+    relevance_score: float
+
+
 class RerankResponse(BaseModel):
     id: str
-    results: List[dict]  # Contains index and relevance_score
+    results: List[RerankResponseResult]  # Contains index and relevance_score
     meta: Optional[RerankResponseMeta] = None  # Contains api_version and billed_units

     # Define private attributes using PrivateAttr
```
litellm/utils.py

```diff
@@ -1874,22 +1874,11 @@ def supports_prompt_caching(
     Raises:
         Exception: If the given model is not found or there's an error in retrieval.
     """
-    try:
-        model, custom_llm_provider, _, _ = litellm.get_llm_provider(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
-
-        model_info = litellm.get_model_info(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
-
-        if model_info.get("supports_prompt_caching", False) is True:
-            return True
-        return False
-    except Exception as e:
-        raise Exception(
-            f"Model not found or error in checking prompt caching support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
-        )
+    return _supports_factory(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        key="supports_prompt_caching",
+    )


 def supports_vision(model: str, custom_llm_provider: Optional[str] = None) -> bool:
```
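Usage is unchanged by the refactor; a quick sketch, mirroring the new base test's check:

```python
from litellm.utils import supports_prompt_caching

# Returns True when the model cost map marks the model with
# "supports_prompt_caching": true (as this commit does for the Bedrock models).
if supports_prompt_caching("anthropic.claude-3-5-haiku-20241022-v1:0", "bedrock"):
    print("prompt caching supported")
```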
(The model-cost-map hunks above are repeated verbatim at this point in the diff view; the duplicate is omitted.)
tests/llm_translation/base_llm_unit_tests.py

````diff
@@ -23,6 +23,34 @@ from litellm.utils import (
 from abc import ABC, abstractmethod


+def _usage_format_tests(usage: litellm.Usage):
+    """
+    OpenAI prompt caching
+    - prompt_tokens = sum of non-cache hit tokens + cache-hit tokens
+    - total_tokens = prompt_tokens + completion_tokens
+
+    Example
+    ```
+    "usage": {
+        "prompt_tokens": 2006,
+        "completion_tokens": 300,
+        "total_tokens": 2306,
+        "prompt_tokens_details": {
+            "cached_tokens": 1920
+        },
+        "completion_tokens_details": {
+            "reasoning_tokens": 0
+        }
+        # ANTHROPIC_ONLY #
+        "cache_creation_input_tokens": 0
+    }
+    ```
+    """
+    assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens
+
+    assert usage.prompt_tokens > usage.prompt_tokens_details.cached_tokens
+
+
 class BaseLLMChatTest(ABC):
     """
     Abstract base test class that enforces a common test across all test classes.

@@ -273,6 +301,78 @@ class BaseLLMChatTest(ABC):
         response = litellm.completion(**base_completion_call_args, messages=messages)
         assert response is not None

+    def test_prompt_caching(self):
+        litellm.set_verbose = True
+        from litellm.utils import supports_prompt_caching
+
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
+
+        base_completion_call_args = self.get_base_completion_call_args()
+        if not supports_prompt_caching(base_completion_call_args["model"], None):
+            print("Model does not support prompt caching")
+            pytest.skip("Model does not support prompt caching")
+
+        try:
+            for _ in range(2):
+                response = litellm.completion(
+                    **base_completion_call_args,
+                    messages=[
+                        # System Message
+                        {
+                            "role": "system",
+                            "content": [
+                                {
+                                    "type": "text",
+                                    "text": "Here is the full text of a complex legal agreement"
+                                    * 400,
+                                    "cache_control": {"type": "ephemeral"},
+                                }
+                            ],
+                        },
+                        # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
+                        {
+                            "role": "user",
+                            "content": [
+                                {
+                                    "type": "text",
+                                    "text": "What are the key terms and conditions in this agreement?",
+                                    "cache_control": {"type": "ephemeral"},
+                                }
+                            ],
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
+                        },
+                        # The final turn is marked with cache-control, for continuing in followups.
+                        {
+                            "role": "user",
+                            "content": [
+                                {
+                                    "type": "text",
+                                    "text": "What are the key terms and conditions in this agreement?",
+                                    "cache_control": {"type": "ephemeral"},
+                                }
+                            ],
+                        },
+                    ],
+                    temperature=0.2,
+                    max_tokens=10,
+                )
+
+                _usage_format_tests(response.usage)
+
+            print("response=", response)
+            print("response.usage=", response.usage)
+
+            _usage_format_tests(response.usage)
+
+            assert "prompt_tokens_details" in response.usage
+            assert response.usage.prompt_tokens_details.cached_tokens > 0
+        except litellm.InternalServerError:
+            pass
+
     @pytest.fixture
     def pdf_messages(self):
         import base64
````
tests/llm_translation/base_rerank_unit_tests.py

```diff
@@ -79,6 +79,7 @@ class BaseLLMRerankTest(ABC):
     @pytest.mark.asyncio()
     @pytest.mark.parametrize("sync_mode", [True, False])
     async def test_basic_rerank(self, sync_mode):
+        litellm.set_verbose = True
         rerank_call_args = self.get_base_rerank_call_args()
         custom_llm_provider = self.get_custom_llm_provider()
         if sync_mode is True:

@@ -86,7 +87,7 @@ class BaseLLMRerankTest(ABC):
                 **rerank_call_args,
                 query="hello",
                 documents=["hello", "world"],
-                top_n=3,
+                top_n=2,
             )

             print("re rank response: ", response)

@@ -102,7 +103,7 @@ class BaseLLMRerankTest(ABC):
                 **rerank_call_args,
                 query="hello",
                 documents=["hello", "world"],
-                top_n=3,
+                top_n=2,
             )

             print("async re rank response: ", response)
```
Anthropic test suite

```diff
@@ -666,7 +666,7 @@ from litellm import completion

 class TestAnthropicCompletion(BaseLLMChatTest):
     def get_base_completion_call_args(self) -> dict:
-        return {"model": "claude-3-haiku-20240307"}
+        return {"model": "anthropic/claude-3-5-sonnet-20240620"}

     def test_tool_call_no_arguments(self, tool_call_no_arguments):
         """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
```
tests/llm_translation/test_bedrock_completion.py

```diff
@@ -1,3 +1,7 @@
+"""
+Tests Bedrock Completion + Rerank endpoints
+"""
+
 # @pytest.mark.skip(reason="AWS Suspended Account")
 import os
 import sys

@@ -31,6 +35,7 @@ from litellm.llms.bedrock.chat import BedrockLLM
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import _bedrock_tools_pt
 from base_llm_unit_tests import BaseLLMChatTest
+from base_rerank_unit_tests import BaseLLMRerankTest

 # litellm.num_retries = 3
 litellm.cache = None

@@ -1971,13 +1976,67 @@ def test_bedrock_base_model_helper():
     assert model == "us.amazon.nova-pro-v1:0"


+@pytest.mark.parametrize(
+    "messages, expected_cache_control",
+    [
+        (
+            [  # test system prompt cache
+                {
+                    "role": "system",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "You are an AI assistant tasked with analyzing legal documents.",
+                        },
+                        {
+                            "type": "text",
+                            "text": "Here is the full text of a complex legal agreement",
+                            "cache_control": {"type": "ephemeral"},
+                        },
+                    ],
+                },
+                {
+                    "role": "user",
+                    "content": "what are the key terms and conditions in this agreement?",
+                },
+            ],
+            True,
+        ),
+        (
+            [  # test user prompt cache
+                {
+                    "role": "user",
+                    "content": "what are the key terms and conditions in this agreement?",
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+            True,
+        ),
+    ],
+)
+def test_bedrock_prompt_caching_message(messages, expected_cache_control):
+    import litellm
+    import json
+
+    transformed_messages = litellm.AmazonConverseConfig()._transform_request(
+        model="bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
+        messages=messages,
+        optional_params={},
+        litellm_params={},
+    )
+    if expected_cache_control:
+        assert "cachePoint" in json.dumps(transformed_messages)
+    else:
+        assert "cachePoint" not in json.dumps(transformed_messages)
+
+
 class TestBedrockConverseChat(BaseLLMChatTest):
     def get_base_completion_call_args(self) -> dict:
+        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+        litellm.model_cost = litellm.get_model_cost_map(url="")
+        litellm.add_known_models()
         return {
-            "model": "bedrock/us.anthropic.claude-3-haiku-20240307-v1:0",
+            "model": "bedrock/anthropic.claude-3-5-haiku-20241022-v1:0",
         }

     def test_tool_call_no_arguments(self, tool_call_no_arguments):

@@ -1991,3 +2050,19 @@ class TestBedrockConverseChat(BaseLLMChatTest):
         Todo: if litellm.modify_params is True ensure it's a valid utf-8 sequence
         """
         pass
+
+    def test_prompt_caching(self):
+        """
+        Remove override once we have access to Bedrock prompt caching
+        """
+        pass
+
+
+class TestBedrockRerank(BaseLLMRerankTest):
+    def get_custom_llm_provider(self) -> litellm.LlmProviders:
+        return litellm.LlmProviders.BEDROCK
+
+    def get_base_rerank_call_args(self) -> dict:
+        return {
+            "model": "bedrock/arn:aws:bedrock:us-west-2::foundation-model/amazon.rerank-v1:0",
+        }
```
tests/llm_translation/test_together_ai.py (new file, 58 lines)

```python
"""
Test TogetherAI LLM
"""

from base_llm_unit_tests import BaseLLMChatTest
import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import litellm
import pytest


class TestTogetherAI(BaseLLMChatTest):
    def get_base_completion_call_args(self) -> dict:
        litellm.set_verbose = True
        return {"model": "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1"}

    def test_tool_call_no_arguments(self, tool_call_no_arguments):
        """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
        pass

    def test_multilingual_requests(self):
        """
        Mistral API raises a 400 BadRequest error when the request contains invalid utf-8 sequences.
        """
        pass

    @pytest.mark.parametrize(
        "model, expected_bool",
        [
            ("meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", True),
            ("nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", False),
        ],
    )
    def test_get_supported_response_format_together_ai(
        self, model: str, expected_bool: bool
    ) -> None:
        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
        litellm.model_cost = litellm.get_model_cost_map(url="")
        optional_params = litellm.get_supported_openai_params(
            model, custom_llm_provider="together_ai"
        )
        # Mapped provider
        assert isinstance(optional_params, list)

        if expected_bool:
            assert "response_format" in optional_params
            assert "tools" in optional_params
        else:
            assert "response_format" not in optional_params
            assert "tools" not in optional_params
```
```diff
@@ -197,7 +197,7 @@ async def test_async_log_cache_hit_on_callbacks():
         ),
         (
             CallTypes.rerank.value,
-            {"id": "test", "results": [{"index": 0, "score": 0.9}]},
+            {"id": "test", "results": [{"index": 0, "relevance_score": 0.9}]},
             RerankResponse,
         ),
         (
```
Standalone prompt-caching test module

```diff
@@ -38,76 +38,6 @@ def _usage_format_tests(usage: litellm.Usage):
     assert usage.prompt_tokens > usage.prompt_tokens_details.cached_tokens


-@pytest.mark.parametrize(
-    "model",
-    [
-        "anthropic/claude-3-5-sonnet-20240620",
-        # "openai/gpt-4o",
-        # "deepseek/deepseek-chat",
-    ],
-)
-def test_prompt_caching_model(model):
-    try:
-        for _ in range(2):
-            response = litellm.completion(
-                model=model,
-                messages=[...],  # body removed; identical 4-message cache_control
-                                 # conversation to the test_prompt_caching added to
-                                 # BaseLLMChatTest above
-                temperature=0.2,
-                max_tokens=10,
-            )
-
-            _usage_format_tests(response.usage)
-
-        print("response=", response)
-        print("response.usage=", response.usage)
-
-        _usage_format_tests(response.usage)
-
-        assert "prompt_tokens_details" in response.usage
-        assert response.usage.prompt_tokens_details.cached_tokens > 0
-    except litellm.InternalServerError:
-        pass
-
-
 def test_supports_prompt_caching():
     from litellm.utils import supports_prompt_caching
```
```diff
@@ -185,3 +185,22 @@ async def test_log_db_metrics_failure_error_types(exception, should_log):
     else:
         # Assert failure was NOT logged for non-DB errors
         mock_proxy_logging.service_logging_obj.async_service_failure_hook.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_dd_log_db_spend_failure_metrics():
+    from litellm._service_logger import ServiceLogging
+    from litellm.integrations.datadog.datadog import DataDogLogger
+
+    dd_logger = DataDogLogger()
+    with patch.object(dd_logger, "async_service_failure_hook", new_callable=AsyncMock):
+        service_logging_obj = ServiceLogging()
+
+        litellm.service_callback = [dd_logger]
+
+        await service_logging_obj.async_service_failure_hook(
+            service=ServiceTypes.DB,
+            call_type="test_call_type",
+            error="test_error",
+            duration=1.0,
+        )
```