diff --git a/docs/my-website/docs/completion/json_mode.md b/docs/my-website/docs/completion/json_mode.md index 119cc033fe..0c3a930764 100644 --- a/docs/my-website/docs/completion/json_mode.md +++ b/docs/my-website/docs/completion/json_mode.md @@ -51,6 +51,9 @@ curl http://0.0.0.0:4000/v1/chat/completions \ ## Check Model Support + +### 1. Check if model supports `response_format` + Call `litellm.get_supported_openai_params` to check if a model/provider supports `response_format`. ```python @@ -61,6 +64,20 @@ params = get_supported_openai_params(model="anthropic.claude-3", custom_llm_prov assert "response_format" in params ``` +### 2. Check if model supports `json_schema` + +This is used to check if you can pass +- `response_format={ "type": "json_schema", "json_schema": … , "strict": true }` +- `response_format=` + +```python +from litellm import supports_response_schema + +assert supports_response_schema(model="gemini-1.5-pro-preview-0215", custom_llm_provider="bedrock") +``` + +Check out [model_prices_and_context_window.json](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) for a full list of models and their support for `response_schema`. + ## Pass in 'json_schema' To use Structured Outputs, simply specify diff --git a/litellm/_service_logger.py b/litellm/_service_logger.py index f777c93d45..5cba897cf3 100644 --- a/litellm/_service_logger.py +++ b/litellm/_service_logger.py @@ -7,6 +7,7 @@ from litellm._logging import verbose_logger from litellm.proxy._types import UserAPIKeyAuth from .integrations.custom_logger import CustomLogger +from .integrations.datadog.datadog import DataDogLogger from .integrations.prometheus_services import PrometheusServicesLogger from .types.services import ServiceLoggerPayload, ServiceTypes @@ -134,9 +135,7 @@ class ServiceLogging(CustomLogger): await self.prometheusServicesLogger.async_service_success_hook( payload=payload ) - elif callback == "datadog": - from litellm.integrations.datadog.datadog import DataDogLogger - + elif callback == "datadog" or isinstance(callback, DataDogLogger): await self.init_datadog_logger_if_none() await self.dd_logger.async_service_success_hook( payload=payload, @@ -237,6 +236,7 @@ class ServiceLogging(CustomLogger): duration=duration, call_type=call_type, ) + for callback in litellm.service_callback: if callback == "prometheus_system": await self.init_prometheus_services_logger_if_none() @@ -244,7 +244,7 @@ class ServiceLogging(CustomLogger): payload=payload, error=error, ) - elif callback == "datadog": + elif callback == "datadog" or isinstance(callback, DataDogLogger): await self.init_datadog_logger_if_none() await self.dd_logger.async_service_failure_hook( payload=payload, diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index e8d7e80b74..6238fd8d57 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -5,7 +5,7 @@ Translating between OpenAI's `/chat/completion` format and Amazon's `/converse` import copy import time import types -from typing import List, Optional, Union +from typing import List, Literal, Optional, Tuple, Union, cast, overload import httpx @@ -255,6 +255,59 @@ class AmazonConverseConfig: ) return optional_params + @overload + def _get_cache_point_block( + self, message_block: dict, block_type: Literal["system"] + ) -> Optional[SystemContentBlock]: + pass + + @overload + def _get_cache_point_block( + self, message_block: dict, 
block_type: Literal["content_block"] + ) -> Optional[ContentBlock]: + pass + + def _get_cache_point_block( + self, message_block: dict, block_type: Literal["system", "content_block"] + ) -> Optional[Union[SystemContentBlock, ContentBlock]]: + if message_block.get("cache_control", None) is None: + return None + if block_type == "system": + return SystemContentBlock(cachePoint=CachePointBlock(type="default")) + else: + return ContentBlock(cachePoint=CachePointBlock(type="default")) + + def _transform_system_message( + self, messages: List[AllMessageValues] + ) -> Tuple[List[AllMessageValues], List[SystemContentBlock]]: + system_prompt_indices = [] + system_content_blocks: List[SystemContentBlock] = [] + for idx, message in enumerate(messages): + if message["role"] == "system": + _system_content_block: Optional[SystemContentBlock] = None + _cache_point_block: Optional[SystemContentBlock] = None + if isinstance(message["content"], str) and len(message["content"]) > 0: + _system_content_block = SystemContentBlock(text=message["content"]) + _cache_point_block = self._get_cache_point_block( + cast(dict, message), block_type="system" + ) + elif isinstance(message["content"], list): + for m in message["content"]: + if m.get("type", "") == "text" and len(m["text"]) > 0: + _system_content_block = SystemContentBlock(text=m["text"]) + _cache_point_block = self._get_cache_point_block( + m, block_type="system" + ) + if _system_content_block is not None: + system_content_blocks.append(_system_content_block) + if _cache_point_block is not None: + system_content_blocks.append(_cache_point_block) + system_prompt_indices.append(idx) + if len(system_prompt_indices) > 0: + for idx in reversed(system_prompt_indices): + messages.pop(idx) + return messages, system_content_blocks + def _transform_request( self, model: str, @@ -262,24 +315,7 @@ class AmazonConverseConfig: optional_params: dict, litellm_params: dict, ) -> RequestObject: - system_prompt_indices = [] - system_content_blocks: List[SystemContentBlock] = [] - for idx, message in enumerate(messages): - if message["role"] == "system": - _system_content_block: Optional[SystemContentBlock] = None - if isinstance(message["content"], str) and len(message["content"]) > 0: - _system_content_block = SystemContentBlock(text=message["content"]) - elif isinstance(message["content"], list): - for m in message["content"]: - if m.get("type", "") == "text" and len(m["text"]) > 0: - _system_content_block = SystemContentBlock(text=m["text"]) - if _system_content_block is not None: - system_content_blocks.append(_system_content_block) - system_prompt_indices.append(idx) - if len(system_prompt_indices) > 0: - for idx in reversed(system_prompt_indices): - messages.pop(idx) - + messages, system_content_blocks = self._transform_system_message(messages) inference_params = copy.deepcopy(optional_params) additional_request_keys = [] additional_request_params = {} diff --git a/litellm/llms/bedrock/rerank/handler.py b/litellm/llms/bedrock/rerank/handler.py new file mode 100644 index 0000000000..d604760e35 --- /dev/null +++ b/litellm/llms/bedrock/rerank/handler.py @@ -0,0 +1,159 @@ +import copy +import json +import os +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, cast + +import httpx +from openai.types.image import Image +from pydantic import BaseModel + +import litellm +from litellm._logging import verbose_logger +from litellm.litellm_core_utils.litellm_logging import Logging as LitellmLogging +from litellm.llms.custom_httpx.http_handler import ( + 
_get_httpx_client, + get_async_httpx_client, +) +from litellm.types.llms.bedrock import BedrockPreparedRequest, BedrockRerankRequest +from litellm.types.rerank import RerankRequest +from litellm.types.utils import RerankResponse + +from ...base_aws_llm import BaseAWSLLM +from ..common_utils import BedrockError +from .transformation import BedrockRerankConfig + +if TYPE_CHECKING: + from botocore.awsrequest import AWSPreparedRequest +else: + AWSPreparedRequest = Any + + +class BedrockRerankHandler(BaseAWSLLM): + async def arerank( + self, + prepared_request: BedrockPreparedRequest, + ): + client = get_async_httpx_client(llm_provider=litellm.LlmProviders.BEDROCK) + try: + response = await client.post(url=prepared_request["endpoint_url"], headers=prepared_request["prepped"].headers, data=prepared_request["body"]) # type: ignore + response.raise_for_status() + except httpx.HTTPStatusError as err: + error_code = err.response.status_code + raise BedrockError(status_code=error_code, message=err.response.text) + except httpx.TimeoutException: + raise BedrockError(status_code=408, message="Timeout error occurred.") + + return BedrockRerankConfig()._transform_response(response.json()) + + def rerank( + self, + model: str, + query: str, + documents: List[Union[str, Dict[str, Any]]], + optional_params: dict, + logging_obj: LitellmLogging, + top_n: Optional[int] = None, + rank_fields: Optional[List[str]] = None, + return_documents: Optional[bool] = True, + max_chunks_per_doc: Optional[int] = None, + _is_async: Optional[bool] = False, + api_base: Optional[str] = None, + extra_headers: Optional[dict] = None, + ) -> RerankResponse: + request_data = RerankRequest( + model=model, + query=query, + documents=documents, + top_n=top_n, + rank_fields=rank_fields, + return_documents=return_documents, + ) + data = BedrockRerankConfig()._transform_request(request_data) + + prepared_request = self._prepare_request( + optional_params=optional_params, + api_base=api_base, + extra_headers=extra_headers, + data=cast(dict, data), + ) + + logging_obj.pre_call( + input=data, + api_key="", + additional_args={ + "complete_input_dict": data, + "api_base": prepared_request["endpoint_url"], + "headers": prepared_request["prepped"].headers, + }, + ) + + if _is_async: + return self.arerank(prepared_request) # type: ignore + + client = _get_httpx_client() + try: + response = client.post(url=prepared_request["endpoint_url"], headers=prepared_request["prepped"].headers, data=prepared_request["body"]) # type: ignore + response.raise_for_status() + except httpx.HTTPStatusError as err: + error_code = err.response.status_code + raise BedrockError(status_code=error_code, message=err.response.text) + except httpx.TimeoutException: + raise BedrockError(status_code=408, message="Timeout error occurred.") + + return BedrockRerankConfig()._transform_response(response.json()) + + def _prepare_request( + self, + api_base: Optional[str], + extra_headers: Optional[dict], + data: dict, + optional_params: dict, + ) -> BedrockPreparedRequest: + try: + import boto3 + from botocore.auth import SigV4Auth + from botocore.awsrequest import AWSRequest + from botocore.credentials import Credentials + except ImportError: + raise ImportError("Missing boto3 to call bedrock. 
Run 'pip install boto3'.") + boto3_credentials_info = self._get_boto_credentials_from_optional_params( + optional_params + ) + + ### SET RUNTIME ENDPOINT ### + _, proxy_endpoint_url = self.get_runtime_endpoint( + api_base=api_base, + aws_bedrock_runtime_endpoint=boto3_credentials_info.aws_bedrock_runtime_endpoint, + aws_region_name=boto3_credentials_info.aws_region_name, + ) + proxy_endpoint_url = proxy_endpoint_url.replace( + "bedrock-runtime", "bedrock-agent-runtime" + ) + proxy_endpoint_url = f"{proxy_endpoint_url}/rerank" + sigv4 = SigV4Auth( + boto3_credentials_info.credentials, + "bedrock", + boto3_credentials_info.aws_region_name, + ) + # Make POST Request + body = json.dumps(data).encode("utf-8") + + headers = {"Content-Type": "application/json"} + if extra_headers is not None: + headers = {"Content-Type": "application/json", **extra_headers} + request = AWSRequest( + method="POST", url=proxy_endpoint_url, data=body, headers=headers + ) + sigv4.add_auth(request) + if ( + extra_headers is not None and "Authorization" in extra_headers + ): # prevent sigv4 from overwriting the auth header + request.headers["Authorization"] = extra_headers["Authorization"] + prepped = request.prepare() + + return BedrockPreparedRequest( + endpoint_url=proxy_endpoint_url, + prepped=prepped, + body=body, + data=data, + ) diff --git a/litellm/llms/bedrock/rerank/transformation.py b/litellm/llms/bedrock/rerank/transformation.py new file mode 100644 index 0000000000..7dc9b0aab1 --- /dev/null +++ b/litellm/llms/bedrock/rerank/transformation.py @@ -0,0 +1,117 @@ +""" +Translates from Cohere's `/v1/rerank` input format to Bedrock's `/rerank` input format. + +Why separate file? Make it easy to see how transformation works +""" + +import uuid +from typing import List, Optional, Union + +from litellm.types.llms.bedrock import ( + BedrockRerankBedrockRerankingConfiguration, + BedrockRerankConfiguration, + BedrockRerankInlineDocumentSource, + BedrockRerankModelConfiguration, + BedrockRerankQuery, + BedrockRerankRequest, + BedrockRerankSource, + BedrockRerankTextDocument, + BedrockRerankTextQuery, +) +from litellm.types.rerank import ( + RerankBilledUnits, + RerankRequest, + RerankResponse, + RerankResponseMeta, + RerankResponseResult, + RerankTokens, +) + + +class BedrockRerankConfig: + + def _transform_sources( + self, documents: List[Union[str, dict]] + ) -> List[BedrockRerankSource]: + """ + Transform the sources from RerankRequest format to Bedrock format. + """ + _sources = [] + for document in documents: + if isinstance(document, str): + _sources.append( + BedrockRerankSource( + inlineDocumentSource=BedrockRerankInlineDocumentSource( + textDocument=BedrockRerankTextDocument(text=document), + type="TEXT", + ), + type="INLINE", + ) + ) + else: + _sources.append( + BedrockRerankSource( + inlineDocumentSource=BedrockRerankInlineDocumentSource( + jsonDocument=document, type="JSON" + ), + type="INLINE", + ) + ) + return _sources + + def _transform_request(self, request_data: RerankRequest) -> BedrockRerankRequest: + """ + Transform the request from RerankRequest format to Bedrock format. 
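+
+        Example output (illustrative sketch only - actual values depend on the
+        incoming RerankRequest; "<model arn>" is a placeholder):
+        {
+            "queries": [{"textQuery": {"text": "hello"}, "type": "TEXT"}],
+            "rerankingConfiguration": {
+                "bedrockRerankingConfiguration": {
+                    "modelConfiguration": {"modelArn": "<model arn>"},
+                    "numberOfResults": 2
+                },
+                "type": "BEDROCK_RERANKING_MODEL"
+            },
+            "sources": [
+                {
+                    "inlineDocumentSource": {
+                        "textDocument": {"text": "hello"},
+                        "type": "TEXT"
+                    },
+                    "type": "INLINE"
+                }
+            ]
+        }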
+ """ + _sources = self._transform_sources(request_data.documents) + + return BedrockRerankRequest( + queries=[ + BedrockRerankQuery( + textQuery=BedrockRerankTextQuery(text=request_data.query), + type="TEXT", + ) + ], + rerankingConfiguration=BedrockRerankConfiguration( + bedrockRerankingConfiguration=BedrockRerankBedrockRerankingConfiguration( + modelConfiguration=BedrockRerankModelConfiguration( + modelArn=request_data.model + ), + numberOfResults=request_data.top_n or len(request_data.documents), + ), + type="BEDROCK_RERANKING_MODEL", + ), + sources=_sources, + ) + + def _transform_response(self, response: dict) -> RerankResponse: + """ + Transform the response from Bedrock into the RerankResponse format. + + example input: + {"results":[{"index":0,"relevanceScore":0.6847912669181824},{"index":1,"relevanceScore":0.5980774760246277}]} + """ + _billed_units = RerankBilledUnits(**response.get("usage", {})) + _tokens = RerankTokens(**response.get("usage", {})) + rerank_meta = RerankResponseMeta(billed_units=_billed_units, tokens=_tokens) + + _results: Optional[List[RerankResponseResult]] = None + + bedrock_results = response.get("results") + if bedrock_results: + _results = [ + RerankResponseResult( + index=result.get("index"), + relevance_score=result.get("relevanceScore"), + ) + for result in bedrock_results + ] + + if _results is None: + raise ValueError(f"No results found in the response={response}") + + return RerankResponse( + id=response.get("id") or str(uuid.uuid4()), + results=_results, + meta=rerank_meta, + ) # Return response diff --git a/litellm/llms/jina_ai/rerank/transformation.py b/litellm/llms/jina_ai/rerank/transformation.py index 82039a15b2..a6c0a810c7 100644 --- a/litellm/llms/jina_ai/rerank/transformation.py +++ b/litellm/llms/jina_ai/rerank/transformation.py @@ -31,6 +31,6 @@ class JinaAIRerankConfig: return RerankResponse( id=response.get("id") or str(uuid.uuid4()), - results=_results, + results=_results, # type: ignore meta=rerank_meta, ) # Return response diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index 2f55bb7bac..c218377e51 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -2485,10 +2485,24 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915 image_url=image_url ) _parts.append(_part) # type: ignore + _cache_point_block = ( + litellm.AmazonConverseConfig()._get_cache_point_block( + element, block_type="content_block" + ) + ) + if _cache_point_block is not None: + _parts.append(_cache_point_block) user_content.extend(_parts) else: _part = BedrockContentBlock(text=messages[msg_i]["content"]) + _cache_point_block = ( + litellm.AmazonConverseConfig()._get_cache_point_block( + messages[msg_i], block_type="content_block" + ) + ) user_content.append(_part) + if _cache_point_block is not None: + user_content.append(_cache_point_block) msg_i += 1 if user_content: diff --git a/litellm/llms/together_ai/chat.py b/litellm/llms/together_ai/chat.py index cb12d61475..54b7e48680 100644 --- a/litellm/llms/together_ai/chat.py +++ b/litellm/llms/together_ai/chat.py @@ -6,8 +6,54 @@ Calls done in OpenAI/openai.py as TogetherAI is openai-compatible. 
Docs: https://docs.together.ai/reference/completions-1 """ +from typing import Optional + +from litellm import get_model_info, verbose_logger + from ..OpenAI.chat.gpt_transformation import OpenAIGPTConfig class TogetherAIConfig(OpenAIGPTConfig): - pass + def get_supported_openai_params(self, model: str) -> list: + """ + Only some together models support response_format / tool calling + + Docs: https://docs.together.ai/docs/json-mode + """ + supports_function_calling: Optional[bool] = None + try: + model_info = get_model_info(model, custom_llm_provider="together_ai") + supports_function_calling = model_info.get( + "supports_function_calling", False + ) + except Exception as e: + verbose_logger.debug(f"Error getting supported openai params: {e}") + pass + + optional_params = super().get_supported_openai_params(model) + if supports_function_calling is not True: + verbose_logger.warning( + "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling" + ) + optional_params.remove("tools") + optional_params.remove("tool_choice") + optional_params.remove("function_call") + optional_params.remove("response_format") + return optional_params + + def map_openai_params( + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, + ) -> dict: + mapped_openai_params = super().map_openai_params( + non_default_params, optional_params, model, drop_params + ) + + if "response_format" in mapped_openai_params and mapped_openai_params[ + "response_format" + ] == {"type": "text"}: + mapped_openai_params.pop("response_format") + return mapped_openai_params diff --git a/litellm/llms/together_ai/rerank/transformation.py b/litellm/llms/together_ai/rerank/transformation.py index b2024b5cda..b74e0b6c00 100644 --- a/litellm/llms/together_ai/rerank/transformation.py +++ b/litellm/llms/together_ai/rerank/transformation.py @@ -29,6 +29,6 @@ class TogetherAIRerankConfig: return RerankResponse( id=response.get("id") or str(uuid.uuid4()), - results=_results, + results=_results, # type: ignore meta=rerank_meta, ) # Return response diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 659b7dd8ea..7ad438b100 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -12,7 +12,8 @@ "supports_vision": true, "supports_audio_input": true, "supports_audio_output": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_response_schema": true }, "gpt-4": { "max_tokens": 4096, @@ -4818,7 +4819,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "amazon.nova-lite-v1:0": { "max_tokens": 4096, @@ -4830,7 +4832,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "amazon.nova-pro-v1:0": { "max_tokens": 4096, @@ -4842,7 +4845,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "anthropic.claude-3-sonnet-20240229-v1:0": { "max_tokens": 4096, @@ -4876,7 +4880,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_assistant_prefill": true + "supports_assistant_prefill": true, 
+ "supports_prompt_caching": true }, "anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -4898,7 +4903,8 @@ "litellm_provider": "bedrock", "mode": "chat", "supports_assistant_prefill": true, - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "anthropic.claude-3-opus-20240229-v1:0": { "max_tokens": 4096, @@ -4911,139 +4917,6 @@ "supports_function_calling": true, "supports_vision": true }, - "us.anthropic.claude-3-sonnet-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "max_tokens": 8192, - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true, - "supports_assistant_prefill": true - }, - "us.anthropic.claude-3-haiku-20240307-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.00000125, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-haiku-20241022-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000001, - "output_cost_per_token": 0.000005, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_assistant_prefill": true, - "supports_function_calling": true - }, - "us.anthropic.claude-3-opus-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000015, - "output_cost_per_token": 0.000075, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-sonnet-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "max_tokens": 8192, - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true, - "supports_assistant_prefill": true - }, - "eu.anthropic.claude-3-haiku-20240307-v1:0": { - "max_tokens": 4096, - 
"max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.00000125, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-haiku-20241022-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000001, - "output_cost_per_token": 0.000005, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true - }, - "eu.anthropic.claude-3-opus-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000015, - "output_cost_per_token": 0.000075, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, "anthropic.claude-v1": { "max_tokens": 8191, "max_input_tokens": 100000, @@ -6097,6 +5970,30 @@ "litellm_provider": "together_ai", "mode": "embedding" }, + "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { + "input_cost_per_token": 0.00000018, + "output_cost_per_token": 0.00000018, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, + "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { + "input_cost_per_token": 0.00000088, + "output_cost_per_token": 0.00000088, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, + "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { + "input_cost_per_token": 0.0000035, + "output_cost_per_token": 0.0000035, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": { "input_cost_per_token": 0.0000006, "output_cost_per_token": 0.0000006, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index f2569f2f28..599a1bf230 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -39,10 +39,10 @@ model_list: access_groups: ["private-openai-models"] router_settings: - routing_strategy: usage-based-routing-v2 + # routing_strategy: usage-based-routing-v2 #redis_url: "os.environ/REDIS_URL" redis_host: "os.environ/REDIS_HOST" redis_port: "os.environ/REDIS_PORT" litellm_settings: - success_callback: ["langsmith"] \ No newline at end of file + callbacks: ["datadog"] \ No newline at end of file diff --git a/litellm/rerank_api/main.py b/litellm/rerank_api/main.py index 7e6dc75035..3b3eaad016 100644 --- a/litellm/rerank_api/main.py +++ b/litellm/rerank_api/main.py @@ -7,6 +7,7 @@ import litellm from litellm._logging import verbose_logger from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.llms.azure_ai.rerank import AzureAIRerank +from litellm.llms.bedrock.rerank.handler import BedrockRerankHandler from litellm.llms.cohere.rerank import CohereRerank from litellm.llms.jina_ai.rerank.handler import JinaAIRerank from litellm.llms.together_ai.rerank.handler import TogetherAIRerank @@ -21,6 +22,7 @@ cohere_rerank = CohereRerank() together_rerank = TogetherAIRerank() azure_ai_rerank = AzureAIRerank() jina_ai_rerank = JinaAIRerank() +bedrock_rerank = BedrockRerankHandler() ################################################# @@ -70,7 +72,7 @@ async def arerank( @client 
-def rerank( +def rerank( # noqa: PLR0915 model: str, query: str, documents: List[Union[str, Dict[str, Any]]], @@ -268,6 +270,27 @@ def rerank( max_chunks_per_doc=max_chunks_per_doc, _is_async=_is_async, ) + elif _custom_llm_provider == "bedrock": + api_base = ( + dynamic_api_base + or optional_params.api_base + or litellm.api_base + or get_secret("BEDROCK_API_BASE") # type: ignore + ) + + response = bedrock_rerank.rerank( + model=model, + query=query, + documents=documents, + top_n=top_n, + rank_fields=rank_fields, + return_documents=return_documents, + max_chunks_per_doc=max_chunks_per_doc, + _is_async=_is_async, + optional_params=optional_params.model_dump(exclude_unset=True), + api_base=api_base, + logging_obj=litellm_logging_obj, + ) else: raise ValueError(f"Unsupported provider: {_custom_llm_provider}") diff --git a/litellm/types/llms/bedrock.py b/litellm/types/llms/bedrock.py index 88f329adeb..e94ffd80a3 100644 --- a/litellm/types/llms/bedrock.py +++ b/litellm/types/llms/bedrock.py @@ -2,6 +2,7 @@ import json from typing import Any, List, Literal, Optional, TypedDict, Union from typing_extensions import ( + TYPE_CHECKING, Protocol, Required, Self, @@ -14,8 +15,13 @@ from typing_extensions import ( from .openai import ChatCompletionToolCallChunk -class SystemContentBlock(TypedDict): +class CachePointBlock(TypedDict, total=False): + type: Literal["default"] + + +class SystemContentBlock(TypedDict, total=False): text: str + cachePoint: CachePointBlock class SourceBlock(TypedDict): @@ -58,6 +64,7 @@ class ContentBlock(TypedDict, total=False): document: DocumentBlock toolResult: ToolResultBlock toolUse: ToolUseBlock + cachePoint: CachePointBlock class MessageBlock(TypedDict): @@ -312,3 +319,71 @@ class AmazonStability3TextToImageResponse(TypedDict, total=False): images: List[str] seeds: List[str] finish_reasons: List[str] + + +if TYPE_CHECKING: + from botocore.awsrequest import AWSPreparedRequest +else: + AWSPreparedRequest = Any + +from pydantic import BaseModel + + +class BedrockPreparedRequest(TypedDict): + """ + Internal/Helper class for preparing the request for bedrock image generation + """ + + endpoint_url: str + prepped: AWSPreparedRequest + body: bytes + data: dict + + +class BedrockRerankTextQuery(TypedDict): + text: str + + +class BedrockRerankQuery(TypedDict): + textQuery: BedrockRerankTextQuery + type: Literal["TEXT"] + + +class BedrockRerankModelConfiguration(TypedDict, total=False): + modelArn: Required[str] + modelConfiguration: dict + + +class BedrockRerankBedrockRerankingConfiguration(TypedDict): + modelConfiguration: BedrockRerankModelConfiguration + numberOfResults: int + + +class BedrockRerankConfiguration(TypedDict): + bedrockRerankingConfiguration: BedrockRerankBedrockRerankingConfiguration + type: Literal["BEDROCK_RERANKING_MODEL"] + + +class BedrockRerankTextDocument(TypedDict, total=False): + text: str + + +class BedrockRerankInlineDocumentSource(TypedDict, total=False): + jsonDocument: dict + textDocument: BedrockRerankTextDocument + type: Literal["TEXT", "JSON"] + + +class BedrockRerankSource(TypedDict): + inlineDocumentSource: BedrockRerankInlineDocumentSource + type: Literal["INLINE"] + + +class BedrockRerankRequest(TypedDict): + """ + Request for Bedrock Rerank API + """ + + queries: List[BedrockRerankQuery] + rerankingConfiguration: BedrockRerankConfiguration + sources: List[BedrockRerankSource] diff --git a/litellm/types/rerank.py b/litellm/types/rerank.py index 00b07ba139..8a2332fe36 100644 --- a/litellm/types/rerank.py +++ b/litellm/types/rerank.py @@ 
-36,9 +36,14 @@ class RerankResponseMeta(TypedDict, total=False): tokens: RerankTokens +class RerankResponseResult(TypedDict): + index: int + relevance_score: float + + class RerankResponse(BaseModel): id: str - results: List[dict] # Contains index and relevance_score + results: List[RerankResponseResult] # Contains index and relevance_score meta: Optional[RerankResponseMeta] = None # Contains api_version and billed_units # Define private attributes using PrivateAttr diff --git a/litellm/utils.py b/litellm/utils.py index 86c0a60294..b470cac61e 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1874,22 +1874,11 @@ def supports_prompt_caching( Raises: Exception: If the given model is not found or there's an error in retrieval. """ - try: - model, custom_llm_provider, _, _ = litellm.get_llm_provider( - model=model, custom_llm_provider=custom_llm_provider - ) - - model_info = litellm.get_model_info( - model=model, custom_llm_provider=custom_llm_provider - ) - - if model_info.get("supports_prompt_caching", False) is True: - return True - return False - except Exception as e: - raise Exception( - f"Model not found or error in checking prompt caching support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}" - ) + return _supports_factory( + model=model, + custom_llm_provider=custom_llm_provider, + key="supports_prompt_caching", + ) def supports_vision(model: str, custom_llm_provider: Optional[str] = None) -> bool: diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 659b7dd8ea..7ad438b100 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -12,7 +12,8 @@ "supports_vision": true, "supports_audio_input": true, "supports_audio_output": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_response_schema": true }, "gpt-4": { "max_tokens": 4096, @@ -4818,7 +4819,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "amazon.nova-lite-v1:0": { "max_tokens": 4096, @@ -4830,7 +4832,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "amazon.nova-pro-v1:0": { "max_tokens": 4096, @@ -4842,7 +4845,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_pdf_input": true + "supports_pdf_input": true, + "supports_prompt_caching": true }, "anthropic.claude-3-sonnet-20240229-v1:0": { "max_tokens": 4096, @@ -4876,7 +4880,8 @@ "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "supports_assistant_prefill": true + "supports_assistant_prefill": true, + "supports_prompt_caching": true }, "anthropic.claude-3-haiku-20240307-v1:0": { "max_tokens": 4096, @@ -4898,7 +4903,8 @@ "litellm_provider": "bedrock", "mode": "chat", "supports_assistant_prefill": true, - "supports_function_calling": true + "supports_function_calling": true, + "supports_prompt_caching": true }, "anthropic.claude-3-opus-20240229-v1:0": { "max_tokens": 4096, @@ -4911,139 +4917,6 @@ "supports_function_calling": true, "supports_vision": true }, - "us.anthropic.claude-3-sonnet-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": 
"chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "max_tokens": 8192, - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true, - "supports_assistant_prefill": true - }, - "us.anthropic.claude-3-haiku-20240307-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.00000125, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "us.anthropic.claude-3-5-haiku-20241022-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000001, - "output_cost_per_token": 0.000005, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_assistant_prefill": true, - "supports_function_calling": true - }, - "us.anthropic.claude-3-opus-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000015, - "output_cost_per_token": 0.000075, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-sonnet-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "max_tokens": 8192, - "max_input_tokens": 200000, - "max_output_tokens": 8192, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000015, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true, - "supports_assistant_prefill": true - }, - "eu.anthropic.claude-3-haiku-20240307-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.00000125, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, - "eu.anthropic.claude-3-5-haiku-20241022-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000001, - "output_cost_per_token": 0.000005, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true - }, - "eu.anthropic.claude-3-opus-20240229-v1:0": { - "max_tokens": 4096, - "max_input_tokens": 200000, - "max_output_tokens": 4096, - "input_cost_per_token": 0.000015, - 
"output_cost_per_token": 0.000075, - "litellm_provider": "bedrock", - "mode": "chat", - "supports_function_calling": true, - "supports_vision": true - }, "anthropic.claude-v1": { "max_tokens": 8191, "max_input_tokens": 100000, @@ -6097,6 +5970,30 @@ "litellm_provider": "together_ai", "mode": "embedding" }, + "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { + "input_cost_per_token": 0.00000018, + "output_cost_per_token": 0.00000018, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, + "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { + "input_cost_per_token": 0.00000088, + "output_cost_per_token": 0.00000088, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, + "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { + "input_cost_per_token": 0.0000035, + "output_cost_per_token": 0.0000035, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "mode": "chat" + }, "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": { "input_cost_per_token": 0.0000006, "output_cost_per_token": 0.0000006, diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py index 5004d45994..143a989c18 100644 --- a/tests/llm_translation/base_llm_unit_tests.py +++ b/tests/llm_translation/base_llm_unit_tests.py @@ -23,6 +23,34 @@ from litellm.utils import ( from abc import ABC, abstractmethod +def _usage_format_tests(usage: litellm.Usage): + """ + OpenAI prompt caching + - prompt_tokens = sum of non-cache hit tokens + cache-hit tokens + - total_tokens = prompt_tokens + completion_tokens + + Example + ``` + "usage": { + "prompt_tokens": 2006, + "completion_tokens": 300, + "total_tokens": 2306, + "prompt_tokens_details": { + "cached_tokens": 1920 + }, + "completion_tokens_details": { + "reasoning_tokens": 0 + } + # ANTHROPIC_ONLY # + "cache_creation_input_tokens": 0 + } + ``` + """ + assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens + + assert usage.prompt_tokens > usage.prompt_tokens_details.cached_tokens + + class BaseLLMChatTest(ABC): """ Abstract base test class that enforces a common test across all test classes. @@ -273,6 +301,78 @@ class BaseLLMChatTest(ABC): response = litellm.completion(**base_completion_call_args, messages=messages) assert response is not None + def test_prompt_caching(self): + litellm.set_verbose = True + from litellm.utils import supports_prompt_caching + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + base_completion_call_args = self.get_base_completion_call_args() + if not supports_prompt_caching(base_completion_call_args["model"], None): + print("Model does not support prompt caching") + pytest.skip("Model does not support prompt caching") + + try: + for _ in range(2): + response = litellm.completion( + **base_completion_call_args, + messages=[ + # System Message + { + "role": "system", + "content": [ + { + "type": "text", + "text": "Here is the full text of a complex legal agreement" + * 400, + "cache_control": {"type": "ephemeral"}, + } + ], + }, + # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. 
+ { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + { + "role": "assistant", + "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo", + }, + # The final turn is marked with cache-control, for continuing in followups. + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + } + ], + }, + ], + temperature=0.2, + max_tokens=10, + ) + + _usage_format_tests(response.usage) + + print("response=", response) + print("response.usage=", response.usage) + + _usage_format_tests(response.usage) + + assert "prompt_tokens_details" in response.usage + assert response.usage.prompt_tokens_details.cached_tokens > 0 + except litellm.InternalServerError: + pass + @pytest.fixture def pdf_messages(self): import base64 diff --git a/tests/llm_translation/base_rerank_unit_tests.py b/tests/llm_translation/base_rerank_unit_tests.py index 2a8b801946..54f6009fc6 100644 --- a/tests/llm_translation/base_rerank_unit_tests.py +++ b/tests/llm_translation/base_rerank_unit_tests.py @@ -79,6 +79,7 @@ class BaseLLMRerankTest(ABC): @pytest.mark.asyncio() @pytest.mark.parametrize("sync_mode", [True, False]) async def test_basic_rerank(self, sync_mode): + litellm.set_verbose = True rerank_call_args = self.get_base_rerank_call_args() custom_llm_provider = self.get_custom_llm_provider() if sync_mode is True: @@ -86,7 +87,7 @@ class BaseLLMRerankTest(ABC): **rerank_call_args, query="hello", documents=["hello", "world"], - top_n=3, + top_n=2, ) print("re rank response: ", response) @@ -102,7 +103,7 @@ class BaseLLMRerankTest(ABC): **rerank_call_args, query="hello", documents=["hello", "world"], - top_n=3, + top_n=2, ) print("async re rank response: ", response) diff --git a/tests/llm_translation/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py index b5a10953d4..32d0ef1a4f 100644 --- a/tests/llm_translation/test_anthropic_completion.py +++ b/tests/llm_translation/test_anthropic_completion.py @@ -666,7 +666,7 @@ from litellm import completion class TestAnthropicCompletion(BaseLLMChatTest): def get_base_completion_call_args(self) -> dict: - return {"model": "claude-3-haiku-20240307"} + return {"model": "anthropic/claude-3-5-sonnet-20240620"} def test_tool_call_no_arguments(self, tool_call_no_arguments): """Test that tool calls with no arguments is translated correctly. 
Relevant issue: https://github.com/BerriAI/litellm/issues/6833""" diff --git a/tests/llm_translation/test_bedrock_completion.py b/tests/llm_translation/test_bedrock_completion.py index 8a5e6818fc..60b3d2583c 100644 --- a/tests/llm_translation/test_bedrock_completion.py +++ b/tests/llm_translation/test_bedrock_completion.py @@ -1,3 +1,7 @@ +""" +Tests Bedrock Completion + Rerank endpoints +""" + # @pytest.mark.skip(reason="AWS Suspended Account") import os import sys @@ -31,6 +35,7 @@ from litellm.llms.bedrock.chat import BedrockLLM from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.prompt_templates.factory import _bedrock_tools_pt from base_llm_unit_tests import BaseLLMChatTest +from base_rerank_unit_tests import BaseLLMRerankTest # litellm.num_retries = 3 litellm.cache = None @@ -1971,13 +1976,67 @@ def test_bedrock_base_model_helper(): assert model == "us.amazon.nova-pro-v1:0" +@pytest.mark.parametrize( + "messages, expected_cache_control", + [ + ( + [ # test system prompt cache + { + "role": "system", + "content": [ + { + "type": "text", + "text": "You are an AI assistant tasked with analyzing legal documents.", + }, + { + "type": "text", + "text": "Here is the full text of a complex legal agreement", + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + { + "role": "user", + "content": "what are the key terms and conditions in this agreement?", + }, + ], + True, + ), + ( + [ # test user prompt cache + { + "role": "user", + "content": "what are the key terms and conditions in this agreement?", + "cache_control": {"type": "ephemeral"}, + }, + ], + True, + ), + ], +) +def test_bedrock_prompt_caching_message(messages, expected_cache_control): + import litellm + import json + + transformed_messages = litellm.AmazonConverseConfig()._transform_request( + model="bedrock/anthropic.claude-3-5-haiku-20241022-v1:0", + messages=messages, + optional_params={}, + litellm_params={}, + ) + if expected_cache_control: + assert "cachePoint" in json.dumps(transformed_messages) + else: + assert "cachePoint" not in json.dumps(transformed_messages) + + class TestBedrockConverseChat(BaseLLMChatTest): def get_base_completion_call_args(self) -> dict: os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" litellm.model_cost = litellm.get_model_cost_map(url="") litellm.add_known_models() return { - "model": "bedrock/us.anthropic.claude-3-haiku-20240307-v1:0", + "model": "bedrock/anthropic.claude-3-5-haiku-20241022-v1:0", } def test_tool_call_no_arguments(self, tool_call_no_arguments): @@ -1991,3 +2050,19 @@ class TestBedrockConverseChat(BaseLLMChatTest): Todo: if litellm.modify_params is True ensure it's a valid utf-8 sequence """ pass + + def test_prompt_caching(self): + """ + Remove override once we have access to Bedrock prompt caching + """ + pass + + +class TestBedrockRerank(BaseLLMRerankTest): + def get_custom_llm_provider(self) -> litellm.LlmProviders: + return litellm.LlmProviders.BEDROCK + + def get_base_rerank_call_args(self) -> dict: + return { + "model": "bedrock/arn:aws:bedrock:us-west-2::foundation-model/amazon.rerank-v1:0", + } diff --git a/tests/llm_translation/test_together_ai.py b/tests/llm_translation/test_together_ai.py new file mode 100644 index 0000000000..b83a700002 --- /dev/null +++ b/tests/llm_translation/test_together_ai.py @@ -0,0 +1,58 @@ +""" +Test TogetherAI LLM +""" + +from base_llm_unit_tests import BaseLLMChatTest +import json +import os +import sys +from datetime import datetime +from unittest.mock import AsyncMock + 
+sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path + +import litellm +import pytest + + +class TestTogetherAI(BaseLLMChatTest): + def get_base_completion_call_args(self) -> dict: + litellm.set_verbose = True + return {"model": "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1"} + + def test_tool_call_no_arguments(self, tool_call_no_arguments): + """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833""" + pass + + def test_multilingual_requests(self): + """ + Mistral API raises a 400 BadRequest error when the request contains invalid utf-8 sequences. + """ + pass + + @pytest.mark.parametrize( + "model, expected_bool", + [ + ("meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", True), + ("nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", False), + ], + ) + def test_get_supported_response_format_together_ai( + self, model: str, expected_bool: bool + ) -> None: + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + optional_params = litellm.get_supported_openai_params( + model, custom_llm_provider="together_ai" + ) + # Mapped provider + assert isinstance(optional_params, list) + + if expected_bool: + assert "response_format" in optional_params + assert "tools" in optional_params + else: + assert "response_format" not in optional_params + assert "tools" not in optional_params diff --git a/tests/local_testing/test_caching_handler.py b/tests/local_testing/test_caching_handler.py index 11f7831bc3..b2c8022649 100644 --- a/tests/local_testing/test_caching_handler.py +++ b/tests/local_testing/test_caching_handler.py @@ -197,7 +197,7 @@ async def test_async_log_cache_hit_on_callbacks(): ), ( CallTypes.rerank.value, - {"id": "test", "results": [{"index": 0, "score": 0.9}]}, + {"id": "test", "results": [{"index": 0, "relevance_score": 0.9}]}, RerankResponse, ), ( diff --git a/tests/local_testing/test_prompt_caching.py b/tests/local_testing/test_prompt_caching.py index c73bda04ec..11c4d583d0 100644 --- a/tests/local_testing/test_prompt_caching.py +++ b/tests/local_testing/test_prompt_caching.py @@ -38,76 +38,6 @@ def _usage_format_tests(usage: litellm.Usage): assert usage.prompt_tokens > usage.prompt_tokens_details.cached_tokens -@pytest.mark.parametrize( - "model", - [ - "anthropic/claude-3-5-sonnet-20240620", - # "openai/gpt-4o", - # "deepseek/deepseek-chat", - ], -) -def test_prompt_caching_model(model): - try: - for _ in range(2): - response = litellm.completion( - model=model, - messages=[ - # System Message - { - "role": "system", - "content": [ - { - "type": "text", - "text": "Here is the full text of a complex legal agreement" - * 400, - "cache_control": {"type": "ephemeral"}, - } - ], - }, - # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache. - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What are the key terms and conditions in this agreement?", - "cache_control": {"type": "ephemeral"}, - } - ], - }, - { - "role": "assistant", - "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo", - }, - # The final turn is marked with cache-control, for continuing in followups. 
- { - "role": "user", - "content": [ - { - "type": "text", - "text": "What are the key terms and conditions in this agreement?", - "cache_control": {"type": "ephemeral"}, - } - ], - }, - ], - temperature=0.2, - max_tokens=10, - ) - - _usage_format_tests(response.usage) - - print("response=", response) - print("response.usage=", response.usage) - - _usage_format_tests(response.usage) - - assert "prompt_tokens_details" in response.usage - assert response.usage.prompt_tokens_details.cached_tokens > 0 - except litellm.InternalServerError: - pass - - def test_supports_prompt_caching(): from litellm.utils import supports_prompt_caching diff --git a/tests/logging_callback_tests/test_log_db_redis_services.py b/tests/logging_callback_tests/test_log_db_redis_services.py index 9824e1a5bb..fa0c3b595a 100644 --- a/tests/logging_callback_tests/test_log_db_redis_services.py +++ b/tests/logging_callback_tests/test_log_db_redis_services.py @@ -185,3 +185,22 @@ async def test_log_db_metrics_failure_error_types(exception, should_log): else: # Assert failure was NOT logged for non-DB errors mock_proxy_logging.service_logging_obj.async_service_failure_hook.assert_not_called() + + +@pytest.mark.asyncio +async def test_dd_log_db_spend_failure_metrics(): + from litellm._service_logger import ServiceLogging + from litellm.integrations.datadog.datadog import DataDogLogger + + dd_logger = DataDogLogger() + with patch.object(dd_logger, "async_service_failure_hook", new_callable=AsyncMock): + service_logging_obj = ServiceLogging() + + litellm.service_callback = [dd_logger] + + await service_logging_obj.async_service_failure_hook( + service=ServiceTypes.DB, + call_type="test_call_type", + error="test_error", + duration=1.0, + )
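
For reference, a minimal usage sketch of the Bedrock rerank path added in this PR. The model ARN below mirrors the one used in `TestBedrockRerank` and is an assumption for your account/region; AWS credentials are assumed to be resolved through the usual boto3 environment variables.

```python
import litellm

# Rerank via Bedrock's bedrock-agent-runtime /rerank endpoint (added in this PR).
# The ARN is illustrative - swap in the rerank foundation model available to you.
response = litellm.rerank(
    model="bedrock/arn:aws:bedrock:us-west-2::foundation-model/amazon.rerank-v1:0",
    query="hello",
    documents=["hello", "world"],
    top_n=2,
)

# Each entry is a RerankResponseResult: {"index": int, "relevance_score": float}
print(response.results)
```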