diff --git a/litellm/__init__.py b/litellm/__init__.py index f95640b58..d76dd37bc 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -89,6 +89,7 @@ retry = True ### AUTH ### api_key: Optional[str] = None openai_key: Optional[str] = None +groq_key: Optional[str] = None databricks_key: Optional[str] = None azure_key: Optional[str] = None anthropic_key: Optional[str] = None @@ -892,7 +893,11 @@ ALL_LITELLM_RESPONSE_TYPES = [ from .types.utils import ImageObject from .llms.custom_llm import CustomLLM from .llms.huggingface_restapi import HuggingfaceConfig -from .llms.anthropic.chat import AnthropicConfig +from .llms.anthropic.chat.handler import AnthropicConfig +from .llms.anthropic.experimental_pass_through.transformation import ( + AnthropicExperimentalPassThroughConfig, +) +from .llms.groq.stt.transformation import GroqSTTConfig from .llms.anthropic.completion import AnthropicTextConfig from .llms.databricks.chat import DatabricksConfig, DatabricksEmbeddingConfig from .llms.predibase import PredibaseConfig @@ -962,8 +967,8 @@ from .llms.OpenAI.openai import ( OpenAITextCompletionConfig, MistralEmbeddingConfig, DeepInfraConfig, - GroqConfig, ) +from .llms.groq.chat.transformation import GroqChatConfig from .llms.azure_ai.chat.transformation import AzureAIStudioConfig from .llms.mistral.mistral_chat_transformation import MistralConfig from .llms.OpenAI.chat.o1_transformation import ( diff --git a/litellm/adapters/anthropic_adapter.py b/litellm/adapters/anthropic_adapter.py index 1bff003be..47fba3630 100644 --- a/litellm/adapters/anthropic_adapter.py +++ b/litellm/adapters/anthropic_adapter.py @@ -34,7 +34,7 @@ class AnthropicAdapter(CustomLogger): """ request_body = AnthropicMessagesRequest(**kwargs) # type: ignore - translated_body = litellm.AnthropicConfig().translate_anthropic_to_openai( + translated_body = litellm.AnthropicExperimentalPassThroughConfig().translate_anthropic_to_openai( anthropic_message_request=request_body ) @@ -44,7 +44,7 @@ class AnthropicAdapter(CustomLogger): self, response: litellm.ModelResponse ) -> Optional[AnthropicResponse]: - return litellm.AnthropicConfig().translate_openai_response_to_anthropic( + return litellm.AnthropicExperimentalPassThroughConfig().translate_openai_response_to_anthropic( response=response ) @@ -99,7 +99,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper): if chunk == "None" or chunk is None: raise Exception - processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic( + processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic( response=chunk ) if ( @@ -163,7 +163,7 @@ class AnthropicStreamWrapper(AdapterCompletionStreamWrapper): async for chunk in self.completion_stream: if chunk == "None" or chunk is None: raise Exception - processed_chunk = litellm.AnthropicConfig().translate_streaming_openai_response_to_anthropic( + processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic( response=chunk ) if ( diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index d2343d429..748c904a3 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -601,7 +601,7 @@ class LangFuseLogger: "input": input if not mask_input else "redacted-by-litellm", "output": output if not mask_output else "redacted-by-litellm", "usage": usage, - "metadata": clean_metadata, + "metadata": log_requester_metadata(clean_metadata), "level": level, 
"version": clean_metadata.pop("version", None), } @@ -768,3 +768,15 @@ def log_provider_specific_information_as_span( name="vertex_ai_grounding_metadata", input=vertex_ai_grounding_metadata, ) + + +def log_requester_metadata(clean_metadata: dict): + returned_metadata = {} + requester_metadata = clean_metadata.get("requester_metadata") or {} + for k, v in clean_metadata.items(): + if k not in requester_metadata: + returned_metadata[k] = v + + returned_metadata.update({"requester_metadata": requester_metadata}) + + return returned_metadata diff --git a/litellm/litellm_core_utils/exception_mapping_utils.py b/litellm/litellm_core_utils/exception_mapping_utils.py index 5ac26c7ae..b1b378f43 100644 --- a/litellm/litellm_core_utils/exception_mapping_utils.py +++ b/litellm/litellm_core_utils/exception_mapping_utils.py @@ -1,6 +1,32 @@ import json +import os +import threading +import traceback from typing import Optional +import httpx + +import litellm +from litellm import verbose_logger + +from ..exceptions import ( + APIConnectionError, + APIError, + AuthenticationError, + BadRequestError, + BudgetExceededError, + ContentPolicyViolationError, + ContextWindowExceededError, + NotFoundError, + OpenAIError, + PermissionDeniedError, + RateLimitError, + ServiceUnavailableError, + Timeout, + UnprocessableEntityError, + UnsupportedParamsError, +) + def get_error_message(error_obj) -> Optional[str]: """ @@ -38,3 +64,2015 @@ def get_error_message(error_obj) -> Optional[str]: return None except Exception as e: return None + + +####### EXCEPTION MAPPING ################ +def _get_litellm_response_headers( + original_exception: Exception, +) -> Optional[httpx.Headers]: + """ + Extract and return the response headers from a mapped exception, if present. + + Used for accurate retry logic. + """ + _response_headers: Optional[httpx.Headers] = None + try: + _response_headers = getattr( + original_exception, "litellm_response_headers", None + ) + except Exception: + return None + + return _response_headers + + +def _get_response_headers(original_exception: Exception) -> Optional[httpx.Headers]: + """ + Extract and return the response headers from an exception, if present. + + Used for accurate retry logic. + """ + _response_headers: Optional[httpx.Headers] = None + try: + _response_headers = getattr(original_exception, "headers", None) + error_response = getattr(original_exception, "response", None) + if _response_headers is None and error_response: + _response_headers = getattr(error_response, "headers", None) + except Exception: + return None + + return _response_headers + + +def exception_type( # type: ignore + model, + original_exception, + custom_llm_provider, + completion_kwargs={}, + extra_kwargs={}, +): + + if any( + isinstance(original_exception, exc_type) + for exc_type in litellm.LITELLM_EXCEPTION_TYPES + ): + return original_exception + exception_mapping_worked = False + exception_provider = custom_llm_provider + if litellm.suppress_debug_info is False: + print() # noqa + print( # noqa + "\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m" # noqa + ) # noqa + print( # noqa + "LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'." 
# noqa + ) # noqa + print() # noqa + + litellm_response_headers = _get_response_headers( + original_exception=original_exception + ) + try: + if model: + if hasattr(original_exception, "message"): + error_str = str(original_exception.message) + else: + error_str = str(original_exception) + if isinstance(original_exception, BaseException): + exception_type = type(original_exception).__name__ + else: + exception_type = "" + + ################################################################################ + # Common Extra information needed for all providers + # We pass num retries, api_base, vertex_deployment etc to the exception here + ################################################################################ + extra_information = "" + try: + _api_base = litellm.get_api_base( + model=model, optional_params=extra_kwargs + ) + messages = litellm.get_first_chars_messages(kwargs=completion_kwargs) + _vertex_project = extra_kwargs.get("vertex_project") + _vertex_location = extra_kwargs.get("vertex_location") + _metadata = extra_kwargs.get("metadata", {}) or {} + _model_group = _metadata.get("model_group") + _deployment = _metadata.get("deployment") + extra_information = f"\nModel: {model}" + + if ( + isinstance(custom_llm_provider, str) + and len(custom_llm_provider) > 0 + ): + exception_provider = ( + custom_llm_provider[0].upper() + + custom_llm_provider[1:] + + "Exception" + ) + + if _api_base: + extra_information += f"\nAPI Base: `{_api_base}`" + if ( + messages + and len(messages) > 0 + and litellm.redact_messages_in_exceptions is False + ): + extra_information += f"\nMessages: `{messages}`" + + if _model_group is not None: + extra_information += f"\nmodel_group: `{_model_group}`\n" + if _deployment is not None: + extra_information += f"\ndeployment: `{_deployment}`\n" + if _vertex_project is not None: + extra_information += f"\nvertex_project: `{_vertex_project}`\n" + if _vertex_location is not None: + extra_information += f"\nvertex_location: `{_vertex_location}`\n" + + # on litellm proxy add key name + team to exceptions + extra_information = _add_key_name_and_team_to_alert( + request_info=extra_information, metadata=_metadata + ) + except Exception: + # DO NOT LET this Block raising the original exception + pass + + ################################################################################ + # End of Common Extra information Needed for all providers + ################################################################################ + + ################################################################################ + #################### Start of Provider Exception mapping #################### + ################################################################################ + + if "Request Timeout Error" in error_str or "Request timed out" in error_str: + exception_mapping_worked = True + raise Timeout( + message=f"APITimeoutError - Request timed out. 
\nerror_str: {error_str}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + + if ( + custom_llm_provider == "openai" + or custom_llm_provider == "text-completion-openai" + or custom_llm_provider == "custom_openai" + or custom_llm_provider in litellm.openai_compatible_providers + ): + # custom_llm_provider is openai, make it OpenAI + message = get_error_message(error_obj=original_exception) + if message is None: + if hasattr(original_exception, "message"): + message = original_exception.message + else: + message = str(original_exception) + + if message is not None and isinstance( + message, str + ): # done to prevent user-confusion. Relevant issue - https://github.com/BerriAI/litellm/issues/1414 + message = message.replace("OPENAI", custom_llm_provider.upper()) + message = message.replace( + "openai.OpenAIError", + "{}.{}Error".format(custom_llm_provider, custom_llm_provider), + ) + if custom_llm_provider == "openai": + exception_provider = "OpenAI" + "Exception" + else: + exception_provider = ( + custom_llm_provider[0].upper() + + custom_llm_provider[1:] + + "Exception" + ) + + if ( + "This model's maximum context length is" in error_str + or "string too long. Expected a string with maximum length" + in error_str + ): + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"ContextWindowExceededError: {exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif ( + "invalid_request_error" in error_str + and "model_not_found" in error_str + ): + exception_mapping_worked = True + raise NotFoundError( + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif "A timeout occurred" in error_str: + exception_mapping_worked = True + raise Timeout( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif ( + "invalid_request_error" in error_str + and "content_policy_violation" in error_str + ): + exception_mapping_worked = True + raise ContentPolicyViolationError( + message=f"ContentPolicyViolationError: {exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif ( + "invalid_request_error" in error_str + and "Incorrect API key provided" not in error_str + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif "Web server is returning an unknown error" in error_str: + exception_mapping_worked = True + raise litellm.InternalServerError( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + ) + elif "Request too large" in error_str: + exception_mapping_worked = True + raise RateLimitError( + message=f"RateLimitError: {exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif ( + "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable" + in error_str + 
): + exception_mapping_worked = True + raise AuthenticationError( + message=f"AuthenticationError: {exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif "Mistral API raised a streaming error" in error_str: + exception_mapping_worked = True + _request = httpx.Request( + method="POST", url="https://api.openai.com/v1" + ) + raise APIError( + status_code=500, + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + request=_request, + litellm_debug_info=extra_information, + ) + elif hasattr(original_exception, "status_code"): + exception_mapping_worked = True + if original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"AuthenticationError: {exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 404: + exception_mapping_worked = True + raise NotFoundError( + message=f"NotFoundError: {exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"Timeout Error: {exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 422: + exception_mapping_worked = True + raise BadRequestError( + message=f"{exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + response=getattr(original_exception, "response", None), + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"RateLimitError: {exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + response=getattr(original_exception, "response", None), + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 503: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"ServiceUnavailableError: {exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + response=getattr(original_exception, "response", None), + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 504: # gateway timeout error + exception_mapping_worked = True + raise Timeout( + message=f"Timeout Error: {exception_provider} - {message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + else: + exception_mapping_worked = True + raise APIError( + status_code=original_exception.status_code, + message=f"APIError: {exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + request=getattr(original_exception, "request", None), + litellm_debug_info=extra_information, + ) + else: + # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors 
+ # exception_mapping_worked = True + raise APIConnectionError( + message=f"APIConnectionError: {exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + litellm_debug_info=extra_information, + request=httpx.Request( + method="POST", url="https://api.openai.com/v1/" + ), + ) + elif custom_llm_provider == "anthropic": # one of the anthropics + if "prompt is too long" in error_str or "prompt: length" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message="AnthropicError - {}".format(error_str), + model=model, + llm_provider="anthropic", + ) + if "Invalid API Key" in error_str: + exception_mapping_worked = True + raise AuthenticationError( + message="AnthropicError - {}".format(error_str), + model=model, + llm_provider="anthropic", + ) + if "content filtering policy" in error_str: + exception_mapping_worked = True + raise ContentPolicyViolationError( + message="AnthropicError - {}".format(error_str), + model=model, + llm_provider="anthropic", + ) + if "Client error '400 Bad Request'" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message="AnthropicError - {}".format(error_str), + model=model, + llm_provider="anthropic", + ) + if hasattr(original_exception, "status_code"): + verbose_logger.debug( + f"status_code: {original_exception.status_code}" + ) + if original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"AnthropicException - {error_str}", + llm_provider="anthropic", + model=model, + ) + elif ( + original_exception.status_code == 400 + or original_exception.status_code == 413 + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"AnthropicException - {error_str}", + model=model, + llm_provider="anthropic", + ) + elif original_exception.status_code == 404: + exception_mapping_worked = True + raise NotFoundError( + message=f"AnthropicException - {error_str}", + model=model, + llm_provider="anthropic", + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"AnthropicException - {error_str}", + model=model, + llm_provider="anthropic", + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"AnthropicException - {error_str}", + llm_provider="anthropic", + model=model, + ) + elif ( + original_exception.status_code == 500 + or original_exception.status_code == 529 + ): + exception_mapping_worked = True + raise litellm.InternalServerError( + message=f"AnthropicException - {error_str}. Handle with `litellm.InternalServerError`.", + llm_provider="anthropic", + model=model, + ) + elif original_exception.status_code == 503: + exception_mapping_worked = True + raise litellm.ServiceUnavailableError( + message=f"AnthropicException - {error_str}. 
Handle with `litellm.ServiceUnavailableError`.", + llm_provider="anthropic", + model=model, + ) + elif custom_llm_provider == "replicate": + if "Incorrect authentication token" in error_str: + exception_mapping_worked = True + raise AuthenticationError( + message=f"ReplicateException - {error_str}", + llm_provider="replicate", + model=model, + response=original_exception.response, + ) + elif "input is too long" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"ReplicateException - {error_str}", + model=model, + llm_provider="replicate", + response=original_exception.response, + ) + elif exception_type == "ModelError": + exception_mapping_worked = True + raise BadRequestError( + message=f"ReplicateException - {error_str}", + model=model, + llm_provider="replicate", + response=original_exception.response, + ) + elif "Request was throttled" in error_str: + exception_mapping_worked = True + raise RateLimitError( + message=f"ReplicateException - {error_str}", + llm_provider="replicate", + model=model, + response=original_exception.response, + ) + elif hasattr(original_exception, "status_code"): + if original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"ReplicateException - {original_exception.message}", + llm_provider="replicate", + model=model, + response=original_exception.response, + ) + elif ( + original_exception.status_code == 400 + or original_exception.status_code == 413 + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"ReplicateException - {original_exception.message}", + model=model, + llm_provider="replicate", + response=original_exception.response, + ) + elif original_exception.status_code == 422: + exception_mapping_worked = True + raise UnprocessableEntityError( + message=f"ReplicateException - {original_exception.message}", + model=model, + llm_provider="replicate", + response=original_exception.response, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"ReplicateException - {original_exception.message}", + model=model, + llm_provider="replicate", + ) + elif original_exception.status_code == 422: + exception_mapping_worked = True + raise UnprocessableEntityError( + message=f"ReplicateException - {original_exception.message}", + llm_provider="replicate", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"ReplicateException - {original_exception.message}", + llm_provider="replicate", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 500: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"ReplicateException - {original_exception.message}", + llm_provider="replicate", + model=model, + response=original_exception.response, + ) + exception_mapping_worked = True + raise APIError( + status_code=500, + message=f"ReplicateException - {str(original_exception)}", + llm_provider="replicate", + model=model, + request=httpx.Request( + method="POST", + url="https://api.replicate.com/v1/deployments", + ), + ) + elif custom_llm_provider == "watsonx": + if "token_quota_reached" in error_str: + exception_mapping_worked = True + raise RateLimitError( + message=f"WatsonxException: Rate Limit Errror - {error_str}", + llm_provider="watsonx", + model=model, + response=original_exception.response, + ) + elif ( + 
custom_llm_provider == "predibase" + or custom_llm_provider == "databricks" + ): + if "authorization denied for" in error_str: + exception_mapping_worked = True + + # Predibase returns the raw API Key in the response - this block ensures it's not returned in the exception + if ( + error_str is not None + and isinstance(error_str, str) + and "bearer" in error_str.lower() + ): + # only keep the first 10 chars after the occurnence of "bearer" + _bearer_token_start_index = error_str.lower().find("bearer") + error_str = error_str[: _bearer_token_start_index + 14] + error_str += "XXXXXXX" + '"' + + raise AuthenticationError( + message=f"{custom_llm_provider}Exception: Authentication Error - {error_str}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif hasattr(original_exception, "status_code"): + if original_exception.status_code == 500: + exception_mapping_worked = True + raise litellm.InternalServerError( + message=f"{custom_llm_provider}Exception - {original_exception.message}", + llm_provider=custom_llm_provider, + model=model, + ) + elif ( + original_exception.status_code == 401 + or original_exception.status_code == 403 + ): + exception_mapping_worked = True + raise AuthenticationError( + message=f"{custom_llm_provider}Exception - {original_exception.message}", + llm_provider=custom_llm_provider, + model=model, + ) + elif original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"{custom_llm_provider}Exception - {original_exception.message}", + llm_provider=custom_llm_provider, + model=model, + ) + elif original_exception.status_code == 404: + exception_mapping_worked = True + raise NotFoundError( + message=f"{custom_llm_provider}Exception - {original_exception.message}", + llm_provider=custom_llm_provider, + model=model, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"{custom_llm_provider}Exception - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif ( + original_exception.status_code == 422 + or original_exception.status_code == 424 + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"{custom_llm_provider}Exception - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"{custom_llm_provider}Exception - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 503: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"{custom_llm_provider}Exception - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 504: # gateway timeout error + exception_mapping_worked = True + raise Timeout( + message=f"{custom_llm_provider}Exception - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif custom_llm_provider == "bedrock": + if ( + "too many tokens" in error_str + or "expected maxLength:" in error_str + or "Input is too long" in error_str + or "prompt: length: 1.." 
in error_str + or "Too many input tokens" in error_str + ): + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"BedrockException: Context Window Error - {error_str}", + model=model, + llm_provider="bedrock", + ) + elif "Malformed input request" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"BedrockException - {error_str}", + model=model, + llm_provider="bedrock", + response=original_exception.response, + ) + elif "A conversation must start with a user message." in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"BedrockException - {error_str}\n. Pass in default user message via `completion(..,user_continue_message=)` or enable `litellm.modify_params=True`.\nFor Proxy: do via `litellm_settings::modify_params: True` or user_continue_message under `litellm_params`", + model=model, + llm_provider="bedrock", + response=original_exception.response, + ) + elif ( + "Unable to locate credentials" in error_str + or "The security token included in the request is invalid" + in error_str + ): + exception_mapping_worked = True + raise AuthenticationError( + message=f"BedrockException Invalid Authentication - {error_str}", + model=model, + llm_provider="bedrock", + response=original_exception.response, + ) + elif "AccessDeniedException" in error_str: + exception_mapping_worked = True + raise PermissionDeniedError( + message=f"BedrockException PermissionDeniedError - {error_str}", + model=model, + llm_provider="bedrock", + response=original_exception.response, + ) + elif ( + "throttlingException" in error_str + or "ThrottlingException" in error_str + ): + exception_mapping_worked = True + raise RateLimitError( + message=f"BedrockException: Rate Limit Error - {error_str}", + model=model, + llm_provider="bedrock", + response=original_exception.response, + ) + elif ( + "Connect timeout on endpoint URL" in error_str + or "timed out" in error_str + ): + exception_mapping_worked = True + raise Timeout( + message=f"BedrockException: Timeout Error - {error_str}", + model=model, + llm_provider="bedrock", + ) + elif "Could not process image" in error_str: + exception_mapping_worked = True + raise litellm.InternalServerError( + message=f"BedrockException - {error_str}", + model=model, + llm_provider="bedrock", + ) + elif hasattr(original_exception, "status_code"): + if original_exception.status_code == 500: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"BedrockException - {original_exception.message}", + llm_provider="bedrock", + model=model, + response=httpx.Response( + status_code=500, + request=httpx.Request( + method="POST", url="https://api.openai.com/v1/" + ), + ), + ) + elif original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"BedrockException - {original_exception.message}", + llm_provider="bedrock", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"BedrockException - {original_exception.message}", + llm_provider="bedrock", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 404: + exception_mapping_worked = True + raise NotFoundError( + message=f"BedrockException - {original_exception.message}", + llm_provider="bedrock", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 408: + 
exception_mapping_worked = True + raise Timeout( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 422: + exception_mapping_worked = True + raise BadRequestError( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 503: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 504: # gateway timeout error + exception_mapping_worked = True + raise Timeout( + message=f"BedrockException - {original_exception.message}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif custom_llm_provider == "sagemaker": + if "Unable to locate credentials" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"litellm.BadRequestError: SagemakerException - {error_str}", + model=model, + llm_provider="sagemaker", + response=original_exception.response, + ) + elif ( + "Input validation error: `best_of` must be > 0 and <= 2" + in error_str + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"SagemakerException - the value of 'n' must be > 0 and <= 2 for sagemaker endpoints", + model=model, + llm_provider="sagemaker", + response=original_exception.response, + ) + elif ( + "`inputs` tokens + `max_new_tokens` must be <=" in error_str + or "instance type with more CPU capacity or memory" in error_str + ): + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"SagemakerException - {error_str}", + model=model, + llm_provider="sagemaker", + response=original_exception.response, + ) + elif ( + custom_llm_provider == "vertex_ai" + or custom_llm_provider == "vertex_ai_beta" + or custom_llm_provider == "gemini" + ): + if ( + "Vertex AI API has not been used in project" in error_str + or "Unable to find your project" in error_str + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"litellm.BadRequestError: VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai", + response=httpx.Response( + status_code=400, + request=httpx.Request( + method="POST", + url=" https://cloud.google.com/vertex-ai/", + ), + ), + litellm_debug_info=extra_information, + ) + if "400 Request payload size exceeds" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"VertexException - {error_str}", + model=model, + llm_provider=custom_llm_provider, + ) + elif ( + "None Unknown Error." in error_str + or "Content has no parts." 
in error_str + ): + exception_mapping_worked = True + raise litellm.InternalServerError( + message=f"litellm.InternalServerError: VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai", + response=httpx.Response( + status_code=500, + content=str(original_exception), + request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore + ), + litellm_debug_info=extra_information, + ) + elif "API key not valid." in error_str: + exception_mapping_worked = True + raise AuthenticationError( + message=f"{custom_llm_provider}Exception - {error_str}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif "403" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"VertexAIException BadRequestError - {error_str}", + model=model, + llm_provider="vertex_ai", + response=httpx.Response( + status_code=403, + request=httpx.Request( + method="POST", + url=" https://cloud.google.com/vertex-ai/", + ), + ), + litellm_debug_info=extra_information, + ) + elif ( + "The response was blocked." in error_str + or "Output blocked by content filtering policy" + in error_str # anthropic on vertex ai + ): + exception_mapping_worked = True + raise ContentPolicyViolationError( + message=f"VertexAIException ContentPolicyViolationError - {error_str}", + model=model, + llm_provider="vertex_ai", + litellm_debug_info=extra_information, + response=httpx.Response( + status_code=400, + request=httpx.Request( + method="POST", + url=" https://cloud.google.com/vertex-ai/", + ), + ), + ) + elif ( + "429 Quota exceeded" in error_str + or "Quota exceeded for" in error_str + or "IndexError: list index out of range" in error_str + or "429 Unable to submit request because the service is temporarily out of capacity." 
+ in error_str + ): + exception_mapping_worked = True + raise RateLimitError( + message=f"litellm.RateLimitError: VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai", + litellm_debug_info=extra_information, + response=httpx.Response( + status_code=429, + request=httpx.Request( + method="POST", + url=" https://cloud.google.com/vertex-ai/", + ), + ), + ) + elif "500 Internal Server Error" in error_str: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"litellm.ServiceUnavailableError: VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai", + litellm_debug_info=extra_information, + ) + if hasattr(original_exception, "status_code"): + if original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"VertexAIException BadRequestError - {error_str}", + model=model, + llm_provider="vertex_ai", + litellm_debug_info=extra_information, + response=httpx.Response( + status_code=400, + request=httpx.Request( + method="POST", + url="https://cloud.google.com/vertex-ai/", + ), + ), + ) + if original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"VertexAIException - {original_exception.message}", + llm_provider=custom_llm_provider, + model=model, + ) + if original_exception.status_code == 404: + exception_mapping_worked = True + raise NotFoundError( + message=f"VertexAIException - {original_exception.message}", + llm_provider=custom_llm_provider, + model=model, + ) + if original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"VertexAIException - {original_exception.message}", + llm_provider=custom_llm_provider, + model=model, + ) + + if original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"litellm.RateLimitError: VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai", + litellm_debug_info=extra_information, + response=httpx.Response( + status_code=429, + request=httpx.Request( + method="POST", + url=" https://cloud.google.com/vertex-ai/", + ), + ), + ) + if original_exception.status_code == 500: + exception_mapping_worked = True + raise litellm.InternalServerError( + message=f"VertexAIException InternalServerError - {error_str}", + model=model, + llm_provider="vertex_ai", + litellm_debug_info=extra_information, + response=httpx.Response( + status_code=500, + content=str(original_exception), + request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore + ), + ) + if original_exception.status_code == 503: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"VertexAIException - {original_exception.message}", + llm_provider=custom_llm_provider, + model=model, + ) + elif custom_llm_provider == "palm" or custom_llm_provider == "gemini": + if "503 Getting metadata" in error_str: + # auth errors look like this + # 503 Getting metadata from plugin failed with error: Reauthentication is needed. Please run `gcloud auth application-default login` to reauthenticate. + exception_mapping_worked = True + raise BadRequestError( + message=f"GeminiException - Invalid api key", + model=model, + llm_provider="palm", + response=original_exception.response, + ) + if ( + "504 Deadline expired before operation could complete." 
in error_str + or "504 Deadline Exceeded" in error_str + ): + exception_mapping_worked = True + raise Timeout( + message=f"GeminiException - {original_exception.message}", + model=model, + llm_provider="palm", + ) + if "400 Request payload size exceeds" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"GeminiException - {error_str}", + model=model, + llm_provider="palm", + response=original_exception.response, + ) + if ( + "500 An internal error has occurred." in error_str + or "list index out of range" in error_str + ): + exception_mapping_worked = True + raise APIError( + status_code=getattr(original_exception, "status_code", 500), + message=f"GeminiException - {original_exception.message}", + llm_provider="palm", + model=model, + request=httpx.Response( + status_code=429, + request=httpx.Request( + method="POST", + url=" https://cloud.google.com/vertex-ai/", + ), + ), + ) + if hasattr(original_exception, "status_code"): + if original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"GeminiException - {error_str}", + model=model, + llm_provider="palm", + response=original_exception.response, + ) + # Dailed: Error occurred: 400 Request payload size exceeds the limit: 20000 bytes + elif custom_llm_provider == "cloudflare": + if "Authentication error" in error_str: + exception_mapping_worked = True + raise AuthenticationError( + message=f"Cloudflare Exception - {original_exception.message}", + llm_provider="cloudflare", + model=model, + response=original_exception.response, + ) + if "must have required property" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"Cloudflare Exception - {original_exception.message}", + llm_provider="cloudflare", + model=model, + response=original_exception.response, + ) + elif ( + custom_llm_provider == "cohere" or custom_llm_provider == "cohere_chat" + ): # Cohere + if ( + "invalid api token" in error_str + or "No API key provided." 
in error_str + ): + exception_mapping_worked = True + raise AuthenticationError( + message=f"CohereException - {original_exception.message}", + llm_provider="cohere", + model=model, + response=original_exception.response, + ) + elif "too many tokens" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"CohereException - {original_exception.message}", + model=model, + llm_provider="cohere", + response=original_exception.response, + ) + elif hasattr(original_exception, "status_code"): + if ( + original_exception.status_code == 400 + or original_exception.status_code == 498 + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"CohereException - {original_exception.message}", + llm_provider="cohere", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"CohereException - {original_exception.message}", + llm_provider="cohere", + model=model, + ) + elif original_exception.status_code == 500: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"CohereException - {original_exception.message}", + llm_provider="cohere", + model=model, + response=original_exception.response, + ) + elif ( + "CohereConnectionError" in exception_type + ): # cohere seems to fire these errors when we load test it (1k+ messages / min) + exception_mapping_worked = True + raise RateLimitError( + message=f"CohereException - {original_exception.message}", + llm_provider="cohere", + model=model, + response=original_exception.response, + ) + elif "invalid type:" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"CohereException - {original_exception.message}", + llm_provider="cohere", + model=model, + response=original_exception.response, + ) + elif "Unexpected server error" in error_str: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"CohereException - {original_exception.message}", + llm_provider="cohere", + model=model, + response=original_exception.response, + ) + else: + if hasattr(original_exception, "status_code"): + exception_mapping_worked = True + raise APIError( + status_code=original_exception.status_code, + message=f"CohereException - {original_exception.message}", + llm_provider="cohere", + model=model, + request=original_exception.request, + ) + raise original_exception + elif custom_llm_provider == "huggingface": + if "length limit exceeded" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=error_str, + model=model, + llm_provider="huggingface", + response=original_exception.response, + ) + elif "A valid user token is required" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=error_str, + llm_provider="huggingface", + model=model, + response=original_exception.response, + ) + elif "Rate limit reached" in error_str: + exception_mapping_worked = True + raise RateLimitError( + message=error_str, + llm_provider="huggingface", + model=model, + response=original_exception.response, + ) + if hasattr(original_exception, "status_code"): + if original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"HuggingfaceException - {original_exception.message}", + llm_provider="huggingface", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 400: + exception_mapping_worked = True + raise 
BadRequestError( + message=f"HuggingfaceException - {original_exception.message}", + model=model, + llm_provider="huggingface", + response=original_exception.response, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"HuggingfaceException - {original_exception.message}", + model=model, + llm_provider="huggingface", + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"HuggingfaceException - {original_exception.message}", + llm_provider="huggingface", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 503: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"HuggingfaceException - {original_exception.message}", + llm_provider="huggingface", + model=model, + response=original_exception.response, + ) + else: + exception_mapping_worked = True + raise APIError( + status_code=original_exception.status_code, + message=f"HuggingfaceException - {original_exception.message}", + llm_provider="huggingface", + model=model, + request=original_exception.request, + ) + elif custom_llm_provider == "ai21": + if hasattr(original_exception, "message"): + if "Prompt has too many tokens" in original_exception.message: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"AI21Exception - {original_exception.message}", + model=model, + llm_provider="ai21", + response=original_exception.response, + ) + if "Bad or missing API token." in original_exception.message: + exception_mapping_worked = True + raise BadRequestError( + message=f"AI21Exception - {original_exception.message}", + model=model, + llm_provider="ai21", + response=original_exception.response, + ) + if hasattr(original_exception, "status_code"): + if original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"AI21Exception - {original_exception.message}", + llm_provider="ai21", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"AI21Exception - {original_exception.message}", + model=model, + llm_provider="ai21", + ) + if original_exception.status_code == 422: + exception_mapping_worked = True + raise BadRequestError( + message=f"AI21Exception - {original_exception.message}", + model=model, + llm_provider="ai21", + response=original_exception.response, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"AI21Exception - {original_exception.message}", + llm_provider="ai21", + model=model, + response=original_exception.response, + ) + else: + exception_mapping_worked = True + raise APIError( + status_code=original_exception.status_code, + message=f"AI21Exception - {original_exception.message}", + llm_provider="ai21", + model=model, + request=original_exception.request, + ) + elif custom_llm_provider == "nlp_cloud": + if "detail" in error_str: + if "Input text length should not exceed" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"NLPCloudException - {error_str}", + model=model, + llm_provider="nlp_cloud", + response=original_exception.response, + ) + elif "value is not a valid" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"NLPCloudException - {error_str}", + model=model, + llm_provider="nlp_cloud", + 
response=original_exception.response, + ) + else: + exception_mapping_worked = True + raise APIError( + status_code=500, + message=f"NLPCloudException - {error_str}", + model=model, + llm_provider="nlp_cloud", + request=original_exception.request, + ) + if hasattr( + original_exception, "status_code" + ): # https://docs.nlpcloud.com/?shell#errors + if ( + original_exception.status_code == 400 + or original_exception.status_code == 406 + or original_exception.status_code == 413 + or original_exception.status_code == 422 + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"NLPCloudException - {original_exception.message}", + llm_provider="nlp_cloud", + model=model, + response=original_exception.response, + ) + elif ( + original_exception.status_code == 401 + or original_exception.status_code == 403 + ): + exception_mapping_worked = True + raise AuthenticationError( + message=f"NLPCloudException - {original_exception.message}", + llm_provider="nlp_cloud", + model=model, + response=original_exception.response, + ) + elif ( + original_exception.status_code == 522 + or original_exception.status_code == 524 + ): + exception_mapping_worked = True + raise Timeout( + message=f"NLPCloudException - {original_exception.message}", + model=model, + llm_provider="nlp_cloud", + ) + elif ( + original_exception.status_code == 429 + or original_exception.status_code == 402 + ): + exception_mapping_worked = True + raise RateLimitError( + message=f"NLPCloudException - {original_exception.message}", + llm_provider="nlp_cloud", + model=model, + response=original_exception.response, + ) + elif ( + original_exception.status_code == 500 + or original_exception.status_code == 503 + ): + exception_mapping_worked = True + raise APIError( + status_code=original_exception.status_code, + message=f"NLPCloudException - {original_exception.message}", + llm_provider="nlp_cloud", + model=model, + request=original_exception.request, + ) + elif ( + original_exception.status_code == 504 + or original_exception.status_code == 520 + ): + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"NLPCloudException - {original_exception.message}", + model=model, + llm_provider="nlp_cloud", + response=original_exception.response, + ) + else: + exception_mapping_worked = True + raise APIError( + status_code=original_exception.status_code, + message=f"NLPCloudException - {original_exception.message}", + llm_provider="nlp_cloud", + model=model, + request=original_exception.request, + ) + elif custom_llm_provider == "together_ai": + try: + error_response = json.loads(error_str) + except Exception: + error_response = {"error": error_str} + if ( + "error" in error_response + and "`inputs` tokens + `max_new_tokens` must be <=" + in error_response["error"] + ): + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"TogetherAIException - {error_response['error']}", + model=model, + llm_provider="together_ai", + response=original_exception.response, + ) + elif ( + "error" in error_response + and "invalid private key" in error_response["error"] + ): + exception_mapping_worked = True + raise AuthenticationError( + message=f"TogetherAIException - {error_response['error']}", + llm_provider="together_ai", + model=model, + response=original_exception.response, + ) + elif ( + "error" in error_response + and "INVALID_ARGUMENT" in error_response["error"] + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"TogetherAIException - {error_response['error']}", + 
model=model, + llm_provider="together_ai", + response=original_exception.response, + ) + elif "A timeout occurred" in error_str: + exception_mapping_worked = True + raise Timeout( + message=f"TogetherAIException - {error_str}", + model=model, + llm_provider="together_ai", + ) + elif ( + "error" in error_response + and "API key doesn't match expected format." + in error_response["error"] + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"TogetherAIException - {error_response['error']}", + model=model, + llm_provider="together_ai", + response=original_exception.response, + ) + elif ( + "error_type" in error_response + and error_response["error_type"] == "validation" + ): + exception_mapping_worked = True + raise BadRequestError( + message=f"TogetherAIException - {error_response['error']}", + model=model, + llm_provider="together_ai", + response=original_exception.response, + ) + if hasattr(original_exception, "status_code"): + if original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"TogetherAIException - {original_exception.message}", + model=model, + llm_provider="together_ai", + ) + elif original_exception.status_code == 422: + exception_mapping_worked = True + raise BadRequestError( + message=f"TogetherAIException - {error_response['error']}", + model=model, + llm_provider="together_ai", + response=original_exception.response, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"TogetherAIException - {original_exception.message}", + llm_provider="together_ai", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 524: + exception_mapping_worked = True + raise Timeout( + message=f"TogetherAIException - {original_exception.message}", + llm_provider="together_ai", + model=model, + ) + else: + exception_mapping_worked = True + raise APIError( + status_code=original_exception.status_code, + message=f"TogetherAIException - {original_exception.message}", + llm_provider="together_ai", + model=model, + request=original_exception.request, + ) + elif custom_llm_provider == "aleph_alpha": + if ( + "This is longer than the model's maximum context length" + in error_str + ): + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"AlephAlphaException - {original_exception.message}", + llm_provider="aleph_alpha", + model=model, + response=original_exception.response, + ) + elif "InvalidToken" in error_str or "No token provided" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"AlephAlphaException - {original_exception.message}", + llm_provider="aleph_alpha", + model=model, + response=original_exception.response, + ) + elif hasattr(original_exception, "status_code"): + verbose_logger.debug( + f"status code: {original_exception.status_code}" + ) + if original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"AlephAlphaException - {original_exception.message}", + llm_provider="aleph_alpha", + model=model, + ) + elif original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"AlephAlphaException - {original_exception.message}", + llm_provider="aleph_alpha", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"AlephAlphaException - 
{original_exception.message}", + llm_provider="aleph_alpha", + model=model, + response=original_exception.response, + ) + elif original_exception.status_code == 500: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"AlephAlphaException - {original_exception.message}", + llm_provider="aleph_alpha", + model=model, + response=original_exception.response, + ) + raise original_exception + raise original_exception + elif ( + custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat" + ): + if isinstance(original_exception, dict): + error_str = original_exception.get("error", "") + else: + error_str = str(original_exception) + if "no such file or directory" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"OllamaException: Invalid Model/Model not loaded - {original_exception}", + model=model, + llm_provider="ollama", + response=original_exception.response, + ) + elif "Failed to establish a new connection" in error_str: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"OllamaException: {original_exception}", + llm_provider="ollama", + model=model, + response=original_exception.response, + ) + elif "Invalid response object from API" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"OllamaException: {original_exception}", + llm_provider="ollama", + model=model, + response=original_exception.response, + ) + elif "Read timed out" in error_str: + exception_mapping_worked = True + raise Timeout( + message=f"OllamaException: {original_exception}", + llm_provider="ollama", + model=model, + ) + elif custom_llm_provider == "vllm": + if hasattr(original_exception, "status_code"): + if original_exception.status_code == 0: + exception_mapping_worked = True + raise APIConnectionError( + message=f"VLLMException - {original_exception.message}", + llm_provider="vllm", + model=model, + request=original_exception.request, + ) + elif custom_llm_provider == "azure" or custom_llm_provider == "azure_text": + message = get_error_message(error_obj=original_exception) + if message is None: + if hasattr(original_exception, "message"): + message = original_exception.message + else: + message = str(original_exception) + + if "Internal server error" in error_str: + exception_mapping_worked = True + raise litellm.InternalServerError( + message=f"AzureException Internal server error - {message}", + llm_provider="azure", + model=model, + litellm_debug_info=extra_information, + ) + elif "This model's maximum context length is" in error_str: + exception_mapping_worked = True + raise ContextWindowExceededError( + message=f"AzureException ContextWindowExceededError - {message}", + llm_provider="azure", + model=model, + litellm_debug_info=extra_information, + ) + elif "DeploymentNotFound" in error_str: + exception_mapping_worked = True + raise NotFoundError( + message=f"AzureException NotFoundError - {message}", + llm_provider="azure", + model=model, + litellm_debug_info=extra_information, + ) + elif ( + ( + "invalid_request_error" in error_str + and "content_policy_violation" in error_str + ) + or ( + "The response was filtered due to the prompt triggering Azure OpenAI's content management" + in error_str + ) + or "Your task failed as a result of our safety system" in error_str + or "The model produced invalid content" in error_str + or "content_filter_policy" in error_str + ): + exception_mapping_worked = True + raise ContentPolicyViolationError( + message=f"litellm.ContentPolicyViolationError: 
AzureException - {message}", + llm_provider="azure", + model=model, + litellm_debug_info=extra_information, + ) + elif "invalid_request_error" in error_str: + exception_mapping_worked = True + raise BadRequestError( + message=f"AzureException BadRequestError - {message}", + llm_provider="azure", + model=model, + litellm_debug_info=extra_information, + ) + elif ( + "The api_key client option must be set either by passing api_key to the client or by setting" + in error_str + ): + exception_mapping_worked = True + raise AuthenticationError( + message=f"{exception_provider} AuthenticationError - {message}", + llm_provider=custom_llm_provider, + model=model, + litellm_debug_info=extra_information, + ) + elif "Connection error" in error_str: + exception_mapping_worked = True + raise APIConnectionError( + message=f"{exception_provider} APIConnectionError - {message}", + llm_provider=custom_llm_provider, + model=model, + litellm_debug_info=extra_information, + ) + elif hasattr(original_exception, "status_code"): + exception_mapping_worked = True + if original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"AzureException - {message}", + llm_provider="azure", + model=model, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"AzureException AuthenticationError - {message}", + llm_provider="azure", + model=model, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"AzureException Timeout - {message}", + model=model, + litellm_debug_info=extra_information, + llm_provider="azure", + ) + elif original_exception.status_code == 422: + exception_mapping_worked = True + raise BadRequestError( + message=f"AzureException BadRequestError - {message}", + model=model, + llm_provider="azure", + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"AzureException RateLimitError - {message}", + model=model, + llm_provider="azure", + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 503: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"AzureException ServiceUnavailableError - {message}", + model=model, + llm_provider="azure", + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 504: # gateway timeout error + exception_mapping_worked = True + raise Timeout( + message=f"AzureException Timeout - {message}", + model=model, + litellm_debug_info=extra_information, + llm_provider="azure", + ) + else: + exception_mapping_worked = True + raise APIError( + status_code=original_exception.status_code, + message=f"AzureException APIError - {message}", + llm_provider="azure", + litellm_debug_info=extra_information, + model=model, + request=httpx.Request( + method="POST", url="https://openai.com/" + ), + ) + else: + # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors + raise APIConnectionError( + message=f"{exception_provider} APIConnectionError - {message}\n{traceback.format_exc()}", + llm_provider="azure", + model=model, + litellm_debug_info=extra_information, + request=httpx.Request(method="POST", url="https://openai.com/"), + ) + if custom_llm_provider == "openrouter": + if hasattr(original_exception, 
"status_code"): + exception_mapping_worked = True + if original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"{exception_provider} - {error_str}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 401: + exception_mapping_worked = True + raise AuthenticationError( + message=f"AuthenticationError: {exception_provider} - {error_str}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 404: + exception_mapping_worked = True + raise NotFoundError( + message=f"NotFoundError: {exception_provider} - {error_str}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 408: + exception_mapping_worked = True + raise Timeout( + message=f"Timeout Error: {exception_provider} - {error_str}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 422: + exception_mapping_worked = True + raise BadRequestError( + message=f"BadRequestError: {exception_provider} - {error_str}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 429: + exception_mapping_worked = True + raise RateLimitError( + message=f"RateLimitError: {exception_provider} - {error_str}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 503: + exception_mapping_worked = True + raise ServiceUnavailableError( + message=f"ServiceUnavailableError: {exception_provider} - {error_str}", + model=model, + llm_provider=custom_llm_provider, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 504: # gateway timeout error + exception_mapping_worked = True + raise Timeout( + message=f"Timeout Error: {exception_provider} - {error_str}", + model=model, + llm_provider=custom_llm_provider, + litellm_debug_info=extra_information, + ) + else: + exception_mapping_worked = True + raise APIError( + status_code=original_exception.status_code, + message=f"APIError: {exception_provider} - {error_str}", + llm_provider=custom_llm_provider, + model=model, + request=original_exception.request, + litellm_debug_info=extra_information, + ) + else: + # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors + raise APIConnectionError( + message=f"APIConnectionError: {exception_provider} - {error_str}", + llm_provider=custom_llm_provider, + model=model, + litellm_debug_info=extra_information, + request=httpx.Request( + method="POST", url="https://api.openai.com/v1/" + ), + ) + if ( + "BadRequestError.__init__() missing 1 required positional argument: 'param'" + in str(original_exception) + ): # deal with edge-case invalid request error bug in openai-python sdk + exception_mapping_worked = True + raise BadRequestError( + message=f"{exception_provider} BadRequestError : This can happen due to missing AZURE_API_VERSION: {str(original_exception)}", + model=model, + llm_provider=custom_llm_provider, + 
response=getattr(original_exception, "response", None), + ) + else: # ensure generic errors always return APIConnectionError= + """ + For unmapped exceptions - raise the exception with traceback - https://github.com/BerriAI/litellm/issues/4201 + """ + exception_mapping_worked = True + if hasattr(original_exception, "request"): + raise APIConnectionError( + message="{} - {}".format(exception_provider, error_str), + llm_provider=custom_llm_provider, + model=model, + request=original_exception.request, + ) + else: + raise APIConnectionError( + message="{}\n{}".format( + str(original_exception), traceback.format_exc() + ), + llm_provider=custom_llm_provider, + model=model, + request=httpx.Request( + method="POST", url="https://api.openai.com/v1/" + ), # stub the request + ) + except Exception as e: + # LOGGING + exception_logging( + logger_fn=None, + additional_args={ + "exception_mapping_worked": exception_mapping_worked, + "original_exception": original_exception, + }, + exception=e, + ) + + # don't let an error with mapping interrupt the user from receiving an error from the llm api calls + if exception_mapping_worked: + setattr(e, "litellm_response_headers", litellm_response_headers) + raise e + else: + for error_type in litellm.LITELLM_EXCEPTION_TYPES: + if isinstance(e, error_type): + setattr(e, "litellm_response_headers", litellm_response_headers) + raise e # it's already mapped + raised_exc = APIConnectionError( + message="{}\n{}".format(original_exception, traceback.format_exc()), + llm_provider="", + model="", + ) + setattr(raised_exc, "litellm_response_headers", litellm_response_headers) + raise raised_exc + + +####### LOGGING ################### + + +def exception_logging( + additional_args={}, + logger_fn=None, + exception=None, +): + try: + model_call_details = {} + if exception: + model_call_details["exception"] = exception + model_call_details["additional_args"] = additional_args + # User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs + verbose_logger.debug( + f"Logging Details: logger_fn - {logger_fn} | callable(logger_fn) - {callable(logger_fn)}" + ) + if logger_fn and callable(logger_fn): + try: + logger_fn( + model_call_details + ) # Expectation: any logger function passed in by the user should accept a dict object + except Exception as e: + verbose_logger.debug( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" + ) + except Exception as e: + verbose_logger.debug( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" + ) + pass diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 85a2b3cd2..8b5c15ca3 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -1015,9 +1015,8 @@ class Logging: != langFuseLogger.public_key ) or ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key + self.langfuse_secret is not None + and self.langfuse_secret != langFuseLogger.secret_key ) or ( self.langfuse_host is not None @@ -1045,7 +1044,6 @@ class Logging: service_name="langfuse", logging_obj=temp_langfuse_logger, ) - if temp_langfuse_logger is not None: _response = temp_langfuse_logger.log_event( kwargs=kwargs, diff --git a/litellm/llms/OpenAI/openai.py b/litellm/llms/OpenAI/openai.py index e0ab26b98..d586496fc 100644 --- a/litellm/llms/OpenAI/openai.py +++ b/litellm/llms/OpenAI/openai.py @@ 
-220,104 +220,6 @@ class DeepInfraConfig: return optional_params -class GroqConfig: - """ - Reference: https://deepinfra.com/docs/advanced/openai_api - - The class `DeepInfra` provides configuration for the DeepInfra's Chat Completions API interface. Below are the parameters: - """ - - frequency_penalty: Optional[int] = None - function_call: Optional[Union[str, dict]] = None - functions: Optional[list] = None - logit_bias: Optional[dict] = None - max_tokens: Optional[int] = None - n: Optional[int] = None - presence_penalty: Optional[int] = None - stop: Optional[Union[str, list]] = None - temperature: Optional[int] = None - top_p: Optional[int] = None - response_format: Optional[dict] = None - tools: Optional[list] = None - tool_choice: Optional[Union[str, dict]] = None - - def __init__( - self, - frequency_penalty: Optional[int] = None, - function_call: Optional[Union[str, dict]] = None, - functions: Optional[list] = None, - logit_bias: Optional[dict] = None, - max_tokens: Optional[int] = None, - n: Optional[int] = None, - presence_penalty: Optional[int] = None, - stop: Optional[Union[str, list]] = None, - temperature: Optional[int] = None, - top_p: Optional[int] = None, - response_format: Optional[dict] = None, - tools: Optional[list] = None, - tool_choice: Optional[Union[str, dict]] = None, - ) -> None: - locals_ = locals().copy() - for key, value in locals_.items(): - if key != "self" and value is not None: - setattr(self.__class__, key, value) - - @classmethod - def get_config(cls): - return { - k: v - for k, v in cls.__dict__.items() - if not k.startswith("__") - and not isinstance( - v, - ( - types.FunctionType, - types.BuiltinFunctionType, - classmethod, - staticmethod, - ), - ) - and v is not None - } - - def get_supported_openai_params_stt(self): - return [ - "prompt", - "response_format", - "temperature", - "language", - ] - - def get_supported_openai_response_formats_stt(self) -> List[str]: - return ["json", "verbose_json", "text"] - - def map_openai_params_stt( - self, - non_default_params: dict, - optional_params: dict, - model: str, - drop_params: bool, - ) -> dict: - response_formats = self.get_supported_openai_response_formats_stt() - for param, value in non_default_params.items(): - if param == "response_format": - if value in response_formats: - optional_params[param] = value - else: - if litellm.drop_params is True or drop_params is True: - pass - else: - raise litellm.utils.UnsupportedParamsError( - message="Groq doesn't support response_format={}. 
To drop unsupported openai params from the call, set `litellm.drop_params = True`".format( - value - ), - status_code=400, - ) - else: - optional_params[param] = value - return optional_params - - class OpenAIConfig: """ Reference: https://platform.openai.com/docs/api-reference/chat/create diff --git a/litellm/llms/anthropic/chat/__init__.py b/litellm/llms/anthropic/chat/__init__.py new file mode 100644 index 000000000..ae84c3b1e --- /dev/null +++ b/litellm/llms/anthropic/chat/__init__.py @@ -0,0 +1 @@ +from .handler import AnthropicChatCompletion, ModelResponseIterator diff --git a/litellm/llms/anthropic/chat.py b/litellm/llms/anthropic/chat/handler.py similarity index 50% rename from litellm/llms/anthropic/chat.py rename to litellm/llms/anthropic/chat/handler.py index cf4f23905..3603183c4 100644 --- a/litellm/llms/anthropic/chat.py +++ b/litellm/llms/anthropic/chat/handler.py @@ -71,12 +71,19 @@ from litellm.types.llms.openai import ( ChatCompletionToolParamFunctionChunk, ChatCompletionUsageBlock, ChatCompletionUserMessage, + OpenAIMessageContent, ) from litellm.types.utils import Choices, GenericStreamingChunk from litellm.utils import CustomStreamWrapper, ModelResponse, Usage -from ..base import BaseLLM -from ..prompt_templates.factory import custom_prompt, prompt_factory +from ...base import BaseLLM +from ...prompt_templates.factory import ( + anthropic_messages_pt, + custom_prompt, + prompt_factory, +) +from ..common_utils import AnthropicError +from .transformation import AnthropicConfig class AnthropicConstants(Enum): @@ -86,558 +93,6 @@ class AnthropicConstants(Enum): # constants from https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/_constants.py -class AnthropicError(Exception): - def __init__(self, status_code: int, message): - self.status_code = status_code - self.message: str = message - self.request = httpx.Request( - method="POST", url="https://api.anthropic.com/v1/messages" - ) - self.response = httpx.Response(status_code=status_code, request=self.request) - super().__init__( - self.message - ) # Call the base class constructor with the parameters it needs - - -class AnthropicConfig: - """ - Reference: https://docs.anthropic.com/claude/reference/messages_post - - to pass metadata to anthropic, it's {"user_id": "any-relevant-information"} - """ - - max_tokens: Optional[int] = ( - 4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default) - ) - stop_sequences: Optional[list] = None - temperature: Optional[int] = None - top_p: Optional[int] = None - top_k: Optional[int] = None - metadata: Optional[dict] = None - system: Optional[str] = None - - def __init__( - self, - max_tokens: Optional[ - int - ] = 4096, # You can pass in a value yourself or use the default value 4096 - stop_sequences: Optional[list] = None, - temperature: Optional[int] = None, - top_p: Optional[int] = None, - top_k: Optional[int] = None, - metadata: Optional[dict] = None, - system: Optional[str] = None, - ) -> None: - locals_ = locals() - for key, value in locals_.items(): - if key != "self" and value is not None: - setattr(self.__class__, key, value) - - @classmethod - def get_config(cls): - return { - k: v - for k, v in cls.__dict__.items() - if not k.startswith("__") - and not isinstance( - v, - ( - types.FunctionType, - types.BuiltinFunctionType, - classmethod, - staticmethod, - ), - ) - and v is not None - } - - def get_supported_openai_params(self): - return [ - "stream", - "stop", - "temperature", - "top_p", - "max_tokens", - 
"max_completion_tokens", - "tools", - "tool_choice", - "extra_headers", - ] - - def get_cache_control_headers(self) -> dict: - return { - "anthropic-version": "2023-06-01", - "anthropic-beta": "prompt-caching-2024-07-31", - } - - def map_openai_params(self, non_default_params: dict, optional_params: dict): - for param, value in non_default_params.items(): - if param == "max_tokens": - optional_params["max_tokens"] = value - if param == "max_completion_tokens": - optional_params["max_tokens"] = value - if param == "tools": - optional_params["tools"] = value - if param == "tool_choice": - _tool_choice: Optional[AnthropicMessagesToolChoice] = None - if value == "auto": - _tool_choice = {"type": "auto"} - elif value == "required": - _tool_choice = {"type": "any"} - elif isinstance(value, dict): - _tool_choice = {"type": "tool", "name": value["function"]["name"]} - - if _tool_choice is not None: - optional_params["tool_choice"] = _tool_choice - if param == "stream" and value == True: - optional_params["stream"] = value - if param == "stop": - if isinstance(value, str): - if ( - value == "\n" - ) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences - continue - value = [value] - elif isinstance(value, list): - new_v = [] - for v in value: - if ( - v == "\n" - ) and litellm.drop_params == True: # anthropic doesn't allow whitespace characters as stop-sequences - continue - new_v.append(v) - if len(new_v) > 0: - value = new_v - else: - continue - optional_params["stop_sequences"] = value - if param == "temperature": - optional_params["temperature"] = value - if param == "top_p": - optional_params["top_p"] = value - return optional_params - - def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool: - """ - Return if {"cache_control": ..} in message content block - - Used to check if anthropic prompt caching headers need to be set. 
- """ - for message in messages: - if message["content"] is not None and isinstance(message["content"], list): - for content in message["content"]: - if "cache_control" in content: - return True - - return False - - def translate_system_message( - self, messages: List[AllMessageValues] - ) -> List[AnthropicSystemMessageContent]: - system_prompt_indices = [] - anthropic_system_message_list: List[AnthropicSystemMessageContent] = [] - for idx, message in enumerate(messages): - if message["role"] == "system": - valid_content: bool = False - system_message_block = ChatCompletionSystemMessage(**message) - if isinstance(system_message_block["content"], str): - anthropic_system_message_content = AnthropicSystemMessageContent( - type="text", - text=system_message_block["content"], - ) - if "cache_control" in system_message_block: - anthropic_system_message_content["cache_control"] = ( - system_message_block["cache_control"] - ) - anthropic_system_message_list.append( - anthropic_system_message_content - ) - valid_content = True - elif isinstance(message["content"], list): - for _content in message["content"]: - anthropic_system_message_content = ( - AnthropicSystemMessageContent( - type=_content.get("type"), - text=_content.get("text"), - ) - ) - if "cache_control" in _content: - anthropic_system_message_content["cache_control"] = ( - _content["cache_control"] - ) - - anthropic_system_message_list.append( - anthropic_system_message_content - ) - valid_content = True - - if valid_content: - system_prompt_indices.append(idx) - if len(system_prompt_indices) > 0: - for idx in reversed(system_prompt_indices): - messages.pop(idx) - - return anthropic_system_message_list - - ### FOR [BETA] `/v1/messages` endpoint support - - def translatable_anthropic_params(self) -> List: - """ - Which anthropic params, we need to translate to the openai format. 
- """ - return ["messages", "metadata", "system", "tool_choice", "tools"] - - def translate_anthropic_messages_to_openai( - self, - messages: List[ - Union[ - AnthropicMessagesUserMessageParam, - AnthopicMessagesAssistantMessageParam, - ] - ], - ) -> List: - new_messages: List[AllMessageValues] = [] - for m in messages: - user_message: Optional[ChatCompletionUserMessage] = None - tool_message_list: List[ChatCompletionToolMessage] = [] - new_user_content_list: List[ - Union[ChatCompletionTextObject, ChatCompletionImageObject] - ] = [] - ## USER MESSAGE ## - if m["role"] == "user": - ## translate user message - if isinstance(m["content"], str): - user_message = ChatCompletionUserMessage( - role="user", content=m["content"] - ) - elif isinstance(m["content"], list): - for content in m["content"]: - if content["type"] == "text": - text_obj = ChatCompletionTextObject( - type="text", text=content["text"] - ) - new_user_content_list.append(text_obj) - elif content["type"] == "image": - image_url = ChatCompletionImageUrlObject( - url=f"data:{content['type']};base64,{content['source']}" - ) - image_obj = ChatCompletionImageObject( - type="image_url", image_url=image_url - ) - - new_user_content_list.append(image_obj) - elif content["type"] == "tool_result": - if "content" not in content: - tool_result = ChatCompletionToolMessage( - role="tool", - tool_call_id=content["tool_use_id"], - content="", - ) - tool_message_list.append(tool_result) - elif isinstance(content["content"], str): - tool_result = ChatCompletionToolMessage( - role="tool", - tool_call_id=content["tool_use_id"], - content=content["content"], - ) - tool_message_list.append(tool_result) - elif isinstance(content["content"], list): - for c in content["content"]: - if c["type"] == "text": - tool_result = ChatCompletionToolMessage( - role="tool", - tool_call_id=content["tool_use_id"], - content=c["text"], - ) - tool_message_list.append(tool_result) - elif c["type"] == "image": - image_str = ( - f"data:{c['type']};base64,{c['source']}" - ) - tool_result = ChatCompletionToolMessage( - role="tool", - tool_call_id=content["tool_use_id"], - content=image_str, - ) - tool_message_list.append(tool_result) - - if user_message is not None: - new_messages.append(user_message) - - if len(new_user_content_list) > 0: - new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore - - if len(tool_message_list) > 0: - new_messages.extend(tool_message_list) - - ## ASSISTANT MESSAGE ## - assistant_message_str: Optional[str] = None - tool_calls: List[ChatCompletionAssistantToolCall] = [] - if m["role"] == "assistant": - if isinstance(m["content"], str): - assistant_message_str = m["content"] - elif isinstance(m["content"], list): - for content in m["content"]: - if content["type"] == "text": - if assistant_message_str is None: - assistant_message_str = content["text"] - else: - assistant_message_str += content["text"] - elif content["type"] == "tool_use": - function_chunk = ChatCompletionToolCallFunctionChunk( - name=content["name"], - arguments=json.dumps(content["input"]), - ) - - tool_calls.append( - ChatCompletionAssistantToolCall( - id=content["id"], - type="function", - function=function_chunk, - ) - ) - - if assistant_message_str is not None or len(tool_calls) > 0: - assistant_message = ChatCompletionAssistantMessage( - role="assistant", - content=assistant_message_str, - ) - if len(tool_calls) > 0: - assistant_message["tool_calls"] = tool_calls - new_messages.append(assistant_message) - - return new_messages - - def 
translate_anthropic_tool_choice_to_openai( - self, tool_choice: AnthropicMessagesToolChoice - ) -> ChatCompletionToolChoiceValues: - if tool_choice["type"] == "any": - return "required" - elif tool_choice["type"] == "auto": - return "auto" - elif tool_choice["type"] == "tool": - tc_function_param = ChatCompletionToolChoiceFunctionParam( - name=tool_choice.get("name", "") - ) - return ChatCompletionToolChoiceObjectParam( - type="function", function=tc_function_param - ) - else: - raise ValueError( - "Incompatible tool choice param submitted - {}".format(tool_choice) - ) - - def translate_anthropic_tools_to_openai( - self, tools: List[AnthropicMessagesTool] - ) -> List[ChatCompletionToolParam]: - new_tools: List[ChatCompletionToolParam] = [] - for tool in tools: - function_chunk = ChatCompletionToolParamFunctionChunk( - name=tool["name"], - parameters=tool["input_schema"], - ) - if "description" in tool: - function_chunk["description"] = tool["description"] - new_tools.append( - ChatCompletionToolParam(type="function", function=function_chunk) - ) - - return new_tools - - def translate_anthropic_to_openai( - self, anthropic_message_request: AnthropicMessagesRequest - ) -> ChatCompletionRequest: - """ - This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format. - """ - new_messages: List[AllMessageValues] = [] - - ## CONVERT ANTHROPIC MESSAGES TO OPENAI - new_messages = self.translate_anthropic_messages_to_openai( - messages=anthropic_message_request["messages"] - ) - ## ADD SYSTEM MESSAGE TO MESSAGES - if "system" in anthropic_message_request: - new_messages.insert( - 0, - ChatCompletionSystemMessage( - role="system", content=anthropic_message_request["system"] - ), - ) - - new_kwargs: ChatCompletionRequest = { - "model": anthropic_message_request["model"], - "messages": new_messages, - } - ## CONVERT METADATA (user_id) - if "metadata" in anthropic_message_request: - if "user_id" in anthropic_message_request["metadata"]: - new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"] - - # Pass litellm proxy specific metadata - if "litellm_metadata" in anthropic_message_request: - # metadata will be passed to litellm.acompletion(), it's a litellm_param - new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata") - - ## CONVERT TOOL CHOICE - if "tool_choice" in anthropic_message_request: - new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai( - tool_choice=anthropic_message_request["tool_choice"] - ) - ## CONVERT TOOLS - if "tools" in anthropic_message_request: - new_kwargs["tools"] = self.translate_anthropic_tools_to_openai( - tools=anthropic_message_request["tools"] - ) - - translatable_params = self.translatable_anthropic_params() - for k, v in anthropic_message_request.items(): - if k not in translatable_params: # pass remaining params as is - new_kwargs[k] = v # type: ignore - - return new_kwargs - - def _translate_openai_content_to_anthropic( - self, choices: List[Choices] - ) -> List[ - Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse] - ]: - new_content: List[ - Union[ - AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse - ] - ] = [] - for choice in choices: - if ( - choice.message.tool_calls is not None - and len(choice.message.tool_calls) > 0 - ): - for tool_call in choice.message.tool_calls: - new_content.append( - AnthropicResponseContentBlockToolUse( - type="tool_use", - id=tool_call.id, - name=tool_call.function.name or "", - 
input=json.loads(tool_call.function.arguments), - ) - ) - elif choice.message.content is not None: - new_content.append( - AnthropicResponseContentBlockText( - type="text", text=choice.message.content - ) - ) - - return new_content - - def _translate_openai_finish_reason_to_anthropic( - self, openai_finish_reason: str - ) -> AnthropicFinishReason: - if openai_finish_reason == "stop": - return "end_turn" - elif openai_finish_reason == "length": - return "max_tokens" - elif openai_finish_reason == "tool_calls": - return "tool_use" - return "end_turn" - - def translate_openai_response_to_anthropic( - self, response: litellm.ModelResponse - ) -> AnthropicResponse: - ## translate content block - anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore - ## extract finish reason - anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic( - openai_finish_reason=response.choices[0].finish_reason # type: ignore - ) - # extract usage - usage: litellm.Usage = getattr(response, "usage") - anthropic_usage = AnthropicResponseUsageBlock( - input_tokens=usage.prompt_tokens or 0, - output_tokens=usage.completion_tokens or 0, - ) - translated_obj = AnthropicResponse( - id=response.id, - type="message", - role="assistant", - model=response.model or "unknown-model", - stop_sequence=None, - usage=anthropic_usage, - content=anthropic_content, - stop_reason=anthropic_finish_reason, - ) - - return translated_obj - - def _translate_streaming_openai_chunk_to_anthropic( - self, choices: List[OpenAIStreamingChoice] - ) -> Tuple[ - Literal["text_delta", "input_json_delta"], - Union[ContentTextBlockDelta, ContentJsonBlockDelta], - ]: - text: str = "" - partial_json: Optional[str] = None - for choice in choices: - if choice.delta.content is not None: - text += choice.delta.content - elif choice.delta.tool_calls is not None: - partial_json = "" - for tool in choice.delta.tool_calls: - if ( - tool.function is not None - and tool.function.arguments is not None - ): - partial_json += tool.function.arguments - - if partial_json is not None: - return "input_json_delta", ContentJsonBlockDelta( - type="input_json_delta", partial_json=partial_json - ) - else: - return "text_delta", ContentTextBlockDelta(type="text_delta", text=text) - - def translate_streaming_openai_response_to_anthropic( - self, response: litellm.ModelResponse - ) -> Union[ContentBlockDelta, MessageBlockDelta]: - ## base case - final chunk w/ finish reason - if response.choices[0].finish_reason is not None: - delta = MessageDelta( - stop_reason=self._translate_openai_finish_reason_to_anthropic( - response.choices[0].finish_reason - ), - ) - if getattr(response, "usage", None) is not None: - litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore - elif ( - hasattr(response, "_hidden_params") - and "usage" in response._hidden_params - ): - litellm_usage_chunk = response._hidden_params["usage"] - else: - litellm_usage_chunk = None - if litellm_usage_chunk is not None: - usage_delta = UsageDelta( - input_tokens=litellm_usage_chunk.prompt_tokens or 0, - output_tokens=litellm_usage_chunk.completion_tokens or 0, - ) - else: - usage_delta = UsageDelta(input_tokens=0, output_tokens=0) - return MessageBlockDelta( - type="message_delta", delta=delta, usage=usage_delta - ) - ( - type_of_content, - content_block_delta, - ) = self._translate_streaming_openai_chunk_to_anthropic( - choices=response.choices # type: ignore - ) - return ContentBlockDelta( - type="content_block_delta", - 
index=response.choices[0].index, - delta=content_block_delta, - ) - - # makes headers for API call def validate_environment( api_key, user_headers, model, messages: List[AllMessageValues] @@ -684,8 +139,14 @@ async def make_call( api_base, headers=headers, data=data, stream=True, timeout=timeout ) except httpx.HTTPStatusError as e: + error_headers = getattr(e, "headers", None) + error_response = getattr(e, "response", None) + if error_headers is None and error_response: + error_headers = getattr(error_response, "headers", None) raise AnthropicError( - status_code=e.response.status_code, message=await e.response.aread() + status_code=e.response.status_code, + message=await e.response.aread(), + headers=error_headers, ) except Exception as e: for exception in litellm.LITELLM_EXCEPTION_TYPES: @@ -726,8 +187,14 @@ def make_sync_call( api_base, headers=headers, data=data, stream=True, timeout=timeout ) except httpx.HTTPStatusError as e: + error_headers = getattr(e, "headers", None) + error_response = getattr(e, "response", None) + if error_headers is None and error_response: + error_headers = getattr(error_response, "headers", None) raise AnthropicError( - status_code=e.response.status_code, message=e.response.read() + status_code=e.response.status_code, + message=e.response.read(), + headers=error_headers, ) except Exception as e: for exception in litellm.LITELLM_EXCEPTION_TYPES: @@ -736,7 +203,12 @@ def make_sync_call( raise AnthropicError(status_code=500, message=str(e)) if response.status_code != 200: - raise AnthropicError(status_code=response.status_code, message=response.read()) + response_headers = getattr(response, "headers", None) + raise AnthropicError( + status_code=response.status_code, + message=response.read(), + headers=response_headers, + ) completion_stream = ModelResponseIterator( streaming_response=response.iter_lines(), sync_stream=True @@ -763,7 +235,7 @@ class AnthropicChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, # type: ignore optional_params: dict, api_key: str, data: Union[dict, str], @@ -772,6 +244,14 @@ class AnthropicChatCompletion(BaseLLM): encoding, json_mode: bool, ) -> ModelResponse: + _hidden_params = {} + _response_headers = dict(response.headers) + if _response_headers is not None: + llm_response_headers = { + "{}-{}".format("llm_provider", k): v + for k, v in _response_headers.items() + } + _hidden_params["additional_headers"] = llm_response_headers ## LOGGING logging_obj.post_call( input=messages, @@ -783,14 +263,21 @@ class AnthropicChatCompletion(BaseLLM): ## RESPONSE OBJECT try: completion_response = response.json() - except: + except Exception as e: + response_headers = getattr(response, "headers", None) raise AnthropicError( - message=response.text, status_code=response.status_code + message="Unable to get json response - {}, Original Response: {}".format( + str(e), response.text + ), + status_code=response.status_code, + headers=response_headers, ) if "error" in completion_response: + response_headers = getattr(response, "headers", None) raise AnthropicError( message=str(completion_response["error"]), status_code=response.status_code, + headers=response_headers, ) else: text_content = "" @@ -856,6 +343,8 @@ class AnthropicChatCompletion(BaseLLM): if "cache_read_input_tokens" in _usage: usage["cache_read_input_tokens"] = 
_usage["cache_read_input_tokens"] setattr(model_response, "usage", usage) # type: ignore + + model_response._hidden_params = _hidden_params return model_response async def acompletion_stream_function( @@ -919,9 +408,9 @@ class AnthropicChatCompletion(BaseLLM): litellm_params=None, logger_fn=None, headers={}, - client=None, + client: Optional[AsyncHTTPHandler] = None, ) -> Union[ModelResponse, CustomStreamWrapper]: - async_handler = get_async_httpx_client( + async_handler = client or get_async_httpx_client( llm_provider=litellm.LlmProviders.ANTHROPIC ) @@ -937,7 +426,17 @@ class AnthropicChatCompletion(BaseLLM): original_response=str(e), additional_args={"complete_input_dict": data}, ) - raise e + status_code = getattr(e, "status_code", 500) + error_headers = getattr(e, "headers", None) + error_text = getattr(e, "text", str(e)) + error_response = getattr(e, "response", None) + if error_headers is None and error_response: + error_headers = getattr(error_response, "headers", None) + raise AnthropicError( + message=error_text, + status_code=status_code, + headers=error_headers, + ) return self._process_response( model=model, @@ -977,73 +476,18 @@ class AnthropicChatCompletion(BaseLLM): _is_function_call = False messages = copy.deepcopy(messages) optional_params = copy.deepcopy(optional_params) - if model in custom_prompt_dict: - # check if the model has a registered custom prompt - model_prompt_details = custom_prompt_dict[model] - prompt = custom_prompt( - role_dict=model_prompt_details["roles"], - initial_prompt_value=model_prompt_details["initial_prompt_value"], - final_prompt_value=model_prompt_details["final_prompt_value"], - messages=messages, - ) - else: - # Separate system prompt from rest of message - anthropic_system_message_list = AnthropicConfig().translate_system_message( - messages=messages - ) - # Handling anthropic API Prompt Caching - if len(anthropic_system_message_list) > 0: - optional_params["system"] = anthropic_system_message_list - # Format rest of message according to anthropic guidelines - try: - messages = prompt_factory( - model=model, messages=messages, custom_llm_provider="anthropic" - ) - except Exception as e: - raise AnthropicError( - status_code=400, - message="{}\nReceived Messages={}".format(str(e), messages), - ) # don't use verbose_logger.exception, if exception is raised - - ## Load Config - config = litellm.AnthropicConfig.get_config() - for k, v in config.items(): - if ( - k not in optional_params - ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in - optional_params[k] = v - - ## Handle Tool Calling - if "tools" in optional_params: - _is_function_call = True - if "anthropic-beta" not in headers: - # default to v1 of "anthropic-beta" - headers["anthropic-beta"] = "tools-2024-05-16" - - anthropic_tools = [] - for tool in optional_params["tools"]: - if "input_schema" in tool: # assume in anthropic format - anthropic_tools.append(tool) - else: # assume openai tool call - new_tool = tool["function"] - new_tool["input_schema"] = new_tool.pop("parameters") # rename key - if "cache_control" in tool: - new_tool["cache_control"] = tool["cache_control"] - anthropic_tools.append(new_tool) - - optional_params["tools"] = anthropic_tools - stream = optional_params.pop("stream", None) - is_vertex_request: bool = optional_params.pop("is_vertex_request", False) json_mode: bool = optional_params.pop("json_mode", False) + is_vertex_request: bool = optional_params.pop("is_vertex_request", False) - data = { - "messages": 
messages, - **optional_params, - } - - if is_vertex_request is False: - data["model"] = model + data = AnthropicConfig()._transform_request( + model=model, + messages=messages, + optional_params=optional_params, + headers=headers, + _is_function_call=_is_function_call, + is_vertex_request=is_vertex_request, + ) ## LOGGING logging_obj.pre_call( @@ -1136,12 +580,25 @@ class AnthropicChatCompletion(BaseLLM): client = HTTPHandler(timeout=timeout) # type: ignore else: client = client - response = client.post( - api_base, headers=headers, data=json.dumps(data), timeout=timeout - ) - if response.status_code != 200: + + try: + response = client.post( + api_base, + headers=headers, + data=json.dumps(data), + timeout=timeout, + ) + except Exception as e: + status_code = getattr(e, "status_code", 500) + error_headers = getattr(e, "headers", None) + error_text = getattr(e, "text", str(e)) + error_response = getattr(e, "response", None) + if error_headers is None and error_response: + error_headers = getattr(error_response, "headers", None) raise AnthropicError( - status_code=response.status_code, message=response.text + message=error_text, + status_code=status_code, + headers=error_headers, ) return self._process_response( @@ -1151,7 +608,7 @@ class AnthropicChatCompletion(BaseLLM): stream=stream, logging_obj=logging_obj, api_key=api_key, - data=data, + data=data, # type: ignore messages=messages, print_verbose=print_verbose, optional_params=optional_params, @@ -1192,7 +649,7 @@ class ModelResponseIterator: return False def _handle_usage( - self, anthropic_usage_chunk: dict + self, anthropic_usage_chunk: Union[dict, UsageDelta] ) -> AnthropicChatCompletionUsageBlock: special_fields = ["input_tokens", "output_tokens"] @@ -1203,15 +660,19 @@ class ModelResponseIterator: + anthropic_usage_chunk.get("output_tokens", 0), ) - if "cache_creation_input_tokens" in anthropic_usage_chunk: - usage_block["cache_creation_input_tokens"] = anthropic_usage_chunk[ - "cache_creation_input_tokens" - ] + cache_creation_input_tokens = anthropic_usage_chunk.get( + "cache_creation_input_tokens" + ) + if cache_creation_input_tokens is not None and isinstance( + cache_creation_input_tokens, int + ): + usage_block["cache_creation_input_tokens"] = cache_creation_input_tokens - if "cache_read_input_tokens" in anthropic_usage_chunk: - usage_block["cache_read_input_tokens"] = anthropic_usage_chunk[ - "cache_read_input_tokens" - ] + cache_read_input_tokens = anthropic_usage_chunk.get("cache_read_input_tokens") + if cache_read_input_tokens is not None and isinstance( + cache_read_input_tokens, int + ): + usage_block["cache_read_input_tokens"] = cache_read_input_tokens return usage_block @@ -1313,9 +774,10 @@ class ModelResponseIterator: } """ message_start_block = MessageStartBlock(**chunk) # type: ignore - usage = self._handle_usage( - anthropic_usage_chunk=message_start_block["message"]["usage"] - ) + if "usage" in message_start_block["message"]: + usage = self._handle_usage( + anthropic_usage_chunk=message_start_block["message"]["usage"] + ) elif type_chunk == "error": """ {"type":"error","error":{"details":null,"type":"api_error","message":"Internal server error"} } diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py new file mode 100644 index 000000000..2ca22db3b --- /dev/null +++ b/litellm/llms/anthropic/chat/transformation.py @@ -0,0 +1,289 @@ +import types +from typing import List, Literal, Optional, Tuple, Union + +import litellm +from 
litellm.llms.prompt_templates.factory import anthropic_messages_pt +from litellm.types.llms.anthropic import ( + AnthropicMessageRequestBase, + AnthropicMessagesRequest, + AnthropicMessagesToolChoice, + AnthropicSystemMessageContent, +) +from litellm.types.llms.openai import AllMessageValues, ChatCompletionSystemMessage +from litellm.utils import has_tool_call_blocks + +from ..common_utils import AnthropicError + + +class AnthropicConfig: + """ + Reference: https://docs.anthropic.com/claude/reference/messages_post + + to pass metadata to anthropic, it's {"user_id": "any-relevant-information"} + """ + + max_tokens: Optional[int] = ( + 4096 # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default) + ) + stop_sequences: Optional[list] = None + temperature: Optional[int] = None + top_p: Optional[int] = None + top_k: Optional[int] = None + metadata: Optional[dict] = None + system: Optional[str] = None + + def __init__( + self, + max_tokens: Optional[ + int + ] = 4096, # You can pass in a value yourself or use the default value 4096 + stop_sequences: Optional[list] = None, + temperature: Optional[int] = None, + top_p: Optional[int] = None, + top_k: Optional[int] = None, + metadata: Optional[dict] = None, + system: Optional[str] = None, + ) -> None: + locals_ = locals() + for key, value in locals_.items(): + if key != "self" and value is not None: + setattr(self.__class__, key, value) + + @classmethod + def get_config(cls): + return { + k: v + for k, v in cls.__dict__.items() + if not k.startswith("__") + and not isinstance( + v, + ( + types.FunctionType, + types.BuiltinFunctionType, + classmethod, + staticmethod, + ), + ) + and v is not None + } + + def get_supported_openai_params(self): + return [ + "stream", + "stop", + "temperature", + "top_p", + "max_tokens", + "max_completion_tokens", + "tools", + "tool_choice", + "extra_headers", + ] + + def get_cache_control_headers(self) -> dict: + return { + "anthropic-version": "2023-06-01", + "anthropic-beta": "prompt-caching-2024-07-31", + } + + def map_openai_params( + self, + non_default_params: dict, + optional_params: dict, + messages: Optional[List[AllMessageValues]] = None, + ): + for param, value in non_default_params.items(): + if param == "max_tokens": + optional_params["max_tokens"] = value + if param == "max_completion_tokens": + optional_params["max_tokens"] = value + if param == "tools": + optional_params["tools"] = value + if param == "tool_choice": + _tool_choice: Optional[AnthropicMessagesToolChoice] = None + if value == "auto": + _tool_choice = {"type": "auto"} + elif value == "required": + _tool_choice = {"type": "any"} + elif isinstance(value, dict): + _tool_choice = {"type": "tool", "name": value["function"]["name"]} + + if _tool_choice is not None: + optional_params["tool_choice"] = _tool_choice + if param == "stream" and value is True: + optional_params["stream"] = value + if param == "stop": + if isinstance(value, str): + if ( + value == "\n" + ) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences + continue + value = [value] + elif isinstance(value, list): + new_v = [] + for v in value: + if ( + v == "\n" + ) and litellm.drop_params is True: # anthropic doesn't allow whitespace characters as stop-sequences + continue + new_v.append(v) + if len(new_v) > 0: + value = new_v + else: + continue + optional_params["stop_sequences"] = value + if param == "temperature": + optional_params["temperature"] = value + if param == "top_p": + optional_params["top_p"] = 
value + + ## VALIDATE REQUEST + """ + Anthropic doesn't support tool calling without `tools=` param specified. + """ + if ( + "tools" not in non_default_params + and messages is not None + and has_tool_call_blocks(messages) + ): + raise litellm.UnsupportedParamsError( + message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.", + model="", + llm_provider="anthropic", + ) + + return optional_params + + def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool: + """ + Return if {"cache_control": ..} in message content block + + Used to check if anthropic prompt caching headers need to be set. + """ + for message in messages: + _message_content = message.get("content") + if _message_content is not None and isinstance(_message_content, list): + for content in _message_content: + if "cache_control" in content: + return True + + return False + + def translate_system_message( + self, messages: List[AllMessageValues] + ) -> List[AnthropicSystemMessageContent]: + """ + Translate system message to anthropic format. + + Removes system message from the original list and returns a new list of anthropic system message content. + """ + system_prompt_indices = [] + anthropic_system_message_list: List[AnthropicSystemMessageContent] = [] + for idx, message in enumerate(messages): + if message["role"] == "system": + valid_content: bool = False + system_message_block = ChatCompletionSystemMessage(**message) + if isinstance(system_message_block["content"], str): + anthropic_system_message_content = AnthropicSystemMessageContent( + type="text", + text=system_message_block["content"], + ) + if "cache_control" in system_message_block: + anthropic_system_message_content["cache_control"] = ( + system_message_block["cache_control"] + ) + anthropic_system_message_list.append( + anthropic_system_message_content + ) + valid_content = True + elif isinstance(message["content"], list): + for _content in message["content"]: + anthropic_system_message_content = ( + AnthropicSystemMessageContent( + type=_content.get("type"), + text=_content.get("text"), + ) + ) + if "cache_control" in _content: + anthropic_system_message_content["cache_control"] = ( + _content["cache_control"] + ) + + anthropic_system_message_list.append( + anthropic_system_message_content + ) + valid_content = True + + if valid_content: + system_prompt_indices.append(idx) + if len(system_prompt_indices) > 0: + for idx in reversed(system_prompt_indices): + messages.pop(idx) + + return anthropic_system_message_list + + def _transform_request( + self, + model: str, + messages: List[AllMessageValues], + optional_params: dict, + headers: dict, + _is_function_call: bool, + is_vertex_request: bool, + ) -> dict: + """ + Translate messages to anthropic format. 
+ """ + # Separate system prompt from rest of message + anthropic_system_message_list = self.translate_system_message(messages=messages) + # Handling anthropic API Prompt Caching + if len(anthropic_system_message_list) > 0: + optional_params["system"] = anthropic_system_message_list + # Format rest of message according to anthropic guidelines + try: + anthropic_messages = anthropic_messages_pt( + model=model, + messages=messages, + llm_provider="anthropic", + ) + except Exception as e: + raise AnthropicError( + status_code=400, + message="{}\nReceived Messages={}".format(str(e), messages), + ) # don't use verbose_logger.exception, if exception is raised + + ## Load Config + config = litellm.AnthropicConfig.get_config() + for k, v in config.items(): + if ( + k not in optional_params + ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in + optional_params[k] = v + + ## Handle Tool Calling + if "tools" in optional_params: + _is_function_call = True + if "anthropic-beta" not in headers: + # default to v1 of "anthropic-beta" + headers["anthropic-beta"] = "tools-2024-05-16" + + anthropic_tools = [] + for tool in optional_params["tools"]: + if "input_schema" in tool: # assume in anthropic format + anthropic_tools.append(tool) + else: # assume openai tool call + new_tool = tool["function"] + new_tool["input_schema"] = new_tool.pop("parameters") # rename key + if "cache_control" in tool: + new_tool["cache_control"] = tool["cache_control"] + anthropic_tools.append(new_tool) + + optional_params["tools"] = anthropic_tools + + data = { + "messages": anthropic_messages, + **optional_params, + } + if not is_vertex_request: + data["model"] = model + return data diff --git a/litellm/llms/anthropic/common_utils.py b/litellm/llms/anthropic/common_utils.py new file mode 100644 index 000000000..f7cba3e4a --- /dev/null +++ b/litellm/llms/anthropic/common_utils.py @@ -0,0 +1,26 @@ +""" +This file contains common utils for anthropic calls. 
+""" + +from typing import Optional + +import httpx + + +class AnthropicError(Exception): + def __init__( + self, + status_code: int, + message, + headers: Optional[httpx.Headers] = None, + ): + self.status_code = status_code + self.message: str = message + self.headers = headers + self.request = httpx.Request( + method="POST", url="https://api.anthropic.com/v1/messages" + ) + self.response = httpx.Response(status_code=status_code, request=self.request) + super().__init__( + self.message + ) # Call the base class constructor with the parameters it needs diff --git a/litellm/llms/anthropic/experimental_pass_through/transformation.py b/litellm/llms/anthropic/experimental_pass_through/transformation.py new file mode 100644 index 000000000..2a82594ba --- /dev/null +++ b/litellm/llms/anthropic/experimental_pass_through/transformation.py @@ -0,0 +1,425 @@ +import json +import types +from typing import Any, Dict, List, Literal, Optional, Tuple, Union + +from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice + +import litellm +from litellm.types.llms.anthropic import ( + AnthopicMessagesAssistantMessageParam, + AnthropicChatCompletionUsageBlock, + AnthropicFinishReason, + AnthropicMessagesRequest, + AnthropicMessagesTool, + AnthropicMessagesToolChoice, + AnthropicMessagesUserMessageParam, + AnthropicResponse, + AnthropicResponseContentBlockText, + AnthropicResponseContentBlockToolUse, + AnthropicResponseUsageBlock, + AnthropicSystemMessageContent, + ContentBlockDelta, + ContentBlockStart, + ContentBlockStop, + ContentJsonBlockDelta, + ContentTextBlockDelta, + MessageBlockDelta, + MessageDelta, + MessageStartBlock, + UsageDelta, +) +from litellm.types.llms.openai import ( + AllMessageValues, + ChatCompletionAssistantMessage, + ChatCompletionAssistantToolCall, + ChatCompletionImageObject, + ChatCompletionImageUrlObject, + ChatCompletionRequest, + ChatCompletionResponseMessage, + ChatCompletionSystemMessage, + ChatCompletionTextObject, + ChatCompletionToolCallChunk, + ChatCompletionToolCallFunctionChunk, + ChatCompletionToolChoiceFunctionParam, + ChatCompletionToolChoiceObjectParam, + ChatCompletionToolChoiceValues, + ChatCompletionToolMessage, + ChatCompletionToolParam, + ChatCompletionToolParamFunctionChunk, + ChatCompletionUsageBlock, + ChatCompletionUserMessage, + OpenAIMessageContent, +) +from litellm.types.utils import Choices, GenericStreamingChunk +from litellm.utils import CustomStreamWrapper, ModelResponse, Usage + +from ...base import BaseLLM +from ...prompt_templates.factory import ( + anthropic_messages_pt, + custom_prompt, + prompt_factory, +) + + +class AnthropicExperimentalPassThroughConfig: + def __init__(self): + pass + + ### FOR [BETA] `/v1/messages` endpoint support + + def translatable_anthropic_params(self) -> List: + """ + Which anthropic params, we need to translate to the openai format. 
+ """ + return ["messages", "metadata", "system", "tool_choice", "tools"] + + def translate_anthropic_messages_to_openai( + self, + messages: List[ + Union[ + AnthropicMessagesUserMessageParam, + AnthopicMessagesAssistantMessageParam, + ] + ], + ) -> List: + new_messages: List[AllMessageValues] = [] + for m in messages: + user_message: Optional[ChatCompletionUserMessage] = None + tool_message_list: List[ChatCompletionToolMessage] = [] + new_user_content_list: List[ + Union[ChatCompletionTextObject, ChatCompletionImageObject] + ] = [] + ## USER MESSAGE ## + if m["role"] == "user": + ## translate user message + message_content = m.get("content") + if message_content and isinstance(message_content, str): + user_message = ChatCompletionUserMessage( + role="user", content=message_content + ) + elif message_content and isinstance(message_content, list): + for content in message_content: + if content["type"] == "text": + text_obj = ChatCompletionTextObject( + type="text", text=content["text"] + ) + new_user_content_list.append(text_obj) + elif content["type"] == "image": + image_url = ChatCompletionImageUrlObject( + url=f"data:{content['type']};base64,{content['source']}" + ) + image_obj = ChatCompletionImageObject( + type="image_url", image_url=image_url + ) + + new_user_content_list.append(image_obj) + elif content["type"] == "tool_result": + if "content" not in content: + tool_result = ChatCompletionToolMessage( + role="tool", + tool_call_id=content["tool_use_id"], + content="", + ) + tool_message_list.append(tool_result) + elif isinstance(content["content"], str): + tool_result = ChatCompletionToolMessage( + role="tool", + tool_call_id=content["tool_use_id"], + content=content["content"], + ) + tool_message_list.append(tool_result) + elif isinstance(content["content"], list): + for c in content["content"]: + if c["type"] == "text": + tool_result = ChatCompletionToolMessage( + role="tool", + tool_call_id=content["tool_use_id"], + content=c["text"], + ) + tool_message_list.append(tool_result) + elif c["type"] == "image": + image_str = ( + f"data:{c['type']};base64,{c['source']}" + ) + tool_result = ChatCompletionToolMessage( + role="tool", + tool_call_id=content["tool_use_id"], + content=image_str, + ) + tool_message_list.append(tool_result) + + if user_message is not None: + new_messages.append(user_message) + + if len(new_user_content_list) > 0: + new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore + + if len(tool_message_list) > 0: + new_messages.extend(tool_message_list) + + ## ASSISTANT MESSAGE ## + assistant_message_str: Optional[str] = None + tool_calls: List[ChatCompletionAssistantToolCall] = [] + if m["role"] == "assistant": + if isinstance(m["content"], str): + assistant_message_str = m["content"] + elif isinstance(m["content"], list): + for content in m["content"]: + if content["type"] == "text": + if assistant_message_str is None: + assistant_message_str = content["text"] + else: + assistant_message_str += content["text"] + elif content["type"] == "tool_use": + function_chunk = ChatCompletionToolCallFunctionChunk( + name=content["name"], + arguments=json.dumps(content["input"]), + ) + + tool_calls.append( + ChatCompletionAssistantToolCall( + id=content["id"], + type="function", + function=function_chunk, + ) + ) + + if assistant_message_str is not None or len(tool_calls) > 0: + assistant_message = ChatCompletionAssistantMessage( + role="assistant", + content=assistant_message_str, + ) + if len(tool_calls) > 0: + assistant_message["tool_calls"] = 
tool_calls + new_messages.append(assistant_message) + + return new_messages + + def translate_anthropic_tool_choice_to_openai( + self, tool_choice: AnthropicMessagesToolChoice + ) -> ChatCompletionToolChoiceValues: + if tool_choice["type"] == "any": + return "required" + elif tool_choice["type"] == "auto": + return "auto" + elif tool_choice["type"] == "tool": + tc_function_param = ChatCompletionToolChoiceFunctionParam( + name=tool_choice.get("name", "") + ) + return ChatCompletionToolChoiceObjectParam( + type="function", function=tc_function_param + ) + else: + raise ValueError( + "Incompatible tool choice param submitted - {}".format(tool_choice) + ) + + def translate_anthropic_tools_to_openai( + self, tools: List[AnthropicMessagesTool] + ) -> List[ChatCompletionToolParam]: + new_tools: List[ChatCompletionToolParam] = [] + for tool in tools: + function_chunk = ChatCompletionToolParamFunctionChunk( + name=tool["name"], + parameters=tool["input_schema"], + ) + if "description" in tool: + function_chunk["description"] = tool["description"] + new_tools.append( + ChatCompletionToolParam(type="function", function=function_chunk) + ) + + return new_tools + + def translate_anthropic_to_openai( + self, anthropic_message_request: AnthropicMessagesRequest + ) -> ChatCompletionRequest: + """ + This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format. + """ + new_messages: List[AllMessageValues] = [] + + ## CONVERT ANTHROPIC MESSAGES TO OPENAI + new_messages = self.translate_anthropic_messages_to_openai( + messages=anthropic_message_request["messages"] + ) + ## ADD SYSTEM MESSAGE TO MESSAGES + if "system" in anthropic_message_request: + new_messages.insert( + 0, + ChatCompletionSystemMessage( + role="system", content=anthropic_message_request["system"] + ), + ) + + new_kwargs: ChatCompletionRequest = { + "model": anthropic_message_request["model"], + "messages": new_messages, + } + ## CONVERT METADATA (user_id) + if "metadata" in anthropic_message_request: + if "user_id" in anthropic_message_request["metadata"]: + new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"] + + # Pass litellm proxy specific metadata + if "litellm_metadata" in anthropic_message_request: + # metadata will be passed to litellm.acompletion(), it's a litellm_param + new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata") + + ## CONVERT TOOL CHOICE + if "tool_choice" in anthropic_message_request: + new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai( + tool_choice=anthropic_message_request["tool_choice"] + ) + ## CONVERT TOOLS + if "tools" in anthropic_message_request: + new_kwargs["tools"] = self.translate_anthropic_tools_to_openai( + tools=anthropic_message_request["tools"] + ) + + translatable_params = self.translatable_anthropic_params() + for k, v in anthropic_message_request.items(): + if k not in translatable_params: # pass remaining params as is + new_kwargs[k] = v # type: ignore + + return new_kwargs + + def _translate_openai_content_to_anthropic( + self, choices: List[Choices] + ) -> List[ + Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse] + ]: + new_content: List[ + Union[ + AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse + ] + ] = [] + for choice in choices: + if ( + choice.message.tool_calls is not None + and len(choice.message.tool_calls) > 0 + ): + for tool_call in choice.message.tool_calls: + new_content.append( + 
AnthropicResponseContentBlockToolUse( + type="tool_use", + id=tool_call.id, + name=tool_call.function.name or "", + input=json.loads(tool_call.function.arguments), + ) + ) + elif choice.message.content is not None: + new_content.append( + AnthropicResponseContentBlockText( + type="text", text=choice.message.content + ) + ) + + return new_content + + def _translate_openai_finish_reason_to_anthropic( + self, openai_finish_reason: str + ) -> AnthropicFinishReason: + if openai_finish_reason == "stop": + return "end_turn" + elif openai_finish_reason == "length": + return "max_tokens" + elif openai_finish_reason == "tool_calls": + return "tool_use" + return "end_turn" + + def translate_openai_response_to_anthropic( + self, response: litellm.ModelResponse + ) -> AnthropicResponse: + ## translate content block + anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore + ## extract finish reason + anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic( + openai_finish_reason=response.choices[0].finish_reason # type: ignore + ) + # extract usage + usage: litellm.Usage = getattr(response, "usage") + anthropic_usage = AnthropicResponseUsageBlock( + input_tokens=usage.prompt_tokens or 0, + output_tokens=usage.completion_tokens or 0, + ) + translated_obj = AnthropicResponse( + id=response.id, + type="message", + role="assistant", + model=response.model or "unknown-model", + stop_sequence=None, + usage=anthropic_usage, + content=anthropic_content, + stop_reason=anthropic_finish_reason, + ) + + return translated_obj + + def _translate_streaming_openai_chunk_to_anthropic( + self, choices: List[OpenAIStreamingChoice] + ) -> Tuple[ + Literal["text_delta", "input_json_delta"], + Union[ContentTextBlockDelta, ContentJsonBlockDelta], + ]: + text: str = "" + partial_json: Optional[str] = None + for choice in choices: + if choice.delta.content is not None: + text += choice.delta.content + elif choice.delta.tool_calls is not None: + partial_json = "" + for tool in choice.delta.tool_calls: + if ( + tool.function is not None + and tool.function.arguments is not None + ): + partial_json += tool.function.arguments + + if partial_json is not None: + return "input_json_delta", ContentJsonBlockDelta( + type="input_json_delta", partial_json=partial_json + ) + else: + return "text_delta", ContentTextBlockDelta(type="text_delta", text=text) + + def translate_streaming_openai_response_to_anthropic( + self, response: litellm.ModelResponse + ) -> Union[ContentBlockDelta, MessageBlockDelta]: + ## base case - final chunk w/ finish reason + if response.choices[0].finish_reason is not None: + delta = MessageDelta( + stop_reason=self._translate_openai_finish_reason_to_anthropic( + response.choices[0].finish_reason + ), + ) + if getattr(response, "usage", None) is not None: + litellm_usage_chunk: Optional[litellm.Usage] = response.usage # type: ignore + elif ( + hasattr(response, "_hidden_params") + and "usage" in response._hidden_params + ): + litellm_usage_chunk = response._hidden_params["usage"] + else: + litellm_usage_chunk = None + if litellm_usage_chunk is not None: + usage_delta = UsageDelta( + input_tokens=litellm_usage_chunk.prompt_tokens or 0, + output_tokens=litellm_usage_chunk.completion_tokens or 0, + ) + else: + usage_delta = UsageDelta(input_tokens=0, output_tokens=0) + return MessageBlockDelta( + type="message_delta", delta=delta, usage=usage_delta + ) + ( + type_of_content, + content_block_delta, + ) = 
self._translate_streaming_openai_chunk_to_anthropic( + choices=response.choices # type: ignore + ) + return ContentBlockDelta( + type="content_block_delta", + index=response.choices[0].index, + delta=content_block_delta, + ) diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index 8229f6a58..77946bfb6 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -22,7 +22,7 @@ from litellm.types.llms.openai import ( ChatCompletionToolParamFunctionChunk, ) from litellm.types.utils import ModelResponse, Usage -from litellm.utils import CustomStreamWrapper +from litellm.utils import CustomStreamWrapper, has_tool_call_blocks from ...prompt_templates.factory import _bedrock_converse_messages_pt, _bedrock_tools_pt from ..common_utils import BedrockError, get_bedrock_tool_name @@ -136,6 +136,7 @@ class AmazonConverseConfig: non_default_params: dict, optional_params: dict, drop_params: bool, + messages: Optional[List[AllMessageValues]] = None, ) -> dict: for param, value in non_default_params.items(): if param == "response_format": @@ -202,6 +203,21 @@ class AmazonConverseConfig: ) if _tool_choice_value is not None: optional_params["tool_choice"] = _tool_choice_value + + ## VALIDATE REQUEST + """ + Bedrock doesn't support tool calling without `tools=` param specified. + """ + if ( + "tools" not in non_default_params + and messages is not None + and has_tool_call_blocks(messages) + ): + raise litellm.UnsupportedParamsError( + message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param to enable tool calling.", + model="", + llm_provider="anthropic", + ) return optional_params def _transform_request( diff --git a/litellm/llms/groq/chat/handler.py b/litellm/llms/groq/chat/handler.py new file mode 100644 index 000000000..f4a16abc8 --- /dev/null +++ b/litellm/llms/groq/chat/handler.py @@ -0,0 +1,60 @@ +""" +Handles the chat completion request for groq +""" + +from typing import Any, Callable, Optional, Union + +from httpx._config import Timeout + +from litellm.utils import ModelResponse + +from ...groq.chat.transformation import GroqChatConfig +from ...OpenAI.openai import OpenAIChatCompletion + + +class GroqChatCompletion(OpenAIChatCompletion): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def completion( + self, + model_response: ModelResponse, + timeout: Union[float, Timeout], + optional_params: dict, + logging_obj: Any, + model: Optional[str] = None, + messages: Optional[list] = None, + print_verbose: Optional[Callable[..., Any]] = None, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + acompletion: bool = False, + litellm_params=None, + logger_fn=None, + headers: Optional[dict] = None, + custom_prompt_dict: dict = {}, + client=None, + organization: Optional[str] = None, + custom_llm_provider: Optional[str] = None, + drop_params: Optional[bool] = None, + ): + messages = GroqChatConfig()._transform_messages(messages) # type: ignore + return super().completion( + model_response, + timeout, + optional_params, + logging_obj, + model, + messages, + print_verbose, + api_key, + api_base, + acompletion, + litellm_params, + logger_fn, + headers, + custom_prompt_dict, + client, + organization, + custom_llm_provider, + drop_params, + ) diff --git a/litellm/llms/groq/chat/transformation.py b/litellm/llms/groq/chat/transformation.py new file mode 100644 index 000000000..c683130ef --- /dev/null +++ 
b/litellm/llms/groq/chat/transformation.py @@ -0,0 +1,88 @@ +""" +Translate from OpenAI's `/v1/chat/completions` to Groq's `/v1/chat/completions` +""" + +import types +from typing import List, Optional, Union + +from pydantic import BaseModel + +import litellm +from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage + +from ...OpenAI.chat.gpt_transformation import OpenAIGPTConfig + + +class GroqChatConfig(OpenAIGPTConfig): + + frequency_penalty: Optional[int] = None + function_call: Optional[Union[str, dict]] = None + functions: Optional[list] = None + logit_bias: Optional[dict] = None + max_tokens: Optional[int] = None + n: Optional[int] = None + presence_penalty: Optional[int] = None + stop: Optional[Union[str, list]] = None + temperature: Optional[int] = None + top_p: Optional[int] = None + response_format: Optional[dict] = None + tools: Optional[list] = None + tool_choice: Optional[Union[str, dict]] = None + + def __init__( + self, + frequency_penalty: Optional[int] = None, + function_call: Optional[Union[str, dict]] = None, + functions: Optional[list] = None, + logit_bias: Optional[dict] = None, + max_tokens: Optional[int] = None, + n: Optional[int] = None, + presence_penalty: Optional[int] = None, + stop: Optional[Union[str, list]] = None, + temperature: Optional[int] = None, + top_p: Optional[int] = None, + response_format: Optional[dict] = None, + tools: Optional[list] = None, + tool_choice: Optional[Union[str, dict]] = None, + ) -> None: + locals_ = locals().copy() + for key, value in locals_.items(): + if key != "self" and value is not None: + setattr(self.__class__, key, value) + + @classmethod + def get_config(cls): + return { + k: v + for k, v in cls.__dict__.items() + if not k.startswith("__") + and not isinstance( + v, + ( + types.FunctionType, + types.BuiltinFunctionType, + classmethod, + staticmethod, + ), + ) + and v is not None + } + + def _transform_messages(self, messages: List[AllMessageValues]) -> List: + for idx, message in enumerate(messages): + """ + 1. 
Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839 + """ + if isinstance(message, BaseModel): + _message = message.model_dump() + else: + _message = message + assistant_message = _message.get("role") == "assistant" + if assistant_message: + new_message = ChatCompletionAssistantMessage(role="assistant") + for k, v in _message.items(): + if v is not None: + new_message[k] = v # type: ignore + messages[idx] = new_message + + return messages diff --git a/litellm/llms/groq/stt/transformation.py b/litellm/llms/groq/stt/transformation.py new file mode 100644 index 000000000..c4dbd8d0c --- /dev/null +++ b/litellm/llms/groq/stt/transformation.py @@ -0,0 +1,101 @@ +""" +Translate from OpenAI's `/v1/audio/transcriptions` to Groq's `/v1/audio/transcriptions` +""" + +import types +from typing import List, Optional, Union + +import litellm + + +class GroqSTTConfig: + + frequency_penalty: Optional[int] = None + function_call: Optional[Union[str, dict]] = None + functions: Optional[list] = None + logit_bias: Optional[dict] = None + max_tokens: Optional[int] = None + n: Optional[int] = None + presence_penalty: Optional[int] = None + stop: Optional[Union[str, list]] = None + temperature: Optional[int] = None + top_p: Optional[int] = None + response_format: Optional[dict] = None + tools: Optional[list] = None + tool_choice: Optional[Union[str, dict]] = None + + def __init__( + self, + frequency_penalty: Optional[int] = None, + function_call: Optional[Union[str, dict]] = None, + functions: Optional[list] = None, + logit_bias: Optional[dict] = None, + max_tokens: Optional[int] = None, + n: Optional[int] = None, + presence_penalty: Optional[int] = None, + stop: Optional[Union[str, list]] = None, + temperature: Optional[int] = None, + top_p: Optional[int] = None, + response_format: Optional[dict] = None, + tools: Optional[list] = None, + tool_choice: Optional[Union[str, dict]] = None, + ) -> None: + locals_ = locals().copy() + for key, value in locals_.items(): + if key != "self" and value is not None: + setattr(self.__class__, key, value) + + @classmethod + def get_config(cls): + return { + k: v + for k, v in cls.__dict__.items() + if not k.startswith("__") + and not isinstance( + v, + ( + types.FunctionType, + types.BuiltinFunctionType, + classmethod, + staticmethod, + ), + ) + and v is not None + } + + def get_supported_openai_params_stt(self): + return [ + "prompt", + "response_format", + "temperature", + "language", + ] + + def get_supported_openai_response_formats_stt(self) -> List[str]: + return ["json", "verbose_json", "text"] + + def map_openai_params_stt( + self, + non_default_params: dict, + optional_params: dict, + model: str, + drop_params: bool, + ) -> dict: + response_formats = self.get_supported_openai_response_formats_stt() + for param, value in non_default_params.items(): + if param == "response_format": + if value in response_formats: + optional_params[param] = value + else: + if litellm.drop_params is True or drop_params is True: + pass + else: + raise litellm.utils.UnsupportedParamsError( + message="Groq doesn't support response_format={}. 
To drop unsupported openai params from the call, set `litellm.drop_params = True`".format( + value + ), + status_code=400, + ) + else: + optional_params[param] = value + return optional_params diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_anthropic.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_anthropic.py index ecb11e1c9..b67a3c433 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_anthropic.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_anthropic.py @@ -276,7 +276,7 @@ def completion( from anthropic import AnthropicVertex - from litellm.llms.anthropic.chat import AnthropicChatCompletion + from litellm.llms.anthropic.chat.handler import AnthropicChatCompletion from litellm.llms.vertex_ai_and_google_ai_studio.gemini.vertex_and_google_ai_studio_gemini import ( VertexLLM, ) @@ -367,7 +367,7 @@ async def async_completion( if client is None: vertex_ai_client = AsyncAnthropicVertex( - project_id=vertex_project, region=vertex_location, access_token=access_token + project_id=vertex_project, region=vertex_location, access_token=access_token # type: ignore ) else: vertex_ai_client = client @@ -438,7 +438,7 @@ async def async_streaming( if client is None: vertex_ai_client = AsyncAnthropicVertex( - project_id=vertex_project, region=vertex_location, access_token=access_token + project_id=vertex_project, region=vertex_location, access_token=access_token # type: ignore ) else: vertex_ai_client = client diff --git a/litellm/main.py b/litellm/main.py index ff9ca81c1..c681c3b6e 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -96,6 +96,7 @@ from .llms.cohere import completion as cohere_completion # type: ignore from .llms.cohere import embed as cohere_embed from .llms.custom_llm import CustomLLM, custom_chat_llm_router from .llms.databricks.chat import DatabricksChatCompletion +from .llms.groq.chat.handler import GroqChatCompletion from .llms.huggingface_restapi import Huggingface from .llms.OpenAI.audio_transcriptions import OpenAIAudioTranscription from .llms.OpenAI.chat.o1_handler import OpenAIO1ChatCompletion @@ -168,6 +169,7 @@ openai_text_completions = OpenAITextCompletion() openai_o1_chat_completions = OpenAIO1ChatCompletion() openai_audio_transcriptions = OpenAIAudioTranscription() databricks_chat_completions = DatabricksChatCompletion() +groq_chat_completions = GroqChatCompletion() azure_ai_chat_completions = AzureAIChatCompletion() azure_ai_embedding = AzureAIEmbedding() anthropic_chat_completions = AnthropicChatCompletion() @@ -958,6 +960,7 @@ def completion( extra_headers=extra_headers, api_version=api_version, parallel_tool_calls=parallel_tool_calls, + messages=messages, **non_default_params, ) @@ -1318,13 +1321,56 @@ def completion( additional_args={"headers": headers}, ) response = _response + elif custom_llm_provider == "groq": + api_base = ( + api_base # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there + or litellm.api_base + or get_secret("GROQ_API_BASE") + or "https://api.groq.com/openai/v1" + ) + # set API KEY + api_key = ( + api_key + or litellm.api_key # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there + or litellm.groq_key + or get_secret("GROQ_API_KEY") + ) + + headers = headers or litellm.headers + + ## LOAD CONFIG - if set + config = litellm.GroqChatConfig.get_config() + for k, v in config.items(): + if ( + k not in optional_params + ): # completion(top_k=3) > openai_config(top_k=3) <- 
allows for dynamic variables to be passed in + optional_params[k] = v + + response = groq_chat_completions.completion( + model=model, + messages=messages, + headers=headers, + model_response=model_response, + print_verbose=print_verbose, + api_key=api_key, + api_base=api_base, + acompletion=acompletion, + logging_obj=logging, + optional_params=optional_params, + litellm_params=litellm_params, + logger_fn=logger_fn, + timeout=timeout, # type: ignore + custom_prompt_dict=custom_prompt_dict, + client=client, # pass AsyncOpenAI, OpenAI client + organization=organization, + custom_llm_provider=custom_llm_provider, + ) elif ( model in litellm.open_ai_chat_completion_models or custom_llm_provider == "custom_openai" or custom_llm_provider == "deepinfra" or custom_llm_provider == "perplexity" - or custom_llm_provider == "groq" or custom_llm_provider == "nvidia_nim" or custom_llm_provider == "cerebras" or custom_llm_provider == "sambanova" @@ -1431,6 +1477,7 @@ def completion( original_response=response, additional_args={"headers": headers}, ) + elif ( "replicate" in model or custom_llm_provider == "replicate" @@ -2933,6 +2980,7 @@ def batch_completion( deployment_id=None, request_timeout: Optional[int] = None, timeout: Optional[int] = 600, + max_workers:Optional[int]= 100, # Optional liteLLM function params **kwargs, ): @@ -2956,6 +3004,7 @@ def batch_completion( user (str, optional): The user string for generating completions. Defaults to "". deployment_id (optional): The deployment ID for generating completions. Defaults to None. request_timeout (int, optional): The request timeout for generating completions. Defaults to None. + max_workers (int,optional): The maximum number of threads to use for parallel processing. Returns: list: A list of completion results. 
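
A minimal usage sketch (not part of the patch) for the two changes above: the dedicated Groq routing branch, which resolves credentials from `litellm.groq_key` / `GROQ_API_KEY`, and the new `max_workers` parameter on `batch_completion`. The API key value below is a placeholder, and the `groq/llama3-8b-8192` model name is the one used in this patch's tests.

```
# Sketch, assuming a valid Groq key is available; the key string here is a placeholder.
import litellm

litellm.groq_key = "gsk-placeholder"  # or export GROQ_API_KEY instead

responses = litellm.batch_completion(
    model="groq/llama3-8b-8192",  # routed via the new groq branch in completion()
    messages=[[{"role": "user", "content": f"Say hello #{i}"}] for i in range(5)],
    max_workers=10,  # new param; defaults to 100, which was previously hard-coded
)
for r in responses:
    print(r.choices[0].message.content)
```
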
@@ -3001,7 +3050,7 @@ def batch_completion( for i in range(0, len(lst), n): yield lst[i : i + n] - with ThreadPoolExecutor(max_workers=100) as executor: + with ThreadPoolExecutor(max_workers=max_workers) as executor: for sub_batch in chunks(batch_messages, 100): for message_list in sub_batch: kwargs_modified = args.copy() diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 48b2a9322..e698fc5ba 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1173,6 +1173,18 @@ "supports_function_calling": true, "supports_assistant_prefill": true }, + "mistral/pixtral-12b-2409": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true, + "supports_assistant_prefill": true, + "supports_vision": true + }, "mistral/open-mistral-7b": { "max_tokens": 8191, "max_input_tokens": 32000, diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py index d75440337..7764cf4e6 100644 --- a/litellm/proxy/hooks/parallel_request_limiter.py +++ b/litellm/proxy/hooks/parallel_request_limiter.py @@ -760,7 +760,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger): return _user_id_rate_limits.model_dump() except Exception as e: - verbose_proxy_logger.exception( + verbose_proxy_logger.debug( "Parallel Request Limiter: Error getting user object", str(e) ) return None diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 24481e9c6..a36f42187 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -389,6 +389,9 @@ async def add_litellm_data_to_request( user_api_key_dict=user_api_key_dict, ) + verbose_proxy_logger.debug( + f"[PROXY]returned data from litellm_pre_call_utils: {data}" + ) return data diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 69dc730a7..8c5f91c15 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -1466,9 +1466,6 @@ class PrismaClient: ): args_passed_in = locals() start_time = time.time() - verbose_proxy_logger.debug( - f"PrismaClient: get_data - args_passed_in: {args_passed_in}" - ) hashed_token: Optional[str] = None try: response: Any = None diff --git a/litellm/tests/test_alangfuse.py b/litellm/tests/test_alangfuse.py index fa1260637..e9da35b77 100644 --- a/litellm/tests/test_alangfuse.py +++ b/litellm/tests/test_alangfuse.py @@ -1224,3 +1224,14 @@ def test_langfuse_prompt_type(prompt): _add_prompt_to_generation_params( generation_params=generation_params, clean_metadata=clean_metadata ) + + +def test_langfuse_logging_metadata(): + from litellm.integrations.langfuse import log_requester_metadata + + metadata = {"key": "value", "requester_metadata": {"key": "value"}} + + got_metadata = log_requester_metadata(clean_metadata=metadata) + expected_metadata = {"requester_metadata": {"key": "value"}} + + assert expected_metadata == got_metadata diff --git a/litellm/tests/test_anthropic_prompt_caching.py b/litellm/tests/test_anthropic_prompt_caching.py index 06f6916ed..2224da561 100644 --- a/litellm/tests/test_anthropic_prompt_caching.py +++ b/litellm/tests/test_anthropic_prompt_caching.py @@ -61,6 +61,7 @@ async def test_litellm_anthropic_prompt_caching_tools(): } mock_response.json = return_val + 
mock_response.headers = {"key": "value"} litellm.set_verbose = True with patch( @@ -466,6 +467,7 @@ async def test_litellm_anthropic_prompt_caching_system(): } mock_response.json = return_val + mock_response.headers = {"key": "value"} litellm.set_verbose = True with patch( diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index f49fb6254..a51dcc693 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -24,7 +24,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.prompt_templates.factory import anthropic_messages_pt -# litellm.num_retries = 3 +# litellm.num_retries=3 litellm.cache = None litellm.success_callback = [] diff --git a/litellm/tests/test_custom_callback_input.py b/litellm/tests/test_custom_callback_input.py index 504c881fa..632112f5b 100644 --- a/litellm/tests/test_custom_callback_input.py +++ b/litellm/tests/test_custom_callback_input.py @@ -1173,7 +1173,12 @@ def test_turn_off_message_logging(): ##### VALID JSON ###### -@pytest.mark.parametrize("model", ["gpt-3.5-turbo", "azure/chatgpt-v-2"]) +@pytest.mark.parametrize( + "model", + [ + "ft:gpt-3.5-turbo:my-org:custom_suffix:id" + ], # "gpt-3.5-turbo", "azure/chatgpt-v-2", +) @pytest.mark.parametrize( "turn_off_message_logging", [ @@ -1200,7 +1205,7 @@ def test_standard_logging_payload(model, turn_off_message_logging): _ = litellm.completion( model=model, messages=[{"role": "user", "content": "Hey, how's it going?"}], - # mock_response="Going well!", + mock_response="Going well!", ) time.sleep(2) diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py index e23285422..2794fe68b 100644 --- a/litellm/tests/test_exceptions.py +++ b/litellm/tests/test_exceptions.py @@ -7,6 +7,8 @@ from typing import Any from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler + sys.path.insert( 0, os.path.abspath("../..") ) # Adds the parent directory to the system path @@ -884,6 +886,42 @@ def _pre_call_utils( return data, original_function, mapped_target +def _pre_call_utils_httpx( + call_type: str, + data: dict, + client: Union[HTTPHandler, AsyncHTTPHandler], + sync_mode: bool, + streaming: Optional[bool], +): + mapped_target: Any = client.client + if call_type == "embedding": + data["input"] = "Hello world!" 
+ + if sync_mode: + original_function = litellm.embedding + else: + original_function = litellm.aembedding + elif call_type == "chat_completion": + data["messages"] = [{"role": "user", "content": "Hello world"}] + if streaming is True: + data["stream"] = True + + if sync_mode: + original_function = litellm.completion + else: + original_function = litellm.acompletion + elif call_type == "completion": + data["prompt"] = "Hello world" + if streaming is True: + data["stream"] = True + if sync_mode: + original_function = litellm.text_completion + else: + original_function = litellm.atext_completion + + return data, original_function, mapped_target + + @pytest.mark.parametrize( "sync_mode", [True, False], @@ -1006,3 +1044,111 @@ async def test_exception_with_headers(sync_mode, provider, model, call_type, str if exception_raised is False: print(resp) assert exception_raised + + +@pytest.mark.parametrize( + "sync_mode", + [True, False], +) +@pytest.mark.parametrize("streaming", [True, False]) +@pytest.mark.parametrize( + "provider, model, call_type", + [ + ("anthropic", "claude-3-haiku-20240307", "chat_completion"), + ], +) +@pytest.mark.asyncio +async def test_exception_with_headers_httpx( + sync_mode, provider, model, call_type, streaming +): + """ + User feedback: litellm says "No deployments available for selected model, Try again in 60 seconds" + but Azure says to retry in at most 9s + + ``` + {"message": "litellm.proxy.proxy_server.embeddings(): Exception occured - No deployments available for selected model, Try again in 60 seconds. Passed model=text-embedding-ada-002. pre-call-checks=False, allowed_model_region=n/a, cooldown_list=[('b49cbc9314273db7181fe69b1b19993f04efb88f2c1819947c538bac08097e4c', {'Exception Received': 'litellm.RateLimitError: AzureException RateLimitError - Requests to the Embeddings_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. 
Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.', 'Status Code': '429'})]", "level": "ERROR", "timestamp": "2024-08-22T03:25:36.900476"} + ``` + """ + print(f"Received args: {locals()}") + import openai + + if sync_mode: + client = HTTPHandler() + else: + client = AsyncHTTPHandler() + + data = {"model": model} + data, original_function, mapped_target = _pre_call_utils_httpx( + call_type=call_type, + data=data, + client=client, + sync_mode=sync_mode, + streaming=streaming, + ) + + cooldown_time = 30.0 + + def _return_exception(*args, **kwargs): + import datetime + + from httpx import Headers, HTTPStatusError, Request, Response + + # Create the Request object + request = Request("POST", "http://0.0.0.0:9000/chat/completions") + + # Create the Response object with the necessary headers and status code + response = Response( + status_code=429, + headers=Headers( + { + "date": "Sat, 21 Sep 2024 22:56:53 GMT", + "server": "uvicorn", + "retry-after": "30", + "content-length": "30", + "content-type": "application/json", + } + ), + request=request, + ) + + # Create and raise the HTTPStatusError exception + raise HTTPStatusError( + message="Error code: 429 - Rate Limit Error!", + request=request, + response=response, + ) + + with patch.object( + mapped_target, + "send", + side_effect=_return_exception, + ): + new_retry_after_mock_client = MagicMock(return_value=-1) + + litellm.utils._get_retry_after_from_exception_header = ( + new_retry_after_mock_client + ) + + exception_raised = False + try: + if sync_mode: + resp = original_function(**data, client=client) + if streaming: + for chunk in resp: + continue + else: + resp = await original_function(**data, client=client) + + if streaming: + async for chunk in resp: + continue + + except litellm.RateLimitError as e: + exception_raised = True + assert e.litellm_response_headers is not None + print("e.litellm_response_headers", e.litellm_response_headers) + assert int(e.litellm_response_headers["retry-after"]) == cooldown_time + + if exception_raised is False: + print(resp) + assert exception_raised diff --git a/litellm/tests/test_function_calling.py b/litellm/tests/test_function_calling.py index 67d4fe6c9..d323325f4 100644 --- a/litellm/tests/test_function_calling.py +++ b/litellm/tests/test_function_calling.py @@ -45,11 +45,12 @@ def get_current_weather(location, unit="fahrenheit"): @pytest.mark.parametrize( "model", [ - # "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-1106", # "mistral/mistral-large-latest", # "claude-3-haiku-20240307", # "gemini/gemini-1.5-pro", "anthropic.claude-3-sonnet-20240229-v1:0", + "groq/llama3-8b-8192", ], ) @pytest.mark.flaky(retries=3, delay=1) @@ -154,6 +155,105 @@ def test_aaparallel_function_call(model): # test_parallel_function_call() +from litellm.types.utils import ChatCompletionMessageToolCall, Function, Message + + +@pytest.mark.parametrize( + "model, provider", + [ + ( + "anthropic.claude-3-sonnet-20240229-v1:0", + "bedrock", + ), + ("claude-3-haiku-20240307", "anthropic"), + ], +) +@pytest.mark.parametrize( + "messages, expected_error_msg", + [ + ( + [ + { + "role": "user", + "content": "What's the weather like in San Francisco, Tokyo, and Paris? 
- give me 3 responses", + }, + Message( + content="Here are the current weather conditions for San Francisco, Tokyo, and Paris:", + role="assistant", + tool_calls=[ + ChatCompletionMessageToolCall( + index=1, + function=Function( + arguments='{"location": "San Francisco, CA", "unit": "fahrenheit"}', + name="get_current_weather", + ), + id="tooluse_Jj98qn6xQlOP_PiQr-w9iA", + type="function", + ) + ], + function_call=None, + ), + { + "tool_call_id": "tooluse_Jj98qn6xQlOP_PiQr-w9iA", + "role": "tool", + "name": "get_current_weather", + "content": '{"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}', + }, + ], + True, + ), + ( + [ + { + "role": "user", + "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses", + } + ], + False, + ), + ], +) +def test_parallel_function_call_anthropic_error_msg( + model, provider, messages, expected_error_msg +): + """ + Anthropic doesn't support tool calling without `tools=` param specified. + + Ensure this error is thrown when `tools=` param is not specified. But tool call requests are made. + + Reference Issue: https://github.com/BerriAI/litellm/issues/5747, https://github.com/BerriAI/litellm/issues/5388 + """ + try: + litellm.set_verbose = True + + messages = messages + + if expected_error_msg: + with pytest.raises(litellm.UnsupportedParamsError) as e: + second_response = litellm.completion( + model=model, + messages=messages, + temperature=0.2, + seed=22, + drop_params=True, + ) # get a new response from the model where it can see the function response + print("second response\n", second_response) + else: + second_response = litellm.completion( + model=model, + messages=messages, + temperature=0.2, + seed=22, + drop_params=True, + ) # get a new response from the model where it can see the function response + print("second response\n", second_response) + except litellm.InternalServerError as e: + print(e) + except litellm.RateLimitError as e: + print(e) + except Exception as e: + pytest.fail(f"Error occurred: {e}") + def test_parallel_function_call_stream(): try: diff --git a/litellm/tests/test_get_model_info.py b/litellm/tests/test_get_model_info.py index 3a923bd1e..19c72ab32 100644 --- a/litellm/tests/test_get_model_info.py +++ b/litellm/tests/test_get_model_info.py @@ -62,3 +62,9 @@ def test_get_model_info_shows_supports_prompt_caching(): info = litellm.get_model_info("deepseek/deepseek-chat") print("info", info) assert info.get("supports_prompt_caching") is True + + +def test_get_model_info_finetuned_models(): + info = litellm.get_model_info("ft:gpt-3.5-turbo:my-org:custom_suffix:id") + print("info", info) + assert info["input_cost_per_token"] == 0.000003 diff --git a/litellm/types/llms/anthropic.py b/litellm/types/llms/anthropic.py index 720abf8dd..9e5a48c53 100644 --- a/litellm/types/llms/anthropic.py +++ b/litellm/types/llms/anthropic.py @@ -18,13 +18,13 @@ class AnthropicMessagesTool(TypedDict, total=False): class AnthropicMessagesTextParam(TypedDict, total=False): - type: Literal["text"] - text: str + type: Required[Literal["text"]] + text: Required[str] cache_control: Optional[Union[dict, ChatCompletionCachedContent]] class AnthropicMessagesToolUseParam(TypedDict): - type: Literal["tool_use"] + type: Required[Literal["tool_use"]] id: str name: str input: dict @@ -58,8 +58,8 @@ class AnthropicImageParamSource(TypedDict): class AnthropicMessagesImageParam(TypedDict, total=False): - type: Literal["image"] - source: AnthropicImageParamSource + type: Required[Literal["image"]] + source: 
Required[AnthropicImageParamSource] cache_control: Optional[Union[dict, ChatCompletionCachedContent]] @@ -102,16 +102,13 @@ class AnthropicSystemMessageContent(TypedDict, total=False): cache_control: Optional[Union[dict, ChatCompletionCachedContent]] -class AnthropicMessagesRequest(TypedDict, total=False): - model: Required[str] - messages: Required[ - List[ - Union[ - AnthropicMessagesUserMessageParam, - AnthopicMessagesAssistantMessageParam, - ] - ] - ] +AllAnthropicMessageValues = Union[ + AnthropicMessagesUserMessageParam, AnthopicMessagesAssistantMessageParam +] + + +class AnthropicMessageRequestBase(TypedDict, total=False): + messages: Required[List[AllAnthropicMessageValues]] max_tokens: Required[int] metadata: AnthropicMetadata stop_sequences: List[str] @@ -123,6 +120,9 @@ class AnthropicMessagesRequest(TypedDict, total=False): top_k: int top_p: float + +class AnthropicMessagesRequest(AnthropicMessageRequestBase, total=False): + model: Required[str] # litellm param - used for tracking litellm proxy metadata in the request litellm_metadata: dict @@ -291,9 +291,9 @@ class AnthropicResponse(BaseModel): """Billing and rate-limit usage.""" -class AnthropicChatCompletionUsageBlock(TypedDict, total=False): - prompt_tokens: Required[int] - completion_tokens: Required[int] - total_tokens: Required[int] +from .openai import ChatCompletionUsageBlock + + +class AnthropicChatCompletionUsageBlock(ChatCompletionUsageBlock, total=False): cache_creation_input_tokens: int cache_read_input_tokens: int diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index b73b4bc3d..ee8336699 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -343,11 +343,14 @@ class ChatCompletionImageObject(TypedDict): image_url: Union[str, ChatCompletionImageUrlObject] +OpenAIMessageContent = Union[ + str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]] +] + + class OpenAIChatCompletionUserMessage(TypedDict): role: Literal["user"] - content: Union[ - str, Iterable[Union[ChatCompletionTextObject, ChatCompletionImageObject]] - ] + content: OpenAIMessageContent class ChatCompletionUserMessage(OpenAIChatCompletionUserMessage, total=False): diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 3dc644030..e21a883f3 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -7,7 +7,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple, Union from openai._models import BaseModel as OpenAIObject from openai.types.audio.transcription_create_params import FileTypes # type: ignore from openai.types.completion_usage import CompletionTokensDetails, CompletionUsage -from pydantic import ConfigDict, Field, PrivateAttr +from pydantic import ConfigDict, PrivateAttr from typing_extensions import Callable, Dict, Required, TypedDict, override from ..litellm_core_utils.core_helpers import map_finish_reason diff --git a/litellm/utils.py b/litellm/utils.py index fe3ef51f1..48b6f3e48 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -59,7 +59,12 @@ import litellm.litellm_core_utils.audio_utils.utils import litellm.litellm_core_utils.json_validation_rule from litellm.caching import DualCache from litellm.litellm_core_utils.core_helpers import map_finish_reason -from litellm.litellm_core_utils.exception_mapping_utils import get_error_message +from litellm.litellm_core_utils.exception_mapping_utils import ( + _get_litellm_response_headers, + _get_response_headers, + exception_type, + get_error_message, +) from 
litellm.litellm_core_utils.get_llm_provider_logic import ( _is_non_openai_azure_model, get_llm_provider, @@ -246,39 +251,6 @@ def print_verbose( pass -####### LOGGING ################### - - -def exception_logging( - additional_args={}, - logger_fn=None, - exception=None, -): - try: - model_call_details = {} - if exception: - model_call_details["exception"] = exception - model_call_details["additional_args"] = additional_args - # User Logging -> if you pass in a custom logging function or want to use sentry breadcrumbs - print_verbose( - f"Logging Details: logger_fn - {logger_fn} | callable(logger_fn) - {callable(logger_fn)}" - ) - if logger_fn and callable(logger_fn): - try: - logger_fn( - model_call_details - ) # Expectation: any logger function passed in by the user should accept a dict object - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - pass - - ####### RULES ################### @@ -2462,9 +2434,9 @@ def get_optional_params_transcription( if custom_llm_provider == "openai" or custom_llm_provider == "azure": optional_params = non_default_params elif custom_llm_provider == "groq": - supported_params = litellm.GroqConfig().get_supported_openai_params_stt() + supported_params = litellm.GroqSTTConfig().get_supported_openai_params_stt() _check_valid_arg(supported_params=supported_params) - optional_params = litellm.GroqConfig().map_openai_params_stt( + optional_params = litellm.GroqSTTConfig().map_openai_params_stt( non_default_params=non_default_params, optional_params=optional_params, model=model, @@ -2778,6 +2750,7 @@ def get_optional_params( parallel_tool_calls=None, drop_params=None, additional_drop_params=None, + messages: Optional[List[AllMessageValues]] = None, **kwargs, ): # retrieve all parameters passed to the function @@ -2857,6 +2830,7 @@ def get_optional_params( "parallel_tool_calls": None, "drop_params": None, "additional_drop_params": None, + "messages": None, } # filter out those parameters that were passed with non-default values @@ -2869,6 +2843,7 @@ def get_optional_params( and k != "api_version" and k != "drop_params" and k != "additional_drop_params" + and k != "messages" and k in default_params and v != default_params[k] and _should_drop_param(k=k, additional_drop_params=additional_drop_params) @@ -3033,7 +3008,9 @@ def get_optional_params( ) _check_valid_arg(supported_params=supported_params) optional_params = litellm.AnthropicConfig().map_openai_params( - non_default_params=non_default_params, optional_params=optional_params + non_default_params=non_default_params, + optional_params=optional_params, + messages=messages, ) elif custom_llm_provider == "cohere": ## check if unsupported param passed in @@ -3383,6 +3360,7 @@ def get_optional_params( if drop_params is not None and isinstance(drop_params, bool) else False ), + messages=messages, ) elif "ai21" in model: _check_valid_arg(supported_params=supported_params) @@ -4752,6 +4730,28 @@ def _strip_stable_vertex_version(model_name) -> str: return re.sub(r"-\d+$", "", model_name) +def _strip_openai_finetune_model_name(model_name: str) -> str: + """ + Strips the organization, custom suffix, and ID from an OpenAI fine-tuned model name. 
+ + input: ft:gpt-3.5-turbo:my-org:custom_suffix:id + output: ft:gpt-3.5-turbo + + Args: + model_name (str): The full model name + + Returns: + str: The stripped model name + """ + return re.sub(r"(:[^:]+){3}$", "", model_name) + + +def _strip_model_name(model: str) -> str: + strip_version = _strip_stable_vertex_version(model_name=model) + strip_finetune = _strip_openai_finetune_model_name(model_name=strip_version) + return strip_finetune + + def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo: """ Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model. @@ -4857,14 +4857,14 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod except: pass combined_model_name = model - combined_stripped_model_name = _strip_stable_vertex_version( - model_name=model - ) + stripped_model_name = _strip_model_name(model=model) + combined_stripped_model_name = stripped_model_name else: split_model = model combined_model_name = "{}/{}".format(custom_llm_provider, model) + stripped_model_name = _strip_model_name(model=model) combined_stripped_model_name = "{}/{}".format( - custom_llm_provider, _strip_stable_vertex_version(model_name=model) + custom_llm_provider, _strip_model_name(model=model) ) ######################### @@ -4894,8 +4894,9 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod Check if: (in order of specificity) 1. 'custom_llm_provider/model' in litellm.model_cost. Checks "groq/llama3-8b-8192" if model="llama3-8b-8192" and custom_llm_provider="groq" 2. 'combined_stripped_model_name' in litellm.model_cost. Checks if 'gemini/gemini-1.5-flash' in model map, if 'gemini/gemini-1.5-flash-001' given. - 3. 'model' in litellm.model_cost. Checks "groq/llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192" and custom_llm_provider=None - 4. 'split_model' in litellm.model_cost. Checks "llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192" + 3. 'stripped_model_name' in litellm.model_cost. Checks if 'ft:gpt-3.5-turbo' in model map, if 'ft:gpt-3.5-turbo:my-org:custom_suffix:id' given. + 4. 'model' in litellm.model_cost. Checks "groq/llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192" and custom_llm_provider=None + 5. 'split_model' in litellm.model_cost. 
Checks "llama3-8b-8192" in litellm.model_cost if model="groq/llama3-8b-8192" """ if combined_model_name in litellm.model_cost: key = combined_model_name @@ -4912,7 +4913,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod else: raise Exception elif combined_stripped_model_name in litellm.model_cost: - key = model + key = combined_stripped_model_name _model_info = litellm.model_cost[combined_stripped_model_name] _model_info["supported_openai_params"] = supported_openai_params if ( @@ -4923,6 +4924,34 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod "litellm_provider" ].startswith("vertex_ai"): pass + elif custom_llm_provider == "fireworks_ai" and _model_info[ + "litellm_provider" + ].startswith("fireworks_ai"): + pass + else: + raise Exception( + "Got provider={}, Expected provider={}, for model={}".format( + _model_info["litellm_provider"], + custom_llm_provider, + model, + ) + ) + elif stripped_model_name in litellm.model_cost: + key = stripped_model_name + _model_info = litellm.model_cost[stripped_model_name] + _model_info["supported_openai_params"] = supported_openai_params + if ( + "litellm_provider" in _model_info + and _model_info["litellm_provider"] != custom_llm_provider + ): + if custom_llm_provider == "vertex_ai" and _model_info[ + "litellm_provider" + ].startswith("vertex_ai"): + pass + elif custom_llm_provider == "fireworks_ai" and _model_info[ + "litellm_provider" + ].startswith("fireworks_ai"): + pass else: raise Exception( "Got provider={}, Expected provider={}, for model={}".format( @@ -5052,7 +5081,7 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod "supports_prompt_caching", False ), ) - except Exception as e: + except Exception: raise Exception( "This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json.".format( model, custom_llm_provider @@ -6275,1988 +6304,6 @@ def get_model_list(): ) -####### EXCEPTION MAPPING ################ -def _get_litellm_response_headers( - original_exception: Exception, -) -> Optional[httpx.Headers]: - """ - Extract and return the response headers from a mapped exception, if present. - - Used for accurate retry logic. - """ - _response_headers: Optional[httpx.Headers] = None - try: - _response_headers = getattr( - original_exception, "litellm_response_headers", None - ) - except Exception: - return None - - return _response_headers - - -def _get_response_headers(original_exception: Exception) -> Optional[httpx.Headers]: - """ - Extract and return the response headers from an exception, if present. - - Used for accurate retry logic. 
- """ - _response_headers: Optional[httpx.Headers] = None - try: - _response_headers = getattr(original_exception, "headers", None) - error_response = getattr(original_exception, "response", None) - if _response_headers is None and error_response: - _response_headers = getattr(error_response, "headers", None) - except Exception: - return None - - return _response_headers - - -def exception_type( - model, - original_exception, - custom_llm_provider, - completion_kwargs={}, - extra_kwargs={}, -): - global user_logger_fn, liteDebuggerClient - - if any( - isinstance(original_exception, exc_type) - for exc_type in litellm.LITELLM_EXCEPTION_TYPES - ): - return original_exception - exception_mapping_worked = False - exception_provider = custom_llm_provider - if litellm.suppress_debug_info is False: - print() # noqa - print( # noqa - "\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m" # noqa - ) # noqa - print( # noqa - "LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'." # noqa - ) # noqa - print() # noqa - - litellm_response_headers = _get_response_headers( - original_exception=original_exception - ) - try: - if model: - if hasattr(original_exception, "message"): - error_str = str(original_exception.message) - else: - error_str = str(original_exception) - if isinstance(original_exception, BaseException): - exception_type = type(original_exception).__name__ - else: - exception_type = "" - - ################################################################################ - # Common Extra information needed for all providers - # We pass num retries, api_base, vertex_deployment etc to the exception here - ################################################################################ - extra_information = "" - try: - _api_base = litellm.get_api_base( - model=model, optional_params=extra_kwargs - ) - messages = litellm.get_first_chars_messages(kwargs=completion_kwargs) - _vertex_project = extra_kwargs.get("vertex_project") - _vertex_location = extra_kwargs.get("vertex_location") - _metadata = extra_kwargs.get("metadata", {}) or {} - _model_group = _metadata.get("model_group") - _deployment = _metadata.get("deployment") - extra_information = f"\nModel: {model}" - - if ( - isinstance(custom_llm_provider, str) - and len(custom_llm_provider) > 0 - ): - exception_provider = ( - custom_llm_provider[0].upper() - + custom_llm_provider[1:] - + "Exception" - ) - - if _api_base: - extra_information += f"\nAPI Base: `{_api_base}`" - if ( - messages - and len(messages) > 0 - and litellm.redact_messages_in_exceptions is False - ): - extra_information += f"\nMessages: `{messages}`" - - if _model_group is not None: - extra_information += f"\nmodel_group: `{_model_group}`\n" - if _deployment is not None: - extra_information += f"\ndeployment: `{_deployment}`\n" - if _vertex_project is not None: - extra_information += f"\nvertex_project: `{_vertex_project}`\n" - if _vertex_location is not None: - extra_information += f"\nvertex_location: `{_vertex_location}`\n" - - # on litellm proxy add key name + team to exceptions - extra_information = _add_key_name_and_team_to_alert( - request_info=extra_information, metadata=_metadata - ) - except Exception: - # DO NOT LET this Block raising the original exception - pass - - ################################################################################ - # End of Common Extra information Needed for all providers - ################################################################################ - - 
################################################################################ - #################### Start of Provider Exception mapping #################### - ################################################################################ - - if "Request Timeout Error" in error_str or "Request timed out" in error_str: - exception_mapping_worked = True - raise Timeout( - message=f"APITimeoutError - Request timed out. \nerror_str: {error_str}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - - if ( - custom_llm_provider == "openai" - or custom_llm_provider == "text-completion-openai" - or custom_llm_provider == "custom_openai" - or custom_llm_provider in litellm.openai_compatible_providers - ): - # custom_llm_provider is openai, make it OpenAI - message = get_error_message(error_obj=original_exception) - if message is None: - if hasattr(original_exception, "message"): - message = original_exception.message - else: - message = str(original_exception) - - if message is not None and isinstance( - message, str - ): # done to prevent user-confusion. Relevant issue - https://github.com/BerriAI/litellm/issues/1414 - message = message.replace("OPENAI", custom_llm_provider.upper()) - message = message.replace( - "openai.OpenAIError", - "{}.{}Error".format(custom_llm_provider, custom_llm_provider), - ) - if custom_llm_provider == "openai": - exception_provider = "OpenAI" + "Exception" - else: - exception_provider = ( - custom_llm_provider[0].upper() - + custom_llm_provider[1:] - + "Exception" - ) - - if ( - "This model's maximum context length is" in error_str - or "string too long. Expected a string with maximum length" - in error_str - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"ContextWindowExceededError: {exception_provider} - {message}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif ( - "invalid_request_error" in error_str - and "model_not_found" in error_str - ): - exception_mapping_worked = True - raise NotFoundError( - message=f"{exception_provider} - {message}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif "A timeout occurred" in error_str: - exception_mapping_worked = True - raise Timeout( - message=f"{exception_provider} - {message}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - elif ( - "invalid_request_error" in error_str - and "content_policy_violation" in error_str - ): - exception_mapping_worked = True - raise ContentPolicyViolationError( - message=f"ContentPolicyViolationError: {exception_provider} - {message}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif ( - "invalid_request_error" in error_str - and "Incorrect API key provided" not in error_str - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"{exception_provider} - {message}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif "Web server is returning an unknown error" in error_str: - exception_mapping_worked = True - raise litellm.InternalServerError( - message=f"{exception_provider} - {message}", - model=model, - llm_provider=custom_llm_provider, - ) - elif "Request too large" in 
error_str: - exception_mapping_worked = True - raise RateLimitError( - message=f"RateLimitError: {exception_provider} - {message}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif ( - "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable" - in error_str - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"AuthenticationError: {exception_provider} - {message}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif "Mistral API raised a streaming error" in error_str: - exception_mapping_worked = True - _request = httpx.Request( - method="POST", url="https://api.openai.com/v1" - ) - raise APIError( - status_code=500, - message=f"{exception_provider} - {message}", - llm_provider=custom_llm_provider, - model=model, - request=_request, - litellm_debug_info=extra_information, - ) - elif hasattr(original_exception, "status_code"): - exception_mapping_worked = True - if original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"{exception_provider} - {message}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AuthenticationError: {exception_provider} - {message}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 404: - exception_mapping_worked = True - raise NotFoundError( - message=f"NotFoundError: {exception_provider} - {message}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"Timeout Error: {exception_provider} - {message}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"{exception_provider} - {message}", - model=model, - llm_provider=custom_llm_provider, - response=getattr(original_exception, "response", None), - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"RateLimitError: {exception_provider} - {message}", - model=model, - llm_provider=custom_llm_provider, - response=getattr(original_exception, "response", None), - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"ServiceUnavailableError: {exception_provider} - {message}", - model=model, - llm_provider=custom_llm_provider, - response=getattr(original_exception, "response", None), - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 504: # gateway timeout error - exception_mapping_worked = True - raise Timeout( - message=f"Timeout Error: {exception_provider} - {message}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - 
else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"APIError: {exception_provider} - {message}", - llm_provider=custom_llm_provider, - model=model, - request=getattr(original_exception, "request", None), - litellm_debug_info=extra_information, - ) - else: - # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors - # exception_mapping_worked = True - raise APIConnectionError( - message=f"APIConnectionError: {exception_provider} - {message}", - llm_provider=custom_llm_provider, - model=model, - litellm_debug_info=extra_information, - request=httpx.Request( - method="POST", url="https://api.openai.com/v1/" - ), - ) - elif custom_llm_provider == "anthropic": # one of the anthropics - if "prompt is too long" in error_str or "prompt: length" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message="AnthropicError - {}".format(error_str), - model=model, - llm_provider="anthropic", - ) - if "Invalid API Key" in error_str: - exception_mapping_worked = True - raise AuthenticationError( - message="AnthropicError - {}".format(error_str), - model=model, - llm_provider="anthropic", - ) - if "content filtering policy" in error_str: - exception_mapping_worked = True - raise ContentPolicyViolationError( - message="AnthropicError - {}".format(error_str), - model=model, - llm_provider="anthropic", - ) - if "Client error '400 Bad Request'" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message="AnthropicError - {}".format(error_str), - model=model, - llm_provider="anthropic", - ) - if hasattr(original_exception, "status_code"): - print_verbose(f"status_code: {original_exception.status_code}") - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AnthropicException - {error_str}", - llm_provider="anthropic", - model=model, - ) - elif ( - original_exception.status_code == 400 - or original_exception.status_code == 413 - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"AnthropicException - {error_str}", - model=model, - llm_provider="anthropic", - ) - elif original_exception.status_code == 404: - exception_mapping_worked = True - raise NotFoundError( - message=f"AnthropicException - {error_str}", - model=model, - llm_provider="anthropic", - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"AnthropicException - {error_str}", - model=model, - llm_provider="anthropic", - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"AnthropicException - {error_str}", - llm_provider="anthropic", - model=model, - ) - elif ( - original_exception.status_code == 500 - or original_exception.status_code == 529 - ): - exception_mapping_worked = True - raise litellm.InternalServerError( - message=f"AnthropicException - {error_str}. Handle with `litellm.InternalServerError`.", - llm_provider="anthropic", - model=model, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise litellm.ServiceUnavailableError( - message=f"AnthropicException - {error_str}. 
Handle with `litellm.ServiceUnavailableError`.", - llm_provider="anthropic", - model=model, - ) - elif custom_llm_provider == "replicate": - if "Incorrect authentication token" in error_str: - exception_mapping_worked = True - raise AuthenticationError( - message=f"ReplicateException - {error_str}", - llm_provider="replicate", - model=model, - response=original_exception.response, - ) - elif "input is too long" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"ReplicateException - {error_str}", - model=model, - llm_provider="replicate", - response=original_exception.response, - ) - elif exception_type == "ModelError": - exception_mapping_worked = True - raise BadRequestError( - message=f"ReplicateException - {error_str}", - model=model, - llm_provider="replicate", - response=original_exception.response, - ) - elif "Request was throttled" in error_str: - exception_mapping_worked = True - raise RateLimitError( - message=f"ReplicateException - {error_str}", - llm_provider="replicate", - model=model, - response=original_exception.response, - ) - elif hasattr(original_exception, "status_code"): - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"ReplicateException - {original_exception.message}", - llm_provider="replicate", - model=model, - response=original_exception.response, - ) - elif ( - original_exception.status_code == 400 - or original_exception.status_code == 413 - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"ReplicateException - {original_exception.message}", - model=model, - llm_provider="replicate", - response=original_exception.response, - ) - elif original_exception.status_code == 422: - exception_mapping_worked = True - raise UnprocessableEntityError( - message=f"ReplicateException - {original_exception.message}", - model=model, - llm_provider="replicate", - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"ReplicateException - {original_exception.message}", - model=model, - llm_provider="replicate", - ) - elif original_exception.status_code == 422: - exception_mapping_worked = True - raise UnprocessableEntityError( - message=f"ReplicateException - {original_exception.message}", - llm_provider="replicate", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"ReplicateException - {original_exception.message}", - llm_provider="replicate", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 500: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"ReplicateException - {original_exception.message}", - llm_provider="replicate", - model=model, - response=original_exception.response, - ) - exception_mapping_worked = True - raise APIError( - status_code=500, - message=f"ReplicateException - {str(original_exception)}", - llm_provider="replicate", - model=model, - request=httpx.Request( - method="POST", - url="https://api.replicate.com/v1/deployments", - ), - ) - elif custom_llm_provider == "watsonx": - if "token_quota_reached" in error_str: - exception_mapping_worked = True - raise RateLimitError( - message=f"WatsonxException: Rate Limit Errror - {error_str}", - llm_provider="watsonx", - model=model, - response=original_exception.response, - ) - elif ( - 
custom_llm_provider == "predibase" - or custom_llm_provider == "databricks" - ): - if "authorization denied for" in error_str: - exception_mapping_worked = True - - # Predibase returns the raw API Key in the response - this block ensures it's not returned in the exception - if ( - error_str is not None - and isinstance(error_str, str) - and "bearer" in error_str.lower() - ): - # only keep the first 10 chars after the occurnence of "bearer" - _bearer_token_start_index = error_str.lower().find("bearer") - error_str = error_str[: _bearer_token_start_index + 14] - error_str += "XXXXXXX" + '"' - - raise AuthenticationError( - message=f"{custom_llm_provider}Exception: Authentication Error - {error_str}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif hasattr(original_exception, "status_code"): - if original_exception.status_code == 500: - exception_mapping_worked = True - raise litellm.InternalServerError( - message=f"{custom_llm_provider}Exception - {original_exception.message}", - llm_provider=custom_llm_provider, - model=model, - ) - elif ( - original_exception.status_code == 401 - or original_exception.status_code == 403 - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"{custom_llm_provider}Exception - {original_exception.message}", - llm_provider=custom_llm_provider, - model=model, - ) - elif original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"{custom_llm_provider}Exception - {original_exception.message}", - llm_provider=custom_llm_provider, - model=model, - ) - elif original_exception.status_code == 404: - exception_mapping_worked = True - raise NotFoundError( - message=f"{custom_llm_provider}Exception - {original_exception.message}", - llm_provider=custom_llm_provider, - model=model, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"{custom_llm_provider}Exception - {original_exception.message}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - elif ( - original_exception.status_code == 422 - or original_exception.status_code == 424 - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"{custom_llm_provider}Exception - {original_exception.message}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"{custom_llm_provider}Exception - {original_exception.message}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"{custom_llm_provider}Exception - {original_exception.message}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 504: # gateway timeout error - exception_mapping_worked = True - raise Timeout( - message=f"{custom_llm_provider}Exception - {original_exception.message}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - elif custom_llm_provider == "bedrock": - if ( - "too many tokens" in error_str - or "expected maxLength:" in error_str - or "Input is too long" in error_str - or "prompt: length: 1.." 
in error_str - or "Too many input tokens" in error_str - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"BedrockException: Context Window Error - {error_str}", - model=model, - llm_provider="bedrock", - ) - elif "Malformed input request" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"BedrockException - {error_str}", - model=model, - llm_provider="bedrock", - response=original_exception.response, - ) - elif "A conversation must start with a user message." in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"BedrockException - {error_str}\n. Pass in default user message via `completion(..,user_continue_message=)` or enable `litellm.modify_params=True`.\nFor Proxy: do via `litellm_settings::modify_params: True` or user_continue_message under `litellm_params`", - model=model, - llm_provider="bedrock", - response=original_exception.response, - ) - elif ( - "Unable to locate credentials" in error_str - or "The security token included in the request is invalid" - in error_str - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"BedrockException Invalid Authentication - {error_str}", - model=model, - llm_provider="bedrock", - response=original_exception.response, - ) - elif "AccessDeniedException" in error_str: - exception_mapping_worked = True - raise PermissionDeniedError( - message=f"BedrockException PermissionDeniedError - {error_str}", - model=model, - llm_provider="bedrock", - response=original_exception.response, - ) - elif ( - "throttlingException" in error_str - or "ThrottlingException" in error_str - ): - exception_mapping_worked = True - raise RateLimitError( - message=f"BedrockException: Rate Limit Error - {error_str}", - model=model, - llm_provider="bedrock", - response=original_exception.response, - ) - elif ( - "Connect timeout on endpoint URL" in error_str - or "timed out" in error_str - ): - exception_mapping_worked = True - raise Timeout( - message=f"BedrockException: Timeout Error - {error_str}", - model=model, - llm_provider="bedrock", - ) - elif "Could not process image" in error_str: - exception_mapping_worked = True - raise litellm.InternalServerError( - message=f"BedrockException - {error_str}", - model=model, - llm_provider="bedrock", - ) - elif hasattr(original_exception, "status_code"): - if original_exception.status_code == 500: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"BedrockException - {original_exception.message}", - llm_provider="bedrock", - model=model, - response=httpx.Response( - status_code=500, - request=httpx.Request( - method="POST", url="https://api.openai.com/v1/" - ), - ), - ) - elif original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"BedrockException - {original_exception.message}", - llm_provider="bedrock", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"BedrockException - {original_exception.message}", - llm_provider="bedrock", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 404: - exception_mapping_worked = True - raise NotFoundError( - message=f"BedrockException - {original_exception.message}", - llm_provider="bedrock", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 408: - 
exception_mapping_worked = True - raise Timeout( - message=f"BedrockException - {original_exception.message}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"BedrockException - {original_exception.message}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"BedrockException - {original_exception.message}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"BedrockException - {original_exception.message}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 504: # gateway timeout error - exception_mapping_worked = True - raise Timeout( - message=f"BedrockException - {original_exception.message}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - elif custom_llm_provider == "sagemaker": - if "Unable to locate credentials" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"litellm.BadRequestError: SagemakerException - {error_str}", - model=model, - llm_provider="sagemaker", - response=original_exception.response, - ) - elif ( - "Input validation error: `best_of` must be > 0 and <= 2" - in error_str - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"SagemakerException - the value of 'n' must be > 0 and <= 2 for sagemaker endpoints", - model=model, - llm_provider="sagemaker", - response=original_exception.response, - ) - elif ( - "`inputs` tokens + `max_new_tokens` must be <=" in error_str - or "instance type with more CPU capacity or memory" in error_str - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"SagemakerException - {error_str}", - model=model, - llm_provider="sagemaker", - response=original_exception.response, - ) - elif ( - custom_llm_provider == "vertex_ai" - or custom_llm_provider == "vertex_ai_beta" - or custom_llm_provider == "gemini" - ): - if ( - "Vertex AI API has not been used in project" in error_str - or "Unable to find your project" in error_str - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"litellm.BadRequestError: VertexAIException - {error_str}", - model=model, - llm_provider="vertex_ai", - response=httpx.Response( - status_code=400, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ), - litellm_debug_info=extra_information, - ) - if "400 Request payload size exceeds" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"VertexException - {error_str}", - model=model, - llm_provider=custom_llm_provider, - ) - elif ( - "None Unknown Error." in error_str - or "Content has no parts." 
in error_str - ): - exception_mapping_worked = True - raise litellm.InternalServerError( - message=f"litellm.InternalServerError: VertexAIException - {error_str}", - model=model, - llm_provider="vertex_ai", - response=httpx.Response( - status_code=500, - content=str(original_exception), - request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore - ), - litellm_debug_info=extra_information, - ) - elif "API key not valid." in error_str: - exception_mapping_worked = True - raise AuthenticationError( - message=f"{custom_llm_provider}Exception - {error_str}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - elif "403" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"VertexAIException BadRequestError - {error_str}", - model=model, - llm_provider="vertex_ai", - response=httpx.Response( - status_code=403, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ), - litellm_debug_info=extra_information, - ) - elif ( - "The response was blocked." in error_str - or "Output blocked by content filtering policy" - in error_str # anthropic on vertex ai - ): - exception_mapping_worked = True - raise ContentPolicyViolationError( - message=f"VertexAIException ContentPolicyViolationError - {error_str}", - model=model, - llm_provider="vertex_ai", - litellm_debug_info=extra_information, - response=httpx.Response( - status_code=400, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ), - ) - elif ( - "429 Quota exceeded" in error_str - or "Quota exceeded for" in error_str - or "IndexError: list index out of range" in error_str - or "429 Unable to submit request because the service is temporarily out of capacity." 
- in error_str - ): - exception_mapping_worked = True - raise RateLimitError( - message=f"litellm.RateLimitError: VertexAIException - {error_str}", - model=model, - llm_provider="vertex_ai", - litellm_debug_info=extra_information, - response=httpx.Response( - status_code=429, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ), - ) - elif "500 Internal Server Error" in error_str: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"litellm.ServiceUnavailableError: VertexAIException - {error_str}", - model=model, - llm_provider="vertex_ai", - litellm_debug_info=extra_information, - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"VertexAIException BadRequestError - {error_str}", - model=model, - llm_provider="vertex_ai", - litellm_debug_info=extra_information, - response=httpx.Response( - status_code=400, - request=httpx.Request( - method="POST", - url="https://cloud.google.com/vertex-ai/", - ), - ), - ) - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"VertexAIException - {original_exception.message}", - llm_provider=custom_llm_provider, - model=model, - ) - if original_exception.status_code == 404: - exception_mapping_worked = True - raise NotFoundError( - message=f"VertexAIException - {original_exception.message}", - llm_provider=custom_llm_provider, - model=model, - ) - if original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"VertexAIException - {original_exception.message}", - llm_provider=custom_llm_provider, - model=model, - ) - - if original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"litellm.RateLimitError: VertexAIException - {error_str}", - model=model, - llm_provider="vertex_ai", - litellm_debug_info=extra_information, - response=httpx.Response( - status_code=429, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ), - ) - if original_exception.status_code == 500: - exception_mapping_worked = True - raise litellm.InternalServerError( - message=f"VertexAIException InternalServerError - {error_str}", - model=model, - llm_provider="vertex_ai", - litellm_debug_info=extra_information, - response=httpx.Response( - status_code=500, - content=str(original_exception), - request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore - ), - ) - if original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"VertexAIException - {original_exception.message}", - llm_provider=custom_llm_provider, - model=model, - ) - elif custom_llm_provider == "palm" or custom_llm_provider == "gemini": - if "503 Getting metadata" in error_str: - # auth errors look like this - # 503 Getting metadata from plugin failed with error: Reauthentication is needed. Please run `gcloud auth application-default login` to reauthenticate. - exception_mapping_worked = True - raise BadRequestError( - message=f"GeminiException - Invalid api key", - model=model, - llm_provider="palm", - response=original_exception.response, - ) - if ( - "504 Deadline expired before operation could complete." 
in error_str - or "504 Deadline Exceeded" in error_str - ): - exception_mapping_worked = True - raise Timeout( - message=f"GeminiException - {original_exception.message}", - model=model, - llm_provider="palm", - ) - if "400 Request payload size exceeds" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"GeminiException - {error_str}", - model=model, - llm_provider="palm", - response=original_exception.response, - ) - if ( - "500 An internal error has occurred." in error_str - or "list index out of range" in error_str - ): - exception_mapping_worked = True - raise APIError( - status_code=getattr(original_exception, "status_code", 500), - message=f"GeminiException - {original_exception.message}", - llm_provider="palm", - model=model, - request=httpx.Response( - status_code=429, - request=httpx.Request( - method="POST", - url=" https://cloud.google.com/vertex-ai/", - ), - ), - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"GeminiException - {error_str}", - model=model, - llm_provider="palm", - response=original_exception.response, - ) - # Dailed: Error occurred: 400 Request payload size exceeds the limit: 20000 bytes - elif custom_llm_provider == "cloudflare": - if "Authentication error" in error_str: - exception_mapping_worked = True - raise AuthenticationError( - message=f"Cloudflare Exception - {original_exception.message}", - llm_provider="cloudflare", - model=model, - response=original_exception.response, - ) - if "must have required property" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"Cloudflare Exception - {original_exception.message}", - llm_provider="cloudflare", - model=model, - response=original_exception.response, - ) - elif ( - custom_llm_provider == "cohere" or custom_llm_provider == "cohere_chat" - ): # Cohere - if ( - "invalid api token" in error_str - or "No API key provided." 
in error_str - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif "too many tokens" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"CohereException - {original_exception.message}", - model=model, - llm_provider="cohere", - response=original_exception.response, - ) - elif hasattr(original_exception, "status_code"): - if ( - original_exception.status_code == 400 - or original_exception.status_code == 498 - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - ) - elif original_exception.status_code == 500: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif ( - "CohereConnectionError" in exception_type - ): # cohere seems to fire these errors when we load test it (1k+ messages / min) - exception_mapping_worked = True - raise RateLimitError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif "invalid type:" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - elif "Unexpected server error" in error_str: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - response=original_exception.response, - ) - else: - if hasattr(original_exception, "status_code"): - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"CohereException - {original_exception.message}", - llm_provider="cohere", - model=model, - request=original_exception.request, - ) - raise original_exception - elif custom_llm_provider == "huggingface": - if "length limit exceeded" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=error_str, - model=model, - llm_provider="huggingface", - response=original_exception.response, - ) - elif "A valid user token is required" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=error_str, - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - elif "Rate limit reached" in error_str: - exception_mapping_worked = True - raise RateLimitError( - message=error_str, - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 400: - exception_mapping_worked = True - raise 
BadRequestError( - message=f"HuggingfaceException - {original_exception.message}", - model=model, - llm_provider="huggingface", - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"HuggingfaceException - {original_exception.message}", - model=model, - llm_provider="huggingface", - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"HuggingfaceException - {original_exception.message}", - llm_provider="huggingface", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "ai21": - if hasattr(original_exception, "message"): - if "Prompt has too many tokens" in original_exception.message: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - response=original_exception.response, - ) - if "Bad or missing API token." in original_exception.message: - exception_mapping_worked = True - raise BadRequestError( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - response=original_exception.response, - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AI21Exception - {original_exception.message}", - llm_provider="ai21", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - ) - if original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"AI21Exception - {original_exception.message}", - model=model, - llm_provider="ai21", - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"AI21Exception - {original_exception.message}", - llm_provider="ai21", - model=model, - response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"AI21Exception - {original_exception.message}", - llm_provider="ai21", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "nlp_cloud": - if "detail" in error_str: - if "Input text length should not exceed" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"NLPCloudException - {error_str}", - model=model, - llm_provider="nlp_cloud", - response=original_exception.response, - ) - elif "value is not a valid" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"NLPCloudException - {error_str}", - model=model, - llm_provider="nlp_cloud", - 
response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=500, - message=f"NLPCloudException - {error_str}", - model=model, - llm_provider="nlp_cloud", - request=original_exception.request, - ) - if hasattr( - original_exception, "status_code" - ): # https://docs.nlpcloud.com/?shell#errors - if ( - original_exception.status_code == 400 - or original_exception.status_code == 406 - or original_exception.status_code == 413 - or original_exception.status_code == 422 - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - response=original_exception.response, - ) - elif ( - original_exception.status_code == 401 - or original_exception.status_code == 403 - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - response=original_exception.response, - ) - elif ( - original_exception.status_code == 522 - or original_exception.status_code == 524 - ): - exception_mapping_worked = True - raise Timeout( - message=f"NLPCloudException - {original_exception.message}", - model=model, - llm_provider="nlp_cloud", - ) - elif ( - original_exception.status_code == 429 - or original_exception.status_code == 402 - ): - exception_mapping_worked = True - raise RateLimitError( - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - response=original_exception.response, - ) - elif ( - original_exception.status_code == 500 - or original_exception.status_code == 503 - ): - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - request=original_exception.request, - ) - elif ( - original_exception.status_code == 504 - or original_exception.status_code == 520 - ): - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"NLPCloudException - {original_exception.message}", - model=model, - llm_provider="nlp_cloud", - response=original_exception.response, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"NLPCloudException - {original_exception.message}", - llm_provider="nlp_cloud", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "together_ai": - import json - - try: - error_response = json.loads(error_str) - except: - error_response = {"error": error_str} - if ( - "error" in error_response - and "`inputs` tokens + `max_new_tokens` must be <=" - in error_response["error"] - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - elif ( - "error" in error_response - and "invalid private key" in error_response["error"] - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"TogetherAIException - {error_response['error']}", - llm_provider="together_ai", - model=model, - response=original_exception.response, - ) - elif ( - "error" in error_response - and "INVALID_ARGUMENT" in error_response["error"] - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - 
model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - elif "A timeout occurred" in error_str: - exception_mapping_worked = True - raise Timeout( - message=f"TogetherAIException - {error_str}", - model=model, - llm_provider="together_ai", - ) - elif ( - "error" in error_response - and "API key doesn't match expected format." - in error_response["error"] - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - elif ( - "error_type" in error_response - and error_response["error_type"] == "validation" - ): - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"TogetherAIException - {original_exception.message}", - model=model, - llm_provider="together_ai", - ) - elif original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"TogetherAIException - {error_response['error']}", - model=model, - llm_provider="together_ai", - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"TogetherAIException - {original_exception.message}", - llm_provider="together_ai", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 524: - exception_mapping_worked = True - raise Timeout( - message=f"TogetherAIException - {original_exception.message}", - llm_provider="together_ai", - model=model, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"TogetherAIException - {original_exception.message}", - llm_provider="together_ai", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "aleph_alpha": - if ( - "This is longer than the model's maximum context length" - in error_str - ): - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif "InvalidToken" in error_str or "No token provided" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif hasattr(original_exception, "status_code"): - print_verbose(f"status code: {original_exception.status_code}") - if original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - ) - elif original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"AlephAlphaException - 
{original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - elif original_exception.status_code == 500: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"AlephAlphaException - {original_exception.message}", - llm_provider="aleph_alpha", - model=model, - response=original_exception.response, - ) - raise original_exception - raise original_exception - elif ( - custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat" - ): - if isinstance(original_exception, dict): - error_str = original_exception.get("error", "") - else: - error_str = str(original_exception) - if "no such file or directory" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"OllamaException: Invalid Model/Model not loaded - {original_exception}", - model=model, - llm_provider="ollama", - response=original_exception.response, - ) - elif "Failed to establish a new connection" in error_str: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"OllamaException: {original_exception}", - llm_provider="ollama", - model=model, - response=original_exception.response, - ) - elif "Invalid response object from API" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"OllamaException: {original_exception}", - llm_provider="ollama", - model=model, - response=original_exception.response, - ) - elif "Read timed out" in error_str: - exception_mapping_worked = True - raise Timeout( - message=f"OllamaException: {original_exception}", - llm_provider="ollama", - model=model, - ) - elif custom_llm_provider == "vllm": - if hasattr(original_exception, "status_code"): - if original_exception.status_code == 0: - exception_mapping_worked = True - raise APIConnectionError( - message=f"VLLMException - {original_exception.message}", - llm_provider="vllm", - model=model, - request=original_exception.request, - ) - elif custom_llm_provider == "azure" or custom_llm_provider == "azure_text": - message = get_error_message(error_obj=original_exception) - if message is None: - if hasattr(original_exception, "message"): - message = original_exception.message - else: - message = str(original_exception) - - if "Internal server error" in error_str: - exception_mapping_worked = True - raise litellm.InternalServerError( - message=f"AzureException Internal server error - {message}", - llm_provider="azure", - model=model, - litellm_debug_info=extra_information, - ) - elif "This model's maximum context length is" in error_str: - exception_mapping_worked = True - raise ContextWindowExceededError( - message=f"AzureException ContextWindowExceededError - {message}", - llm_provider="azure", - model=model, - litellm_debug_info=extra_information, - ) - elif "DeploymentNotFound" in error_str: - exception_mapping_worked = True - raise NotFoundError( - message=f"AzureException NotFoundError - {message}", - llm_provider="azure", - model=model, - litellm_debug_info=extra_information, - ) - elif ( - ( - "invalid_request_error" in error_str - and "content_policy_violation" in error_str - ) - or ( - "The response was filtered due to the prompt triggering Azure OpenAI's content management" - in error_str - ) - or "Your task failed as a result of our safety system" in error_str - or "The model produced invalid content" in error_str - or "content_filter_policy" in error_str - ): - exception_mapping_worked = True - raise ContentPolicyViolationError( - message=f"litellm.ContentPolicyViolationError: 
AzureException - {message}", - llm_provider="azure", - model=model, - litellm_debug_info=extra_information, - ) - elif "invalid_request_error" in error_str: - exception_mapping_worked = True - raise BadRequestError( - message=f"AzureException BadRequestError - {message}", - llm_provider="azure", - model=model, - litellm_debug_info=extra_information, - ) - elif ( - "The api_key client option must be set either by passing api_key to the client or by setting" - in error_str - ): - exception_mapping_worked = True - raise AuthenticationError( - message=f"{exception_provider} AuthenticationError - {message}", - llm_provider=custom_llm_provider, - model=model, - litellm_debug_info=extra_information, - ) - elif "Connection error" in error_str: - exception_mapping_worked = True - raise APIConnectionError( - message=f"{exception_provider} APIConnectionError - {message}", - llm_provider=custom_llm_provider, - model=model, - litellm_debug_info=extra_information, - ) - elif hasattr(original_exception, "status_code"): - exception_mapping_worked = True - if original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"AzureException - {message}", - llm_provider="azure", - model=model, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AzureException AuthenticationError - {message}", - llm_provider="azure", - model=model, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"AzureException Timeout - {message}", - model=model, - litellm_debug_info=extra_information, - llm_provider="azure", - ) - elif original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"AzureException BadRequestError - {message}", - model=model, - llm_provider="azure", - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"AzureException RateLimitError - {message}", - model=model, - llm_provider="azure", - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"AzureException ServiceUnavailableError - {message}", - model=model, - llm_provider="azure", - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 504: # gateway timeout error - exception_mapping_worked = True - raise Timeout( - message=f"AzureException Timeout - {message}", - model=model, - litellm_debug_info=extra_information, - llm_provider="azure", - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"AzureException APIError - {message}", - llm_provider="azure", - litellm_debug_info=extra_information, - model=model, - request=httpx.Request( - method="POST", url="https://openai.com/" - ), - ) - else: - # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors - raise APIConnectionError( - message=f"{exception_provider} APIConnectionError - {message}\n{traceback.format_exc()}", - llm_provider="azure", - model=model, - litellm_debug_info=extra_information, - request=httpx.Request(method="POST", url="https://openai.com/"), - ) - if custom_llm_provider == "openrouter": - if hasattr(original_exception, 
"status_code"): - exception_mapping_worked = True - if original_exception.status_code == 400: - exception_mapping_worked = True - raise BadRequestError( - message=f"{exception_provider} - {error_str}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 401: - exception_mapping_worked = True - raise AuthenticationError( - message=f"AuthenticationError: {exception_provider} - {error_str}", - llm_provider=custom_llm_provider, - model=model, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 404: - exception_mapping_worked = True - raise NotFoundError( - message=f"NotFoundError: {exception_provider} - {error_str}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 408: - exception_mapping_worked = True - raise Timeout( - message=f"Timeout Error: {exception_provider} - {error_str}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 422: - exception_mapping_worked = True - raise BadRequestError( - message=f"BadRequestError: {exception_provider} - {error_str}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 429: - exception_mapping_worked = True - raise RateLimitError( - message=f"RateLimitError: {exception_provider} - {error_str}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 503: - exception_mapping_worked = True - raise ServiceUnavailableError( - message=f"ServiceUnavailableError: {exception_provider} - {error_str}", - model=model, - llm_provider=custom_llm_provider, - response=original_exception.response, - litellm_debug_info=extra_information, - ) - elif original_exception.status_code == 504: # gateway timeout error - exception_mapping_worked = True - raise Timeout( - message=f"Timeout Error: {exception_provider} - {error_str}", - model=model, - llm_provider=custom_llm_provider, - litellm_debug_info=extra_information, - ) - else: - exception_mapping_worked = True - raise APIError( - status_code=original_exception.status_code, - message=f"APIError: {exception_provider} - {error_str}", - llm_provider=custom_llm_provider, - model=model, - request=original_exception.request, - litellm_debug_info=extra_information, - ) - else: - # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors - raise APIConnectionError( - message=f"APIConnectionError: {exception_provider} - {error_str}", - llm_provider=custom_llm_provider, - model=model, - litellm_debug_info=extra_information, - request=httpx.Request( - method="POST", url="https://api.openai.com/v1/" - ), - ) - if ( - "BadRequestError.__init__() missing 1 required positional argument: 'param'" - in str(original_exception) - ): # deal with edge-case invalid request error bug in openai-python sdk - exception_mapping_worked = True - raise BadRequestError( - message=f"{exception_provider} BadRequestError : This can happen due to missing AZURE_API_VERSION: {str(original_exception)}", - model=model, - llm_provider=custom_llm_provider, - 
-                    response=getattr(original_exception, "response", None),
-                )
-            else: # ensure generic errors always return APIConnectionError=
-                """
-                For unmapped exceptions - raise the exception with traceback - https://github.com/BerriAI/litellm/issues/4201
-                """
-                exception_mapping_worked = True
-                if hasattr(original_exception, "request"):
-                    raise APIConnectionError(
-                        message="{} - {}".format(exception_provider, error_str),
-                        llm_provider=custom_llm_provider,
-                        model=model,
-                        request=original_exception.request,
-                    )
-                else:
-                    raise APIConnectionError(
-                        message="{}\n{}".format(
-                            str(original_exception), traceback.format_exc()
-                        ),
-                        llm_provider=custom_llm_provider,
-                        model=model,
-                        request=httpx.Request(
-                            method="POST", url="https://api.openai.com/v1/"
-                        ), # stub the request
-                    )
-    except Exception as e:
-        # LOGGING
-        exception_logging(
-            logger_fn=user_logger_fn,
-            additional_args={
-                "exception_mapping_worked": exception_mapping_worked,
-                "original_exception": original_exception,
-            },
-            exception=e,
-        )
-        ## AUTH ERROR
-        if isinstance(e, AuthenticationError) and (
-            litellm.email or "LITELLM_EMAIL" in os.environ
-        ):
-            threading.Thread(target=get_all_keys, args=(e.llm_provider,)).start()
-        # don't let an error with mapping interrupt the user from receiving an error from the llm api calls
-        if exception_mapping_worked:
-            setattr(e, "litellm_response_headers", litellm_response_headers)
-            raise e
-        else:
-            for error_type in litellm.LITELLM_EXCEPTION_TYPES:
-                if isinstance(e, error_type):
-                    setattr(e, "litellm_response_headers", litellm_response_headers)
-                    raise e # it's already mapped
-            raised_exc = APIConnectionError(
-                message="{}\n{}".format(original_exception, traceback.format_exc()),
-                llm_provider="",
-                model="",
-            )
-            setattr(raised_exc, "litellm_response_headers", _response_headers)
-            raise raised_exc
-
 ######## Streaming Class ############################
 # wraps the completion stream to return the correct format for the model
 # replicate/anthropic/cohere
@@ -11166,3 +9213,15 @@ def is_base64_encoded(s: str) -> bool:
         return base64.b64encode(decoded_bytes).decode("utf-8") == s
     except Exception:
         return False
+
+
+def has_tool_call_blocks(messages: List[AllMessageValues]) -> bool:
+    """
+    Returns true, if messages has tool call blocks.
+
+    Used for anthropic/bedrock message validation.
+    """
+    for message in messages:
+        if message.get("tool_calls") is not None:
+            return True
+    return False
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 48b2a9322..e698fc5ba 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -1173,6 +1173,18 @@
         "supports_function_calling": true,
         "supports_assistant_prefill": true
     },
+    "mistral/pixtral-12b-2409": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "mistral",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_assistant_prefill": true,
+        "supports_vision": true
+    },
     "mistral/open-mistral-7b": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,
diff --git a/litellm/tests/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py
similarity index 98%
rename from litellm/tests/test_anthropic_completion.py
rename to tests/llm_translation/test_anthropic_completion.py
index b8ccf716e..2d5dd570a 100644
--- a/litellm/tests/test_anthropic_completion.py
+++ b/tests/llm_translation/test_anthropic_completion.py
@@ -25,7 +25,12 @@ from unittest.mock import MagicMock, patch
 import pytest
 
 import litellm
-from litellm import AnthropicConfig, Router, adapter_completion
+from litellm import (
+    AnthropicConfig,
+    Router,
+    adapter_completion,
+    AnthropicExperimentalPassThroughConfig,
+)
 from litellm.adapters.anthropic_adapter import anthropic_adapter
 from litellm.types.llms.anthropic import AnthropicResponse
 
@@ -33,7 +38,7 @@ from litellm.types.llms.anthropic import AnthropicResponse
 
 def test_anthropic_completion_messages_translation():
     messages = [{"role": "user", "content": "Hey, how's it going?"}]
-    translated_messages = AnthropicConfig().translate_anthropic_messages_to_openai(messages=messages) # type: ignore
+    translated_messages = AnthropicExperimentalPassThroughConfig().translate_anthropic_messages_to_openai(messages=messages) # type: ignore
 
     assert translated_messages == [{"role": "user", "content": "Hey, how's it going?"}]
diff --git a/tests/llm_translation/test_databricks.py b/tests/llm_translation/test_databricks.py
index b3bd92d8d..d5cd1135c 100644
--- a/tests/llm_translation/test_databricks.py
+++ b/tests/llm_translation/test_databricks.py
@@ -5,7 +5,11 @@ import pytest
 import sys
 from typing import Any, Dict, List
 from unittest.mock import MagicMock, Mock, patch
+import os
 
+sys.path.insert(
+    0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
 import litellm
 from litellm.exceptions import BadRequestError
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler