diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2d85031b5..74f165bdd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,10 +24,10 @@ repos: language: system types: [python] files: ^litellm/ - # - id: check-file-length - # name: Check file length - # entry: python check_file_length.py - # args: ["10000"] # set your desired maximum number of lines - # language: python - # files: litellm/.*\.py - # exclude: ^litellm/tests/ \ No newline at end of file + - id: check-file-length + name: Check file length + entry: python check_file_length.py + args: ["10000"] # set your desired maximum number of lines + language: python + files: litellm/.*\.py + exclude: ^litellm/tests/ \ No newline at end of file diff --git a/litellm/__init__.py b/litellm/__init__.py index 6ecf70d0d..353d7ac5b 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -715,6 +715,7 @@ openai_image_generation_models = ["dall-e-2", "dall-e-3"] from .timeout import timeout from .cost_calculator import completion_cost +from litellm.litellm_core_utils.litellm_logging import Logging from .utils import ( client, exception_type, @@ -723,12 +724,10 @@ from .utils import ( token_counter, create_pretrained_tokenizer, create_tokenizer, - cost_per_token, supports_function_calling, supports_parallel_function_calling, supports_vision, get_litellm_params, - Logging, acreate, get_model_list, get_max_tokens, @@ -748,9 +747,10 @@ from .utils import ( get_first_chars_messages, ModelResponse, ImageResponse, - ImageObject, get_provider_fields, ) + +from .types.utils import ImageObject from .llms.huggingface_restapi import HuggingfaceConfig from .llms.anthropic import AnthropicConfig from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig @@ -827,4 +827,4 @@ from .router import Router from .assistants.main import * from .batches.main import * from .scheduler import * -from .cost_calculator import response_cost_calculator +from .cost_calculator import response_cost_calculator, cost_per_token diff --git a/litellm/_logging.py b/litellm/_logging.py index ab7a08f97..52a445b49 100644 --- a/litellm/_logging.py +++ b/litellm/_logging.py @@ -3,10 +3,17 @@ from logging import Formatter import traceback set_verbose = False + +if set_verbose is True: + logging.warning( + "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs." + ) json_logs = bool(os.getenv("JSON_LOGS", False)) # Create a handler for the logger (you may need to adapt this based on your needs) +log_level = os.getenv("LITELLM_LOG", "ERROR") +numeric_level: str = getattr(logging, log_level.upper()) handler = logging.StreamHandler() -handler.setLevel(logging.DEBUG) +handler.setLevel(numeric_level) class JsonFormatter(Formatter): diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index d1e2dab52..c84df53e8 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -1,6 +1,6 @@ # What is this? 
## File for 'response_cost' calculation in Logging -from typing import Optional, Union, Literal, List +from typing import Optional, Union, Literal, List, Tuple import litellm._logging from litellm.utils import ( ModelResponse, @@ -9,7 +9,6 @@ from litellm.utils import ( TranscriptionResponse, TextCompletionResponse, CallTypes, - cost_per_token, print_verbose, CostPerToken, token_counter, @@ -18,6 +17,224 @@ import litellm from litellm import verbose_logger +def _cost_per_token_custom_pricing_helper( + prompt_tokens=0, + completion_tokens=0, + response_time_ms=None, + ### CUSTOM PRICING ### + custom_cost_per_token: Optional[CostPerToken] = None, + custom_cost_per_second: Optional[float] = None, +) -> Optional[Tuple[float, float]]: + """Internal helper function for calculating cost, if custom pricing given""" + if custom_cost_per_token is None and custom_cost_per_second is None: + return None + + if custom_cost_per_token is not None: + input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens + output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens + return input_cost, output_cost + elif custom_cost_per_second is not None: + output_cost = custom_cost_per_second * response_time_ms / 1000 # type: ignore + return 0, output_cost + + return None + + +def cost_per_token( + model: str = "", + prompt_tokens=0, + completion_tokens=0, + response_time_ms=None, + custom_llm_provider=None, + region_name=None, + ### CUSTOM PRICING ### + custom_cost_per_token: Optional[CostPerToken] = None, + custom_cost_per_second: Optional[float] = None, +) -> Tuple[float, float]: + """ + Calculates the cost per token for a given model, prompt tokens, and completion tokens. + + Parameters: + model (str): The name of the model to use. Default is "" + prompt_tokens (int): The number of tokens in the prompt. + completion_tokens (int): The number of tokens in the completion. + response_time (float): The amount of time, in milliseconds, it took the call to complete. + custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list) + custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call. + custom_cost_per_second: Optional[float]: the cost per second for the llm api call. + + Returns: + tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively. + """ + if model is None: + raise Exception("Invalid arg. 
Model cannot be none.") + ## CUSTOM PRICING ## + response_cost = _cost_per_token_custom_pricing_helper( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + response_time_ms=response_time_ms, + custom_cost_per_second=custom_cost_per_second, + custom_cost_per_token=custom_cost_per_token, + ) + if response_cost is not None: + return response_cost[0], response_cost[1] + + # given + prompt_tokens_cost_usd_dollar: float = 0 + completion_tokens_cost_usd_dollar: float = 0 + model_cost_ref = litellm.model_cost + model_with_provider = model + if custom_llm_provider is not None: + model_with_provider = custom_llm_provider + "/" + model + if region_name is not None: + model_with_provider_and_region = ( + f"{custom_llm_provider}/{region_name}/{model}" + ) + if ( + model_with_provider_and_region in model_cost_ref + ): # use region based pricing, if it's available + model_with_provider = model_with_provider_and_region + + model_without_prefix = model + model_parts = model.split("/") + if len(model_parts) > 1: + model_without_prefix = model_parts[1] + else: + model_without_prefix = model + """ + Code block that formats model to lookup in litellm.model_cost + Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1 + Option2. model = "openai/gpt-4" - model = provider/model + Option3. model = "anthropic.claude-3" - model = model + """ + if ( + model_with_provider in model_cost_ref + ): # Option 2. use model with provider, model = "openai/gpt-4" + model = model_with_provider + elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4" + model = model + elif ( + model_without_prefix in model_cost_ref + ): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3" + model = model_without_prefix + + # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models + print_verbose(f"Looking up model={model} in model_cost_map") + if model in model_cost_ref: + print_verbose(f"Success: model={model} in model_cost_map") + print_verbose( + f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}" + ) + if ( + model_cost_ref[model].get("input_cost_per_token", None) is not None + and model_cost_ref[model].get("output_cost_per_token", None) is not None + ): + ## COST PER TOKEN ## + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_token"] * completion_tokens + ) + elif ( + model_cost_ref[model].get("output_cost_per_second", None) is not None + and response_time_ms is not None + ): + print_verbose( + f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}" + ) + ## COST PER SECOND ## + prompt_tokens_cost_usd_dollar = 0 + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_second"] + * response_time_ms + / 1000 + ) + elif ( + model_cost_ref[model].get("input_cost_per_second", None) is not None + and response_time_ms is not None + ): + print_verbose( + f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}" + ) + ## COST PER SECOND ## + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000 + ) + completion_tokens_cost_usd_dollar = 0.0 + print_verbose( + f"Returned 
custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}" + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif "ft:gpt-3.5-turbo" in model: + print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM") + # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm + prompt_tokens_cost_usd_dollar = ( + model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] + * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif "ft:davinci-002" in model: + print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM") + # fuzzy match ft:davinci-002:abcd-id-cool-litellm + prompt_tokens_cost_usd_dollar = ( + model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref["ft:davinci-002"]["output_cost_per_token"] + * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif "ft:babbage-002" in model: + print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM") + # fuzzy match ft:babbage-002:abcd-id-cool-litellm + prompt_tokens_cost_usd_dollar = ( + model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref["ft:babbage-002"]["output_cost_per_token"] + * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif model in litellm.azure_llms: + verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM") + model = litellm.azure_llms[model] + verbose_logger.debug( + f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}" + ) + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + ) + verbose_logger.debug( + f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}" + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_token"] * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif model in litellm.azure_embedding_models: + verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model") + model = litellm.azure_embedding_models[model] + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_token"] * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + else: + # if model is not in model_prices_and_context_window.json. Raise an exception - let users know + error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n" + raise litellm.exceptions.NotFoundError( # type: ignore + message=error_str, + model=model, + llm_provider="", + )
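Reviewer note: a minimal usage sketch (not part of the diff) for the relocated cost_per_token helper, assuming the new top-level re-export added in litellm/__init__.py above; the model names and token counts below are illustrative only.

import litellm
from litellm.utils import CostPerToken

# Token-based lookup against litellm.model_cost (model name is illustrative).
prompt_cost_usd, completion_cost_usd = litellm.cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=100,
    completion_tokens=50,
)
print(prompt_cost_usd, completion_cost_usd)

# Custom per-token pricing short-circuits the model_cost lookup via
# _cost_per_token_custom_pricing_helper.
custom_pricing: CostPerToken = {
    "input_cost_per_token": 1e-06,
    "output_cost_per_token": 2e-06,
}
prompt_cost_usd, completion_cost_usd = litellm.cost_per_token(
    model="my-custom-model",  # hypothetical model name, not in the cost map
    prompt_tokens=100,
    completion_tokens=50,
    custom_cost_per_token=custom_pricing,
)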
+ + # Extract the number of billion parameters from the model name # only used for together_computer LLMs def get_model_params_and_category(model_name) -> str: diff --git a/litellm/litellm_core_utils/core_helpers.py b/litellm/litellm_core_utils/core_helpers.py new file mode 100644 index 000000000..7b911895d --- /dev/null +++ b/litellm/litellm_core_utils/core_helpers.py @@ -0,0 +1,41 @@ +# What is this? +## Helper utilities for the model response objects + + +def map_finish_reason( + finish_reason: str, +): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null' + # anthropic mapping + if finish_reason == "stop_sequence": + return "stop" + # cohere mapping - https://docs.cohere.com/reference/generate + elif finish_reason == "COMPLETE": + return "stop" + elif finish_reason == "MAX_TOKENS": # cohere + vertex ai + return "length" + elif finish_reason == "ERROR_TOXIC": + return "content_filter" + elif ( + finish_reason == "ERROR" + ): # openai currently doesn't support an 'error' finish reason + return "stop" + # huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream + elif finish_reason == "eos_token" or finish_reason == "stop_sequence": + return "stop" + elif ( + finish_reason == "FINISH_REASON_UNSPECIFIED" or finish_reason == "STOP" + ): # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',] + return "stop" + elif finish_reason == "SAFETY": # vertex ai + return "content_filter" + elif finish_reason == "STOP": # vertex ai + return "stop" + elif finish_reason == "end_turn" or finish_reason == "stop_sequence": # anthropic + return "stop" + elif finish_reason == "max_tokens": # anthropic + return "length" + elif finish_reason == "tool_use": # anthropic + return "tool_calls" + elif finish_reason == "content_filtered": + return "content_filter" + return finish_reason
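Reviewer note: a small illustrative sketch (not part of the diff) of how the new core_helpers.map_finish_reason normalizes provider-specific finish reasons to OpenAI-style values; the sample inputs are assumptions chosen from the branches above.

from litellm.litellm_core_utils.core_helpers import map_finish_reason

# Provider-specific finish reasons mapped to OpenAI-style values.
assert map_finish_reason("stop_sequence") == "stop"        # anthropic
assert map_finish_reason("MAX_TOKENS") == "length"         # cohere / vertex ai
assert map_finish_reason("tool_use") == "tool_calls"       # anthropic
assert map_finish_reason("SAFETY") == "content_filter"     # vertex ai
# Unknown values pass through unchanged.
assert map_finish_reason("some_new_reason") == "some_new_reason"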
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py new file mode 100644 index 000000000..f99303abb --- /dev/null +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -0,0 +1,1780 @@ +# What is this? +## Common Utility file for Logging handler +# Logging function -> log the exact model details + what's being sent | Non-Blocking +from litellm.types.utils import CallTypes +from typing import Optional +import datetime +from litellm import ( + verbose_logger, + json_logs, + log_raw_request_response, + turn_off_message_logging, +) +import traceback +import litellm +import copy +import sys +import uuid +import os +from litellm.integrations.custom_logger import CustomLogger +import json +import time +from litellm.litellm_core_utils.redact_messages import ( + redact_message_input_output_from_logging, +) +from litellm.utils import ( + _get_base_model_from_metadata, + supabaseClient, + liteDebuggerClient, + promptLayerLogger, + weightsBiasesLogger, + langsmithLogger, + logfireLogger, + capture_exception, + add_breadcrumb, + lunaryLogger, + prometheusLogger, + print_verbose, + customLogger, + prompt_token_calculator, +) +from litellm.types.utils import ( + ModelResponse, + EmbeddingResponse, + ImageResponse, + TranscriptionResponse, + TextCompletionResponse, +) +import subprocess +from ..integrations.traceloop import TraceloopLogger +from ..integrations.athina import AthinaLogger +from ..integrations.helicone import HeliconeLogger +from ..integrations.aispend import AISpendLogger +from ..integrations.berrispend import BerriSpendLogger +from ..integrations.supabase import Supabase +from ..integrations.lunary import LunaryLogger +from ..integrations.prompt_layer import PromptLayerLogger +from ..integrations.langsmith import LangsmithLogger +from ..integrations.logfire_logger import LogfireLogger, LogfireLevel +from ..integrations.weights_biases import WeightsBiasesLogger +from ..integrations.custom_logger import CustomLogger +from ..integrations.langfuse import LangFuseLogger +from ..integrations.openmeter import OpenMeterLogger +from ..integrations.lago import LagoLogger +from ..integrations.datadog import DataDogLogger +from ..integrations.prometheus import PrometheusLogger +from ..integrations.prometheus_services import PrometheusServicesLogger +from ..integrations.dynamodb import DyanmoDBLogger +from ..integrations.s3 import S3Logger +from ..integrations.clickhouse import ClickhouseLogger +from ..integrations.greenscale import GreenscaleLogger +from ..integrations.litedebugger import LiteDebugger + + +class Logging: + global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, prometheusLogger, slack_app + custom_pricing: bool = False + stream_options = None + + def __init__( + self, + model, + messages, + stream, + call_type, + start_time, + litellm_call_id, + function_id, + dynamic_success_callbacks=None, + dynamic_failure_callbacks=None, + dynamic_async_success_callbacks=None, + langfuse_public_key=None, + langfuse_secret=None, + ): + if call_type not in [item.value for item in CallTypes]: + allowed_values = ", ".join([item.value for item in CallTypes]) + raise ValueError( + f"Invalid call_type {call_type}.
Allowed values: {allowed_values}" + ) + if messages is not None: + if isinstance(messages, str): + messages = [ + {"role": "user", "content": messages} + ] # convert text completion input to the chat completion format + elif ( + isinstance(messages, list) + and len(messages) > 0 + and isinstance(messages[0], str) + ): + new_messages = [] + for m in messages: + new_messages.append({"role": "user", "content": m}) + messages = new_messages + self.model = model + self.messages = messages + self.stream = stream + self.start_time = start_time # log the call start time + self.call_type = call_type + self.litellm_call_id = litellm_call_id + self.function_id = function_id + self.streaming_chunks = [] # for generating complete stream response + self.sync_streaming_chunks = [] # for generating complete stream response + self.model_call_details = {} + self.dynamic_input_callbacks = [] # [TODO] callbacks set for just that call + self.dynamic_failure_callbacks = dynamic_failure_callbacks + self.dynamic_success_callbacks = ( + dynamic_success_callbacks # callbacks set for just that call + ) + self.dynamic_async_success_callbacks = ( + dynamic_async_success_callbacks # callbacks set for just that call + ) + ## DYNAMIC LANGFUSE KEYS ## + self.langfuse_public_key = langfuse_public_key + self.langfuse_secret = langfuse_secret + ## TIME TO FIRST TOKEN LOGGING ## + self.completion_start_time: Optional[datetime.datetime] = None + + def update_environment_variables( + self, model, user, optional_params, litellm_params, **additional_params + ): + self.optional_params = optional_params + self.model = model + self.user = user + self.litellm_params = litellm_params + self.logger_fn = litellm_params.get("logger_fn", None) + verbose_logger.debug(f"self.optional_params: {self.optional_params}") + + self.model_call_details = { + "model": self.model, + "messages": self.messages, + "optional_params": self.optional_params, + "litellm_params": self.litellm_params, + "start_time": self.start_time, + "stream": self.stream, + "user": user, + "call_type": str(self.call_type), + "litellm_call_id": self.litellm_call_id, + "completion_start_time": self.completion_start_time, + **self.optional_params, + **additional_params, + } + + ## check if stream options is set ## - used by CustomStreamWrapper for easy instrumentation + if "stream_options" in additional_params: + self.stream_options = additional_params["stream_options"] + ## check if custom pricing set ## + if ( + litellm_params.get("input_cost_per_token") is not None + or litellm_params.get("input_cost_per_second") is not None + or litellm_params.get("output_cost_per_token") is not None + or litellm_params.get("output_cost_per_second") is not None + ): + self.custom_pricing = True + + def _pre_call(self, input, api_key, model=None, additional_args={}): + """ + Common helper function across the sync + async pre-call function + """ + self.model_call_details["input"] = input + self.model_call_details["api_key"] = api_key + self.model_call_details["additional_args"] = additional_args + self.model_call_details["log_event_type"] = "pre_api_call" + if ( + model + ): # if model name was changes pre-call, overwrite the initial model call name with the new one + self.model_call_details["model"] = model + + def pre_call(self, input, api_key, model=None, additional_args={}): + # Log the exact input to the LLM API + litellm.error_logs["PRE_CALL"] = locals() + try: + self._pre_call( + input=input, + api_key=api_key, + model=model, + additional_args=additional_args, + ) + + # User Logging -> 
if you pass in a custom logging function + headers = additional_args.get("headers", {}) + if headers is None: + headers = {} + data = additional_args.get("complete_input_dict", {}) + api_base = additional_args.get("api_base", "") + self.model_call_details["litellm_params"]["api_base"] = str( + api_base + ) # used for alerting + masked_headers = { + k: ( + (v[:-44] + "*" * 44) + if (isinstance(v, str) and len(v) > 44) + else "*****" + ) + for k, v in headers.items() + } + formatted_headers = " ".join( + [f"-H '{k}: {v}'" for k, v in masked_headers.items()] + ) + + verbose_logger.debug(f"PRE-API-CALL ADDITIONAL ARGS: {additional_args}") + + curl_command = "\n\nPOST Request Sent from LiteLLM:\n" + curl_command += "curl -X POST \\\n" + curl_command += f"{api_base} \\\n" + curl_command += ( + f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else "" + ) + curl_command += f"-d '{str(data)}'\n" + if additional_args.get("request_str", None) is not None: + # print the sagemaker / bedrock client request + curl_command = "\nRequest Sent from LiteLLM:\n" + curl_command += additional_args.get("request_str", None) + elif api_base == "": + curl_command = self.model_call_details + + if json_logs: + verbose_logger.debug( + "POST Request Sent from LiteLLM", + extra={"api_base": {api_base}, **masked_headers}, + ) + else: + verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n") + # log raw request to provider (like LangFuse) -- if opted in. + if log_raw_request_response is True: + try: + # [Non-blocking Extra Debug Information in metadata] + _litellm_params = self.model_call_details.get("litellm_params", {}) + _metadata = _litellm_params.get("metadata", {}) or {} + if ( + turn_off_message_logging is not None + and turn_off_message_logging is True + ): + _metadata["raw_request"] = ( + "redacted by litellm. 
\ + 'litellm.turn_off_message_logging=True'" + ) + else: + _metadata["raw_request"] = str(curl_command) + except Exception as e: + _metadata["raw_request"] = ( + "Unable to Log \ raw request: {}".format( + str(e) + ) + ) + if self.logger_fn and callable(self.logger_fn): + try: + self.logger_fn( + self.model_call_details + ) # Expectation: any logger function passed in by the user should accept a dict object + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made + callbacks = litellm.input_callback + self.dynamic_input_callbacks + for callback in callbacks: + try: + if callback == "supabase": + verbose_logger.debug("reaches supabase for logging!") + model = self.model_call_details["model"] + messages = self.model_call_details["input"] + verbose_logger.debug(f"supabaseClient: {supabaseClient}") + supabaseClient.input_log_event( + model=model, + messages=messages, + end_user=self.model_call_details.get("user", "default"), + litellm_call_id=self.litellm_params["litellm_call_id"], + print_verbose=print_verbose, + ) + elif callback == "sentry" and add_breadcrumb: + try: + details_to_log = copy.deepcopy(self.model_call_details) + except: + details_to_log = self.model_call_details + if litellm.turn_off_message_logging: + # make a copy of the _model_Call_details and log it + details_to_log.pop("messages", None) + details_to_log.pop("input", None) + details_to_log.pop("prompt", None) + + add_breadcrumb( + category="litellm.llm_call", + message=f"Model Call Details pre-call: {details_to_log}", + level="info", + ) + elif isinstance(callback, CustomLogger): # custom logger class + callback.log_pre_api_call( + model=self.model, + messages=self.messages, + kwargs=self.model_call_details, + ) + elif callable(callback): # custom logger functions + customLogger.log_input_event( + model=self.model, + messages=self.messages, + kwargs=self.model_call_details, + print_verbose=print_verbose, + callback_func=callback, + ) + except Exception as e: + verbose_logger.error( + "litellm.Logging.pre_call(): Exception occurred - {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + verbose_logger.debug( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + verbose_logger.error( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + + def post_call( + self, original_response, input=None, api_key=None, additional_args={} + ): + # Log the exact result from the LLM API, for streaming - log the type of response received + litellm.error_logs["POST_CALL"] = locals() + if isinstance(original_response, dict): + original_response = json.dumps(original_response) + try: + self.model_call_details["input"] = input + self.model_call_details["api_key"] = api_key + self.model_call_details["original_response"] = original_response + self.model_call_details["additional_args"] = additional_args + self.model_call_details["log_event_type"] = "post_api_call" + + verbose_logger.debug( + "RAW
RESPONSE:\n{}\n\n".format( + self.model_call_details.get( + "original_response", self.model_call_details + ) + ), + ) + if self.logger_fn and callable(self.logger_fn): + try: + self.logger_fn( + self.model_call_details + ) # Expectation: any logger function passed in by the user should accept a dict object + except Exception as e: + verbose_logger.debug( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + original_response = redact_message_input_output_from_logging( + litellm_logging_obj=self, result=original_response + ) + # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made + + callbacks = litellm.input_callback + self.dynamic_input_callbacks + for callback in callbacks: + try: + if callback == "sentry" and add_breadcrumb: + verbose_logger.debug("reaches sentry breadcrumbing") + try: + details_to_log = copy.deepcopy(self.model_call_details) + except: + details_to_log = self.model_call_details + if litellm.turn_off_message_logging: + # make a copy of the _model_Call_details and log it + details_to_log.pop("messages", None) + details_to_log.pop("input", None) + details_to_log.pop("prompt", None) + + add_breadcrumb( + category="litellm.llm_call", + message=f"Model Call Details post-call: {details_to_log}", + level="info", + ) + elif isinstance(callback, CustomLogger): # custom logger class + callback.log_post_api_call( + kwargs=self.model_call_details, + response_obj=None, + start_time=self.start_time, + end_time=None, + ) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + verbose_logger.debug( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + + def _success_handler_helper_fn( + self, result=None, start_time=None, end_time=None, cache_hit=None + ): + try: + if start_time is None: + start_time = self.start_time + if end_time is None: + end_time = datetime.datetime.now() + if self.completion_start_time is None: + self.completion_start_time = end_time + self.model_call_details["completion_start_time"] = ( + self.completion_start_time + ) + self.model_call_details["log_event_type"] = "successful_api_call" + self.model_call_details["end_time"] = end_time + self.model_call_details["cache_hit"] = cache_hit + ## if model in model cost map - log the response cost + ## else set cost to None + verbose_logger.debug(f"Model={self.model};") + if ( + result is not None + and ( + isinstance(result, ModelResponse) + or isinstance(result, EmbeddingResponse) + or isinstance(result, ImageResponse) + or isinstance(result, TranscriptionResponse) + or isinstance(result, TextCompletionResponse) + ) + and self.stream != True + ): # handle streaming separately + self.model_call_details["response_cost"] = ( + litellm.response_cost_calculator( + response_object=result, + model=self.model, + cache_hit=self.model_call_details.get("cache_hit", False), + custom_llm_provider=self.model_call_details.get( + "custom_llm_provider", None + ), + base_model=_get_base_model_from_metadata( + model_call_details=self.model_call_details + ), + 
call_type=self.call_type, + optional_params=self.optional_params, + ) + ) + else: # streaming chunks + image gen. + self.model_call_details["response_cost"] = None + + if ( + litellm.max_budget + and self.stream == False + and result is not None + and "content" in result + ): + time_diff = (end_time - start_time).total_seconds() + float_diff = float(time_diff) + litellm._current_cost += litellm.completion_cost( + model=self.model, + prompt="", + completion=result["content"], + total_time=float_diff, + ) + + return start_time, end_time, result + except Exception as e: + raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}") + + def success_handler( + self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs + ): + verbose_logger.debug( + f"Logging Details LiteLLM-Success Call: Cache_hit={cache_hit}" + ) + start_time, end_time, result = self._success_handler_helper_fn( + start_time=start_time, + end_time=end_time, + result=result, + cache_hit=cache_hit, + ) + # print(f"original response in success handler: {self.model_call_details['original_response']}") + try: + verbose_logger.debug(f"success callbacks: {litellm.success_callback}") + ## BUILD COMPLETE STREAMED RESPONSE + complete_streaming_response = None + if self.stream and isinstance(result, ModelResponse): + if ( + result.choices[0].finish_reason is not None + ): # if it's the last chunk + self.sync_streaming_chunks.append(result) + # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}") + try: + complete_streaming_response = litellm.stream_chunk_builder( + self.sync_streaming_chunks, + messages=self.model_call_details.get("messages", None), + start_time=start_time, + end_time=end_time, + ) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format( + str(e), traceback.format_exc() + ), + log_level="ERROR", + ) + complete_streaming_response = None + else: + self.sync_streaming_chunks.append(result) + + if complete_streaming_response is not None: + verbose_logger.debug( + f"Logging Details LiteLLM-Success Call streaming complete" + ) + self.model_call_details["complete_streaming_response"] = ( + complete_streaming_response + ) + self.model_call_details["response_cost"] = ( + litellm.response_cost_calculator( + response_object=complete_streaming_response, + model=self.model, + cache_hit=self.model_call_details.get("cache_hit", False), + custom_llm_provider=self.model_call_details.get( + "custom_llm_provider", None + ), + base_model=_get_base_model_from_metadata( + model_call_details=self.model_call_details + ), + call_type=self.call_type, + optional_params=self.optional_params, + ) + ) + if self.dynamic_success_callbacks is not None and isinstance( + self.dynamic_success_callbacks, list + ): + callbacks = self.dynamic_success_callbacks + ## keep the internal functions ## + for callback in litellm.success_callback: + if ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + callbacks.append(callback) + else: + callbacks = litellm.success_callback + + result = redact_message_input_output_from_logging( + result=result, litellm_logging_obj=self + ) + + for callback in callbacks: + try: + litellm_params = self.model_call_details.get("litellm_params", {}) + if litellm_params.get("no-log", False) == True: + # proxy cost tracking cal backs should run + if not ( + isinstance(callback, CustomLogger) + and "_PROXY_" in 
callback.__class__.__name__ + ): + print_verbose("no-log request, skipping logging") + continue + if callback == "lite_debugger": + print_verbose("reaches lite_debugger for logging!") + print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") + print_verbose( + f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}" + ) + liteDebuggerClient.log_event( + end_user=kwargs.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=self.litellm_call_id, + print_verbose=print_verbose, + call_type=self.call_type, + stream=self.stream, + ) + if callback == "promptlayer": + print_verbose("reaches promptlayer for logging!") + promptLayerLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "supabase": + print_verbose("reaches supabase for logging!") + kwargs = self.model_call_details + + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + print_verbose("reaches supabase for streaming logging!") + result = kwargs["complete_streaming_response"] + + model = kwargs["model"] + messages = kwargs["messages"] + optional_params = kwargs.get("optional_params", {}) + litellm_params = kwargs.get("litellm_params", {}) + supabaseClient.log_event( + model=model, + messages=messages, + end_user=optional_params.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=litellm_params.get( + "litellm_call_id", str(uuid.uuid4()) + ), + print_verbose=print_verbose, + ) + if callback == "wandb": + print_verbose("reaches wandb for logging!") + weightsBiasesLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "langsmith": + print_verbose("reaches langsmith for logging!") + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + print_verbose( + "reaches langsmith for streaming logging!" 
+ ) + result = kwargs["complete_streaming_response"] + langsmithLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "logfire": + global logfireLogger + verbose_logger.debug("reaches logfire for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + print_verbose("reaches logfire for streaming logging!") + result = kwargs["complete_streaming_response"] + + logfireLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + level=LogfireLevel.INFO.value, + ) + + if callback == "lunary": + print_verbose("reaches lunary for logging!") + model = self.model + kwargs = self.model_call_details + + input = kwargs.get("messages", kwargs.get("input", None)) + + type = ( + "embed" + if self.call_type == CallTypes.embedding.value + else "llm" + ) + + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + result = kwargs["complete_streaming_response"] + + lunaryLogger.log_event( + type=type, + kwargs=kwargs, + event="end", + model=model, + input=input, + user_id=kwargs.get("user", None), + # user_props=self.model_call_details.get("user_props", None), + extra=kwargs.get("optional_params", {}), + response_obj=result, + start_time=start_time, + end_time=end_time, + run_id=self.litellm_call_id, + print_verbose=print_verbose, + ) + if callback == "helicone": + print_verbose("reaches helicone for logging!") + model = self.model + messages = self.model_call_details["input"] + heliconeLogger.log_success( + model=model, + messages=messages, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "langfuse": + global langFuseLogger + verbose_logger.debug("reaches langfuse for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose("reaches langfuse for streaming logging!") + result = kwargs["complete_streaming_response"] + if langFuseLogger is None or ( + ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + and ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + ): + langFuseLogger = LangFuseLogger( + langfuse_public_key=self.langfuse_public_key, + langfuse_secret=self.langfuse_secret, + ) + langFuseLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "datadog": + global dataDogLogger + verbose_logger.debug("reaches 
datadog for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"datadog: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose("reaches datadog for streaming logging!") + result = kwargs["complete_streaming_response"] + dataDogLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "prometheus": + verbose_logger.debug("reaches prometheus for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose( + "reaches prometheus for streaming logging!" + ) + result = kwargs["complete_streaming_response"] + prometheusLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "generic": + global genericAPILogger + verbose_logger.debug("reaches langfuse for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose("reaches langfuse for streaming logging!") + result = kwargs["complete_streaming_response"] + if genericAPILogger is None: + genericAPILogger = GenericAPILogger() + genericAPILogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "clickhouse": + global clickHouseLogger + verbose_logger.debug("reaches clickhouse for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose( + "reaches clickhouse for streaming logging!" 
+ ) + result = kwargs["complete_streaming_response"] + if clickHouseLogger is None: + clickHouseLogger = ClickhouseLogger() + clickHouseLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "greenscale": + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose( + "reaches greenscale for streaming logging!" + ) + result = kwargs["complete_streaming_response"] + + greenscaleLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "cache" and litellm.cache is not None: + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + print_verbose("success_callback: reaches cache for logging!") + kwargs = self.model_call_details + if self.stream: + if "complete_streaming_response" not in kwargs: + print_verbose( + f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" + ) + pass + else: + print_verbose( + "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" + ) + result = kwargs["complete_streaming_response"] + # only add to cache once we have a complete streaming response + litellm.cache.add_cache(result, **kwargs) + if callback == "athina": + deep_copy = {} + for k, v in self.model_call_details.items(): + deep_copy[k] = v + athinaLogger.log_event( + kwargs=deep_copy, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "traceloop": + deep_copy = {} + for k, v in self.model_call_details.items(): + if k != "original_response": + deep_copy[k] = v + traceloopLogger.log_event( + kwargs=deep_copy, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "s3": + global s3Logger + if s3Logger is None: + s3Logger = S3Logger() + if self.stream: + if "complete_streaming_response" in self.model_call_details: + print_verbose( + "S3Logger Logger: Got Stream Event - Completed Stream Response" + ) + s3Logger.log_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + else: + print_verbose( + "S3Logger Logger: Got Stream Event - No complete stream response as yet" + ) + else: + s3Logger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if ( + callback == "openmeter" + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aimage_generation", False + ) + == False + and self.model_call_details.get("litellm_params", 
{}).get( + "atranscription", False + ) + == False + ): + global openMeterLogger + if openMeterLogger is None: + print_verbose("Instantiates openmeter client") + openMeterLogger = OpenMeterLogger() + if self.stream and complete_streaming_response is None: + openMeterLogger.log_stream_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + if self.stream and complete_streaming_response: + self.model_call_details["complete_response"] = ( + self.model_call_details.get( + "complete_streaming_response", {} + ) + ) + result = self.model_call_details["complete_response"] + openMeterLogger.log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + + if ( + isinstance(callback, CustomLogger) + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aimage_generation", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "atranscription", False + ) + == False + ): # custom logger class + if self.stream and complete_streaming_response is None: + callback.log_stream_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + if self.stream and complete_streaming_response: + self.model_call_details["complete_response"] = ( + self.model_call_details.get( + "complete_streaming_response", {} + ) + ) + result = self.model_call_details["complete_response"] + callback.log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + if ( + callable(callback) == True + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aimage_generation", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "atranscription", False + ) + == False + ): # custom logger functions + print_verbose( + f"success callbacks: Running Custom Callback Function" + ) + customLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + + except Exception as e: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging with integrations {traceback.format_exc()}" + ) + print_verbose( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format( + str(e), traceback.format_exc() + ), + ) + + async def async_success_handler( + self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs + ): + """ + Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. 
+ """ + print_verbose("Logging Details LiteLLM-Async Success Call") + start_time, end_time, result = self._success_handler_helper_fn( + start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit + ) + ## BUILD COMPLETE STREAMED RESPONSE + complete_streaming_response = None + if self.stream: + if result.choices[0].finish_reason is not None: # if it's the last chunk + self.streaming_chunks.append(result) + # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}") + try: + complete_streaming_response = litellm.stream_chunk_builder( + self.streaming_chunks, + messages=self.model_call_details.get("messages", None), + start_time=start_time, + end_time=end_time, + ) + except Exception as e: + print_verbose( + "Error occurred building stream chunk in success logging: {}\n{}".format( + str(e), traceback.format_exc() + ), + log_level="ERROR", + ) + complete_streaming_response = None + else: + self.streaming_chunks.append(result) + if complete_streaming_response is not None: + print_verbose("Async success callbacks: Got a complete streaming response") + self.model_call_details["async_complete_streaming_response"] = ( + complete_streaming_response + ) + try: + if self.model_call_details.get("cache_hit", False) is True: + self.model_call_details["response_cost"] = 0.0 + else: + # check if base_model set on azure + base_model = _get_base_model_from_metadata( + model_call_details=self.model_call_details + ) + # base_model defaults to None if not set on model_info + self.model_call_details["response_cost"] = litellm.completion_cost( + completion_response=complete_streaming_response, + model=base_model, + ) + verbose_logger.debug( + f"Model={self.model}; cost={self.model_call_details['response_cost']}" + ) + except litellm.NotFoundError as e: + verbose_logger.error( + f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None" + ) + self.model_call_details["response_cost"] = None + + if self.dynamic_async_success_callbacks is not None and isinstance( + self.dynamic_async_success_callbacks, list + ): + callbacks = self.dynamic_async_success_callbacks + ## keep the internal functions ## + for callback in litellm._async_success_callback: + callback_name = "" + if isinstance(callback, CustomLogger): + callback_name = callback.__class__.__name__ + if callable(callback): + callback_name = callback.__name__ + if "_PROXY_" in callback_name: + callbacks.append(callback) + else: + callbacks = litellm._async_success_callback + + result = redact_message_input_output_from_logging( + result=result, litellm_logging_obj=self + ) + + for callback in callbacks: + # check if callback can run for this request + litellm_params = self.model_call_details.get("litellm_params", {}) + if litellm_params.get("no-log", False) == True: + # proxy cost tracking cal backs should run + if not ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + print_verbose("no-log request, skipping logging") + continue + try: + if kwargs.get("no-log", False) == True: + print_verbose("no-log request, skipping logging") + continue + if callback == "cache" and litellm.cache is not None: + # set_cache once complete streaming response is built + print_verbose("async success_callback: reaches cache for logging!") + kwargs = self.model_call_details + if self.stream: + if "async_complete_streaming_response" not in kwargs: + print_verbose( + f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. 
Kwargs={kwargs}\n\n" + ) + pass + else: + print_verbose( + "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache" + ) + result = kwargs["async_complete_streaming_response"] + # only add to cache once we have a complete streaming response + if litellm.cache is not None and not isinstance( + litellm.cache.cache, S3Cache + ): + await litellm.cache.async_add_cache(result, **kwargs) + else: + litellm.cache.add_cache(result, **kwargs) + if callback == "openmeter": + global openMeterLogger + if self.stream == True: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + await openMeterLogger.async_log_success_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + ) + else: + await openMeterLogger.async_log_stream_event( # [TODO]: move this to being an async log stream event function + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + await openMeterLogger.async_log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + if isinstance(callback, CustomLogger): # custom logger class + if self.stream == True: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + await callback.async_log_success_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + ) + else: + await callback.async_log_stream_event( # [TODO]: move this to being an async log stream event function + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + await callback.async_log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + if callable(callback): # custom logger functions + if self.stream: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + await customLogger.async_log_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + else: + await customLogger.async_log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + if callback == "dynamodb": + global dynamoLogger + if dynamoLogger is None: + dynamoLogger = DyanmoDBLogger() + if self.stream: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + print_verbose( + "DynamoDB Logger: Got Stream Event - Completed Stream Response" + ) + await dynamoLogger._async_log_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + else: + print_verbose( + "DynamoDB Logger: Got Stream Event - No complete stream response as yet" + ) + else: + await dynamoLogger._async_log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + except Exception as e: + verbose_logger.error( + f"LiteLLM.LoggingError: [Non-Blocking] 
Exception occurred while success logging {traceback.format_exc()}" + ) + pass + + def _failure_handler_helper_fn( + self, exception, traceback_exception, start_time=None, end_time=None + ): + if start_time is None: + start_time = self.start_time + if end_time is None: + end_time = datetime.datetime.now() + + # on some exceptions, model_call_details is not always initialized, this ensures that we still log those exceptions + if not hasattr(self, "model_call_details"): + self.model_call_details = {} + + self.model_call_details["log_event_type"] = "failed_api_call" + self.model_call_details["exception"] = exception + self.model_call_details["traceback_exception"] = traceback_exception + self.model_call_details["end_time"] = end_time + self.model_call_details.setdefault("original_response", None) + return start_time, end_time + + def failure_handler( + self, exception, traceback_exception, start_time=None, end_time=None + ): + verbose_logger.debug( + f"Logging Details LiteLLM-Failure Call: {litellm.failure_callback}" + ) + try: + start_time, end_time = self._failure_handler_helper_fn( + exception=exception, + traceback_exception=traceback_exception, + start_time=start_time, + end_time=end_time, + ) + callbacks = [] # init this to empty incase it's not created + + if self.dynamic_failure_callbacks is not None and isinstance( + self.dynamic_failure_callbacks, list + ): + callbacks = self.dynamic_failure_callbacks + ## keep the internal functions ## + for callback in litellm.failure_callback: + if ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + callbacks.append(callback) + else: + callbacks = litellm.failure_callback + + result = None # result sent to all loggers, init this to None incase it's not created + + result = redact_message_input_output_from_logging( + result=result, litellm_logging_obj=self + ) + for callback in callbacks: + try: + if callback == "lite_debugger": + print_verbose("reaches lite_debugger for logging!") + print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") + result = { + "model": self.model, + "created": time.time(), + "error": traceback_exception, + "usage": { + "prompt_tokens": prompt_token_calculator( + self.model, messages=self.messages + ), + "completion_tokens": 0, + }, + } + liteDebuggerClient.log_event( + model=self.model, + messages=self.messages, + end_user=self.model_call_details.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=self.litellm_call_id, + print_verbose=print_verbose, + call_type=self.call_type, + stream=self.stream, + ) + if callback == "lunary": + print_verbose("reaches lunary for logging error!") + + model = self.model + + input = self.model_call_details["input"] + + _type = ( + "embed" + if self.call_type == CallTypes.embedding.value + else "llm" + ) + + lunaryLogger.log_event( + type=_type, + event="error", + user_id=self.model_call_details.get("user", "default"), + model=model, + input=input, + error=traceback_exception, + run_id=self.litellm_call_id, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "sentry": + print_verbose("sending exception to sentry") + if capture_exception: + capture_exception(exception) + else: + print_verbose( + f"capture exception not initialized: {capture_exception}" + ) + elif callback == "supabase": + print_verbose("reaches supabase for logging!") + print_verbose(f"supabaseClient: {supabaseClient}") + result = { + "model": model, + "created": time.time(), + 
"error": traceback_exception, + "usage": { + "prompt_tokens": prompt_token_calculator( + model, messages=self.messages + ), + "completion_tokens": 0, + }, + } + supabaseClient.log_event( + model=self.model, + messages=self.messages, + end_user=self.model_call_details.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=self.model_call_details["litellm_call_id"], + print_verbose=print_verbose, + ) + if callable(callback): # custom logger functions + customLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + if ( + isinstance(callback, CustomLogger) + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + ): # custom logger class + callback.log_failure_event( + start_time=start_time, + end_time=end_time, + response_obj=result, + kwargs=self.model_call_details, + ) + if callback == "langfuse": + global langFuseLogger + verbose_logger.debug("reaches langfuse for logging failure") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if langFuseLogger is None or ( + ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + and ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + ): + langFuseLogger = LangFuseLogger( + langfuse_public_key=self.langfuse_public_key, + langfuse_secret=self.langfuse_secret, + ) + langFuseLogger.log_event( + start_time=start_time, + end_time=end_time, + response_obj=None, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + status_message=str(exception), + level="ERROR", + kwargs=self.model_call_details, + ) + if callback == "traceloop": + traceloopLogger.log_event( + start_time=start_time, + end_time=end_time, + response_obj=None, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + status_message=str(exception), + level="ERROR", + kwargs=self.model_call_details, + ) + if callback == "prometheus": + global prometheusLogger + verbose_logger.debug("reaches prometheus for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + kwargs["exception"] = str(exception) + prometheusLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + + if callback == "logfire": + verbose_logger.debug("reaches logfire for failure logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + kwargs["exception"] = exception + + logfireLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + level=LogfireLevel.ERROR.value, + print_verbose=print_verbose, + ) + except Exception as e: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with 
integrations {str(e)}" + ) + print_verbose( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + + async def async_failure_handler( + self, exception, traceback_exception, start_time=None, end_time=None + ): + """ + Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. + """ + start_time, end_time = self._failure_handler_helper_fn( + exception=exception, + traceback_exception=traceback_exception, + start_time=start_time, + end_time=end_time, + ) + result = None # result sent to all loggers, init this to None incase it's not created + for callback in litellm._async_failure_callback: + try: + if isinstance(callback, CustomLogger): # custom logger class + await callback.async_log_failure_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) # type: ignore + if callable(callback): # custom logger functions + await customLogger.async_log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success \ + logging {}\n{}\nCallback={}".format( + str(e), traceback.format_exc(), callback + ) + ) + + +def set_callbacks(callback_list, function_id=None): + """ + Globally sets the callback client + """ + global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger + + try: + for callback in callback_list: + print_verbose(f"init callback list: {callback}") + if callback == "sentry": + try: + import sentry_sdk + except ImportError: + print_verbose("Package 'sentry_sdk' is missing. Installing it...") + subprocess.check_call( + [sys.executable, "-m", "pip", "install", "sentry_sdk"] + ) + import sentry_sdk + sentry_sdk_instance = sentry_sdk + sentry_trace_rate = ( + os.environ.get("SENTRY_API_TRACE_RATE") + if "SENTRY_API_TRACE_RATE" in os.environ + else "1.0" + ) + sentry_sdk_instance.init( + dsn=os.environ.get("SENTRY_DSN"), + traces_sample_rate=float(sentry_trace_rate), + ) + capture_exception = sentry_sdk_instance.capture_exception + add_breadcrumb = sentry_sdk_instance.add_breadcrumb + elif callback == "posthog": + try: + from posthog import Posthog + except ImportError: + print_verbose("Package 'posthog' is missing. Installing it...") + subprocess.check_call( + [sys.executable, "-m", "pip", "install", "posthog"] + ) + from posthog import Posthog + posthog = Posthog( + project_api_key=os.environ.get("POSTHOG_API_KEY"), + host=os.environ.get("POSTHOG_API_URL"), + ) + elif callback == "slack": + try: + from slack_bolt import App + except ImportError: + print_verbose("Package 'slack_bolt' is missing. 
Installing it...") + subprocess.check_call( + [sys.executable, "-m", "pip", "install", "slack_bolt"] + ) + from slack_bolt import App + slack_app = App( + token=os.environ.get("SLACK_API_TOKEN"), + signing_secret=os.environ.get("SLACK_API_SECRET"), + ) + alerts_channel = os.environ["SLACK_API_CHANNEL"] + print_verbose(f"Initialized Slack App: {slack_app}") + elif callback == "traceloop": + traceloopLogger = TraceloopLogger() + elif callback == "athina": + athinaLogger = AthinaLogger() + print_verbose("Initialized Athina Logger") + elif callback == "helicone": + heliconeLogger = HeliconeLogger() + elif callback == "lunary": + lunaryLogger = LunaryLogger() + elif callback == "promptlayer": + promptLayerLogger = PromptLayerLogger() + elif callback == "langfuse": + langFuseLogger = LangFuseLogger() + elif callback == "openmeter": + openMeterLogger = OpenMeterLogger() + elif callback == "datadog": + dataDogLogger = DataDogLogger() + elif callback == "prometheus": + if prometheusLogger is None: + prometheusLogger = PrometheusLogger() + elif callback == "dynamodb": + dynamoLogger = DyanmoDBLogger() + elif callback == "s3": + s3Logger = S3Logger() + elif callback == "wandb": + weightsBiasesLogger = WeightsBiasesLogger() + elif callback == "langsmith": + langsmithLogger = LangsmithLogger() + elif callback == "logfire": + logfireLogger = LogfireLogger() + elif callback == "aispend": + aispendLogger = AISpendLogger() + elif callback == "berrispend": + berrispendLogger = BerriSpendLogger() + elif callback == "supabase": + print_verbose("instantiating supabase") + supabaseClient = Supabase() + elif callback == "greenscale": + greenscaleLogger = GreenscaleLogger() + print_verbose("Initialized Greenscale Logger") + elif callback == "lite_debugger": + print_verbose("instantiating lite_debugger") + if function_id: + liteDebuggerClient = LiteDebugger(email=function_id) + elif litellm.token: + liteDebuggerClient = LiteDebugger(email=litellm.token) + elif litellm.email: + liteDebuggerClient = LiteDebugger(email=litellm.email) + else: + liteDebuggerClient = LiteDebugger(email=str(uuid.uuid4())) + elif callable(callback): + customLogger = CustomLogger() + except Exception as e: + raise e diff --git a/litellm/litellm_core_utils/redact_messages.py b/litellm/litellm_core_utils/redact_messages.py index 9c0df2011..8f270d8be 100644 --- a/litellm/litellm_core_utils/redact_messages.py +++ b/litellm/litellm_core_utils/redact_messages.py @@ -12,7 +12,9 @@ from typing import TYPE_CHECKING, Any import litellm if TYPE_CHECKING: - from litellm.utils import Logging as _LiteLLMLoggingObject + from litellm.litellm_core_utils.litellm_logging import ( + Logging as _LiteLLMLoggingObject, + ) LiteLLMLoggingObject = _LiteLLMLoggingObject else: diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index 236f7cd4f..808813c05 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -5,7 +5,9 @@ import requests, copy # type: ignore import time from functools import partial from typing import Callable, Optional, List, Union -from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper +import litellm.litellm_core_utils +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from litellm.llms.custom_httpx.http_handler import ( @@ -205,7 +207,7 @@ class AnthropicChatCompletion(BaseLLM): response: Union[requests.Response, 
httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], @@ -320,7 +322,7 @@ class AnthropicChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/llms/base.py b/litellm/llms/base.py index 8c2f5101e..0222d2366 100644 --- a/litellm/llms/base.py +++ b/litellm/llms/base.py @@ -2,7 +2,7 @@ import litellm import httpx, requests from typing import Optional, Union -from litellm.utils import Logging +from litellm.litellm_core_utils.litellm_logging import Logging class BaseLLM: diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index 4314032e7..73fa18023 100644 --- a/litellm/llms/bedrock.py +++ b/litellm/llms/bedrock.py @@ -5,12 +5,10 @@ import time, uuid from typing import Callable, Optional, Any, Union, List import litellm from litellm.utils import ( - ModelResponse, get_secret, - Usage, - ImageResponse, - map_finish_reason, ) +from litellm.litellm_core_utils.core_helpers import map_finish_reason +from litellm.types.utils import ImageResponse, ModelResponse, Usage from .prompt_templates.factory import ( prompt_factory, custom_prompt, @@ -633,7 +631,11 @@ def init_bedrock_client( config = boto3.session.Config() ### CHECK STS ### - if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None: + if ( + aws_web_identity_token is not None + and aws_role_name is not None + and aws_session_name is not None + ): oidc_token = get_secret(aws_web_identity_token) if oidc_token is None: @@ -642,9 +644,7 @@ def init_bedrock_client( status_code=401, ) - sts_client = boto3.client( - "sts" - ) + sts_client = boto3.client("sts") # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py index 7c7210f84..510bf7c7c 100644 --- a/litellm/llms/bedrock_httpx.py +++ b/litellm/llms/bedrock_httpx.py @@ -22,13 +22,12 @@ from typing import ( from litellm.utils import ( ModelResponse, Usage, - map_finish_reason, CustomStreamWrapper, - Message, - Choices, get_secret, - Logging, ) +from litellm.litellm_core_utils.core_helpers import map_finish_reason +from litellm.litellm_core_utils.litellm_logging import Logging +from litellm.types.utils import Message, Choices import litellm, uuid from .prompt_templates.factory import ( prompt_factory, diff --git a/litellm/llms/databricks.py b/litellm/llms/databricks.py index 4fe475259..1ab09246b 100644 --- a/litellm/llms/databricks.py +++ b/litellm/llms/databricks.py @@ -10,10 +10,10 @@ from typing import Callable, Optional, List, Union, Tuple, Literal from litellm.utils import ( ModelResponse, Usage, - map_finish_reason, CustomStreamWrapper, EmbeddingResponse, ) +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler @@ -289,7 +289,7 @@ class DatabricksChatCompletion(BaseLLM): response: Union[requests.Response, 
httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/llms/predibase.py b/litellm/llms/predibase.py index 66c28acee..8ad294457 100644 --- a/litellm/llms/predibase.py +++ b/litellm/llms/predibase.py @@ -12,11 +12,11 @@ from typing import Callable, Optional, List, Literal, Union from litellm.utils import ( ModelResponse, Usage, - map_finish_reason, CustomStreamWrapper, Message, Choices, ) +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler @@ -198,7 +198,7 @@ class PredibaseChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/llms/triton.py b/litellm/llms/triton.py index 711186b3f..d647c9c43 100644 --- a/litellm/llms/triton.py +++ b/litellm/llms/triton.py @@ -4,7 +4,6 @@ from enum import Enum import requests, copy # type: ignore import time from typing import Callable, Optional, List -from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py index 67a8a4519..60d3d5897 100644 --- a/litellm/llms/vertex_ai.py +++ b/litellm/llms/vertex_ai.py @@ -5,7 +5,8 @@ import requests # type: ignore import time from typing import Callable, Optional, Union, List, Literal, Any from pydantic import BaseModel -from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm, uuid import httpx, inspect # type: ignore from litellm.types.llms.vertex_ai import * diff --git a/litellm/llms/vertex_ai_anthropic.py b/litellm/llms/vertex_ai_anthropic.py index 065294280..fd43d4378 100644 --- a/litellm/llms/vertex_ai_anthropic.py +++ b/litellm/llms/vertex_ai_anthropic.py @@ -6,7 +6,8 @@ from enum import Enum import requests, copy # type: ignore import time, uuid from typing import Callable, Optional, List -from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from .prompt_templates.factory import ( diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py index b1c38f0bc..c9e48f3e1 100644 --- a/litellm/llms/vertex_httpx.py +++ b/litellm/llms/vertex_httpx.py @@ -8,7 +8,10 @@ from enum import Enum import requests # type: ignore import time from typing import Callable, Optional, Union, List, Any, Tuple -from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason +import litellm.litellm_core_utils +import litellm.litellm_core_utils.litellm_logging +from litellm.utils import ModelResponse, Usage, 
CustomStreamWrapper +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm, uuid import httpx, inspect # type: ignore from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler @@ -320,7 +323,7 @@ class VertexLLM(BaseLLM): model: str, response: httpx.Response, model_response: ModelResponse, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html deleted file mode 100644 index 794e8d66a..000000000 --- a/litellm/proxy/_experimental/out/404.html +++ /dev/null @@ -1 +0,0 @@ -404: This page could not be found.LiteLLM Dashboard
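# Editorial sketch (not part of the PR diff): a minimal example of the callback
# wiring implemented in litellm_logging.py above, assuming the usual litellm
# client path calls set_callbacks() and failure_handler() as shown. A plain
# function placed in litellm.failure_callback is treated as a "custom logger
# function" and invoked through customLogger.log_event(kwargs, response_obj,
# start_time, end_time); the function body and api_key below are illustrative.
import litellm

def log_failure(kwargs, response_obj, start_time, end_time):
    # kwargs is the logging object's model_call_details dict; on failure it
    # carries "exception" and "traceback_exception" (set in _failure_handler_helper_fn).
    print("LLM call failed:", kwargs.get("exception"))

litellm.failure_callback = [log_failure]

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        api_key="bad-key",  # force a failure so failure_handler() runs
    )
except Exception:
    pass  # each registered failure callback has been invoked by failure_handler()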

\ No newline at end of file diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html deleted file mode 100644 index dda9f78c2..000000000 --- a/litellm/proxy/_experimental/out/model_hub.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html deleted file mode 100644 index 61a21232d..000000000 --- a/litellm/proxy/_experimental/out/onboarding.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index c0b0543e8..badc77546 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -12,6 +12,8 @@ import litellm import backoff import traceback from pydantic import BaseModel +import litellm.litellm_core_utils +import litellm.litellm_core_utils.litellm_logging from litellm.proxy._types import ( UserAPIKeyAuth, DynamoDBArgs, @@ -266,7 +268,9 @@ class ProxyLogging: + litellm.failure_callback ) ) - litellm.utils.set_callbacks(callback_list=callback_list) + litellm.litellm_core_utils.litellm_logging.set_callbacks( + callback_list=callback_list + ) # The actual implementation of the function async def pre_call_hook( @@ -331,7 +335,9 @@ class ProxyLogging: return data except Exception as e: if "litellm_logging_obj" in data: - logging_obj: litellm.utils.Logging = data["litellm_logging_obj"] + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging = data[ + "litellm_logging_obj" + ] ## ASYNC FAILURE HANDLER ## error_message = "" diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py index c0be350f9..3f7288854 100644 --- a/litellm/tests/test_completion_cost.py +++ b/litellm/tests/test_completion_cost.py @@ -13,7 +13,7 @@ from litellm import ( open_ai_chat_completion_models, TranscriptionResponse, ) -from litellm.utils import CustomLogger +from litellm.litellm_core_utils.litellm_logging import CustomLogger import pytest, asyncio diff --git a/litellm/tests/test_utils.py b/litellm/tests/test_utils.py index 7d581a0fb..bf84ba994 100644 --- a/litellm/tests/test_utils.py +++ b/litellm/tests/test_utils.py @@ -412,7 +412,7 @@ def test_redact_msgs_from_logs(): from litellm.litellm_core_utils.redact_messages import ( redact_message_input_output_from_logging, ) - from litellm.utils import Logging + from litellm.litellm_core_utils.litellm_logging import Logging litellm.turn_off_message_logging = True diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 1fbb375d3..29d21143e 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -3,6 +3,16 @@ from typing_extensions import TypedDict from enum import Enum from typing_extensions import override, Required, Dict from .llms.openai import ChatCompletionUsageBlock, ChatCompletionToolCallChunk +from ..litellm_core_utils.core_helpers import map_finish_reason +from openai._models import BaseModel as OpenAIObject +from pydantic import ConfigDict +import uuid +import json +import time + + +def _generate_id(): # private helper function + return "chatcmpl-" + str(uuid.uuid4()) class LiteLLMCommonStrings(Enum): @@ -48,3 +58,904 @@ class GenericStreamingChunk(TypedDict): finish_reason: Required[str] usage: Optional[ChatCompletionUsageBlock] index: int + + +from enum import Enum + + +class CallTypes(Enum): + embedding = "embedding" + aembedding = "aembedding" + completion = "completion" + acompletion = "acompletion" + atext_completion = 
"atext_completion" + text_completion = "text_completion" + image_generation = "image_generation" + aimage_generation = "aimage_generation" + moderation = "moderation" + amoderation = "amoderation" + atranscription = "atranscription" + transcription = "transcription" + aspeech = "aspeech" + speech = "speech" + + +class TopLogprob(OpenAIObject): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + +class ChatCompletionTokenLogprob(OpenAIObject): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + top_logprobs: List[TopLogprob] + """List of the most likely tokens and their log probability, at this token + position. + + In rare cases, there may be fewer than the number of requested `top_logprobs` + returned. 
+ """ + + +class ChoiceLogprobs(OpenAIObject): + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + +class FunctionCall(OpenAIObject): + arguments: str + name: Optional[str] = None + + +class Function(OpenAIObject): + arguments: str + name: Optional[str] = None + + def __init__( + self, + arguments: Union[Dict, str], + name: Optional[str] = None, + **params, + ): + if isinstance(arguments, Dict): + arguments = json.dumps(arguments) + else: + arguments = arguments + + name = name + + # Build a dictionary with the structure your BaseModel expects + data = {"arguments": arguments, "name": name, **params} + + super(Function, self).__init__(**data) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class ChatCompletionDeltaToolCall(OpenAIObject): + id: Optional[str] = None + function: Function + type: Optional[str] = None + index: int + + +class HiddenParams(OpenAIObject): + original_response: Optional[str] = None + model_id: Optional[str] = None # used in Router for individual deployments + api_base: Optional[str] = None # returns api base used for making completion call + + model_config = ConfigDict(extra="allow", protected_namespaces=()) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class ChatCompletionMessageToolCall(OpenAIObject): + def __init__( + self, + function: Union[Dict, Function], + id: Optional[str] = None, + type: Optional[str] = None, + **params, + ): + super(ChatCompletionMessageToolCall, self).__init__(**params) + if isinstance(function, Dict): + self.function = Function(**function) + else: + self.function = function + + if id is not None: + self.id = id + else: + self.id = f"{uuid.uuid4()}" + + if type is not None: + self.type = type + else: + self.type = "function" + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class Message(OpenAIObject): + def __init__( + self, + content: Optional[str] = "default", + role="assistant", + logprobs=None, + function_call=None, + tool_calls=None, + **params, + ): + super(Message, self).__init__(**params) + self.content = content + self.role = role + if function_call is 
not None: + self.function_call = FunctionCall(**function_call) + + if tool_calls is not None: + self.tool_calls = [] + for tool_call in tool_calls: + self.tool_calls.append(ChatCompletionMessageToolCall(**tool_call)) + + if logprobs is not None: + self._logprobs = ChoiceLogprobs(**logprobs) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class Delta(OpenAIObject): + def __init__( + self, + content=None, + role=None, + function_call=None, + tool_calls=None, + **params, + ): + super(Delta, self).__init__(**params) + self.content = content + self.role = role + + if function_call is not None and isinstance(function_call, dict): + self.function_call = FunctionCall(**function_call) + else: + self.function_call = function_call + if tool_calls is not None and isinstance(tool_calls, list): + self.tool_calls = [] + for tool_call in tool_calls: + if isinstance(tool_call, dict): + if tool_call.get("index", None) is None: + tool_call["index"] = 0 + self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) + elif isinstance(tool_call, ChatCompletionDeltaToolCall): + self.tool_calls.append(tool_call) + else: + self.tool_calls = tool_calls + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class Choices(OpenAIObject): + def __init__( + self, + finish_reason=None, + index=0, + message: Optional[Union[Message, dict]] = None, + logprobs=None, + enhancements=None, + **params, + ): + super(Choices, self).__init__(**params) + if finish_reason is not None: + self.finish_reason = map_finish_reason( + finish_reason + ) # set finish_reason for all responses + else: + self.finish_reason = "stop" + self.index = index + if message is None: + self.message = Message() + else: + if isinstance(message, Message): + self.message = message + elif isinstance(message, dict): + self.message = Message(**message) + if logprobs is not None: + self.logprobs = logprobs + if enhancements is not None: + self.enhancements = enhancements + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class Usage(OpenAIObject): + def __init__( + self, prompt_tokens=None, completion_tokens=None, total_tokens=None, **params + ): + super(Usage, 
self).__init__(**params) + if prompt_tokens: + self.prompt_tokens = prompt_tokens + if completion_tokens: + self.completion_tokens = completion_tokens + if total_tokens: + self.total_tokens = total_tokens + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class StreamingChoices(OpenAIObject): + def __init__( + self, + finish_reason=None, + index=0, + delta: Optional[Delta] = None, + logprobs=None, + enhancements=None, + **params, + ): + super(StreamingChoices, self).__init__(**params) + if finish_reason: + self.finish_reason = finish_reason + else: + self.finish_reason = None + self.index = index + if delta is not None: + if isinstance(delta, Delta): + self.delta = delta + elif isinstance(delta, dict): + self.delta = Delta(**delta) + else: + self.delta = Delta() + if enhancements is not None: + self.enhancements = enhancements + + if logprobs is not None and isinstance(logprobs, dict): + self.logprobs = ChoiceLogprobs(**logprobs) + else: + self.logprobs = logprobs # type: ignore + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class ModelResponse(OpenAIObject): + id: str + """A unique identifier for the completion.""" + + choices: List[Union[Choices, StreamingChoices]] + """The list of completion choices the model generated for the input prompt.""" + + created: int + """The Unix timestamp (in seconds) of when the completion was created.""" + + model: Optional[str] = None + """The model used for completion.""" + + object: str + """The object type, which is always "text_completion" """ + + system_fingerprint: Optional[str] = None + """This fingerprint represents the backend configuration that the model runs with. + + Can be used in conjunction with the `seed` request parameter to understand when + backend changes have been made that might impact determinism. 
+ """ + + _hidden_params: dict = {} + + def __init__( + self, + id=None, + choices=None, + created=None, + model=None, + object=None, + system_fingerprint=None, + usage=None, + stream=None, + stream_options=None, + response_ms=None, + hidden_params=None, + **params, + ): + if stream is not None and stream is True: + object = "chat.completion.chunk" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + if isinstance(choice, StreamingChoices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = StreamingChoices(**choice) + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [StreamingChoices()] + else: + object = "chat.completion" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + if isinstance(choice, Choices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = Choices(**choice) + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [Choices()] + if id is None: + id = _generate_id() + else: + id = id + if created is None: + created = int(time.time()) + else: + created = created + model = model + if usage is not None: + if isinstance(usage, dict): + usage = Usage(**usage) + else: + usage = usage + elif stream is None or stream is False: + usage = Usage() + if hidden_params: + self._hidden_params = hidden_params + + init_values = { + "id": id, + "choices": choices, + "created": created, + "model": model, + "object": object, + "system_fingerprint": system_fingerprint, + } + + if usage is not None: + init_values["usage"] = usage + + super().__init__( + **init_values, + **params, + ) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class Embedding(OpenAIObject): + embedding: Union[list, str] = [] + index: int + object: str + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class EmbeddingResponse(OpenAIObject): + model: Optional[str] = None + """The model used for embedding.""" + + data: Optional[List] = None + """The actual embedding value""" + + object: str + """The object type, which is always "embedding" """ + + usage: Optional[Usage] = None + """Usage statistics for the embedding request.""" + + _hidden_params: dict = {} + + def __init__( + self, + model=None, + usage=None, + stream=False, + response_ms=None, + data=None, + **params, + ): + object = "list" + if response_ms: + _response_ms = response_ms + else: + _response_ms = None + if data: + data = data + else: + data = None + + if usage: + usage = usage + else: + usage = Usage() + + model = model + 
super().__init__(model=model, object=object, data=data, usage=usage) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class Logprobs(OpenAIObject): + text_offset: List[int] + token_logprobs: List[float] + tokens: List[str] + top_logprobs: List[Dict[str, float]] + + +class TextChoices(OpenAIObject): + def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params): + super(TextChoices, self).__init__(**params) + if finish_reason: + self.finish_reason = map_finish_reason(finish_reason) + else: + self.finish_reason = None + self.index = index + if text is not None: + self.text = text + else: + self.text = None + if logprobs is None: + self.logprobs = None + else: + if isinstance(logprobs, dict): + self.logprobs = Logprobs(**logprobs) + else: + self.logprobs = logprobs + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class TextCompletionResponse(OpenAIObject): + """ + { + "id": response["id"], + "object": "text_completion", + "created": response["created"], + "model": response["model"], + "choices": [ + { + "text": response["choices"][0]["message"]["content"], + "index": response["choices"][0]["index"], + "logprobs": transformed_logprobs, + "finish_reason": response["choices"][0]["finish_reason"] + } + ], + "usage": response["usage"] + } + """ + + id: str + object: str + created: int + model: Optional[str] + choices: List[TextChoices] + usage: Optional[Usage] + _response_ms: Optional[int] = None + _hidden_params: HiddenParams + + def __init__( + self, + id=None, + choices=None, + created=None, + model=None, + usage=None, + stream=False, + response_ms=None, + object=None, + **params, + ): + if stream: + object = "text_completion.chunk" + choices = [TextChoices()] + else: + object = "text_completion" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + if isinstance(choice, TextChoices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = TextChoices(**choice) + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [TextChoices()] + if object is not None: + object = object + if id is None: + id = _generate_id() + else: + id = id + if created is None: + created = int(time.time()) + else: + created = created + + model = model + if usage: + usage = usage + else: + usage = Usage() + + super(TextCompletionResponse, 
self).__init__( + id=id, + object=object, + created=created, + model=model, + choices=choices, + usage=usage, + **params, + ) + + if response_ms: + self._response_ms = response_ms + else: + self._response_ms = None + self._hidden_params = HiddenParams() + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class ImageObject(OpenAIObject): + """ + Represents the url or the content of an image generated by the OpenAI API. + + Attributes: + b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json. + url: The URL of the generated image, if response_format is url (default). + revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt. + + https://platform.openai.com/docs/api-reference/images/object + """ + + b64_json: Optional[str] = None + url: Optional[str] = None + revised_prompt: Optional[str] = None + + def __init__(self, b64_json=None, url=None, revised_prompt=None): + super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class ImageResponse(OpenAIObject): + created: Optional[int] = None + + data: Optional[List[ImageObject]] = None + + usage: Optional[dict] = None + + _hidden_params: dict = {} + + def __init__(self, created=None, data=None, response_ms=None): + if response_ms: + _response_ms = response_ms + else: + _response_ms = None + if data: + data = data + else: + data = None + + if created: + created = created + else: + created = None + + super().__init__(data=data, created=created) + self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class TranscriptionResponse(OpenAIObject): + text: Optional[str] = None + + _hidden_params: dict = {} + + def __init__(self, text=None): + super().__init__(text=text) + + def __contains__(self, key): + # Define custom 
behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() diff --git a/litellm/utils.py b/litellm/utils.py index 7f37bcf7c..6bc33d73d 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -6,6 +6,9 @@ # +-----------------------------------------------+ # # Thank you users! We ❤️ you! - Krrish & Ishaan + +# What is this? +## Generic utils.py file. Problem-specific utils (e.g. 'cost calculation), should all be in `litellm_core_utils/`. import sys, re, binascii, struct import litellm import dotenv, json, traceback, threading, base64, ast @@ -18,7 +21,7 @@ from functools import wraps, lru_cache import datetime, time import tiktoken import uuid -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel import aiohttp import textwrap import logging @@ -32,9 +35,29 @@ from dataclasses import ( ) import os import litellm._service_logger # for storing API inputs, outputs, and metadata +import litellm.litellm_core_utils +from litellm.litellm_core_utils.core_helpers import map_finish_reason from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler from litellm.caching import DualCache -from litellm.types.utils import CostPerToken, ProviderField, ModelInfo +from litellm.types.utils import ( + CostPerToken, + ProviderField, + ModelInfo, + CallTypes, + ModelResponse, + EmbeddingResponse, + ImageResponse, + TranscriptionResponse, + TextCompletionResponse, + ChatCompletionDeltaToolCall, + Message, + Delta, + Choices, + Usage, + StreamingChoices, + Embedding, + TextChoices, +) from litellm.litellm_core_utils.redact_messages import ( redact_message_input_output_from_logging, ) @@ -71,32 +94,9 @@ from .types.llms.openai import ( ChatCompletionToolCallFunctionChunk, ChatCompletionDeltaToolCallChunk, ) -from .integrations.traceloop import TraceloopLogger -from .integrations.athina import AthinaLogger -from .integrations.helicone import HeliconeLogger -from .integrations.aispend import AISpendLogger -from .integrations.berrispend import BerriSpendLogger -from .integrations.supabase import Supabase -from .integrations.lunary import LunaryLogger -from .integrations.prompt_layer import PromptLayerLogger -from .integrations.langsmith import LangsmithLogger -from .integrations.logfire_logger import LogfireLogger, LogfireLevel -from .integrations.weights_biases import WeightsBiasesLogger -from .integrations.custom_logger import CustomLogger -from .integrations.langfuse import LangFuseLogger -from .integrations.openmeter import OpenMeterLogger -from .integrations.lago import LagoLogger -from .integrations.datadog import DataDogLogger -from .integrations.prometheus import PrometheusLogger -from .integrations.prometheus_services import PrometheusServicesLogger -from .integrations.dynamodb import DyanmoDBLogger -from .integrations.s3 import S3Logger -from .integrations.clickhouse import ClickhouseLogger -from .integrations.greenscale import GreenscaleLogger -from .integrations.litedebugger import LiteDebugger + from .proxy._types import KeyManagementSystem from 
openai import OpenAIError as OriginalError -from openai._models import BaseModel as OpenAIObject from .caching import S3Cache, RedisSemanticCache, RedisCache from .exceptions import ( AuthenticationError, @@ -179,6 +179,8 @@ local_cache: Optional[Dict[str, str]] = {} last_fetched_at = None last_fetched_at_keys = None ######## Model Response ######################### + + # All liteLLM Model responses will be in this format, Follows the OpenAI Format # https://docs.litellm.ai/docs/completion/output # { @@ -209,933 +211,6 @@ class UnsupportedParamsError(Exception): ) # Call the base class constructor with the parameters it needs -def _generate_id(): # private helper function - return "chatcmpl-" + str(uuid.uuid4()) - - -def map_finish_reason( - finish_reason: str, -): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null' - # anthropic mapping - if finish_reason == "stop_sequence": - return "stop" - # cohere mapping - https://docs.cohere.com/reference/generate - elif finish_reason == "COMPLETE": - return "stop" - elif finish_reason == "MAX_TOKENS": # cohere + vertex ai - return "length" - elif finish_reason == "ERROR_TOXIC": - return "content_filter" - elif ( - finish_reason == "ERROR" - ): # openai currently doesn't support an 'error' finish reason - return "stop" - # huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream - elif finish_reason == "eos_token" or finish_reason == "stop_sequence": - return "stop" - elif ( - finish_reason == "FINISH_REASON_UNSPECIFIED" or finish_reason == "STOP" - ): # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',] - return "stop" - elif finish_reason == "SAFETY": # vertex ai - return "content_filter" - elif finish_reason == "STOP": # vertex ai - return "stop" - elif finish_reason == "end_turn" or finish_reason == "stop_sequence": # anthropic - return "stop" - elif finish_reason == "max_tokens": # anthropic - return "length" - elif finish_reason == "tool_use": # anthropic - return "tool_calls" - elif finish_reason == "content_filtered": - return "content_filter" - return finish_reason - - -class TopLogprob(OpenAIObject): - token: str - """The token.""" - - bytes: Optional[List[int]] = None - """A list of integers representing the UTF-8 bytes representation of the token. - - Useful in instances where characters are represented by multiple tokens and - their byte representations must be combined to generate the correct text - representation. Can be `null` if there is no bytes representation for the token. - """ - - logprob: float - """The log probability of this token, if it is within the top 20 most likely - tokens. - - Otherwise, the value `-9999.0` is used to signify that the token is very - unlikely. - """ - - -class ChatCompletionTokenLogprob(OpenAIObject): - token: str - """The token.""" - - bytes: Optional[List[int]] = None - """A list of integers representing the UTF-8 bytes representation of the token. - - Useful in instances where characters are represented by multiple tokens and - their byte representations must be combined to generate the correct text - representation. Can be `null` if there is no bytes representation for the token. - """ - - logprob: float - """The log probability of this token, if it is within the top 20 most likely - tokens. 
- - Otherwise, the value `-9999.0` is used to signify that the token is very - unlikely. - """ - - top_logprobs: List[TopLogprob] - """List of the most likely tokens and their log probability, at this token - position. - - In rare cases, there may be fewer than the number of requested `top_logprobs` - returned. - """ - - -class ChoiceLogprobs(OpenAIObject): - content: Optional[List[ChatCompletionTokenLogprob]] = None - """A list of message content tokens with log probability information.""" - - -class FunctionCall(OpenAIObject): - arguments: str - name: Optional[str] = None - - -class Function(OpenAIObject): - arguments: str - name: Optional[str] = None - - def __init__( - self, - arguments: Union[Dict, str], - name: Optional[str] = None, - **params, - ): - if isinstance(arguments, Dict): - arguments = json.dumps(arguments) - else: - arguments = arguments - - name = name - - # Build a dictionary with the structure your BaseModel expects - data = {"arguments": arguments, "name": name, **params} - - super(Function, self).__init__(**data) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class ChatCompletionDeltaToolCall(OpenAIObject): - id: Optional[str] = None - function: Function - type: Optional[str] = None - index: int - - -class HiddenParams(OpenAIObject): - original_response: Optional[str] = None - model_id: Optional[str] = None # used in Router for individual deployments - api_base: Optional[str] = None # returns api base used for making completion call - - model_config = ConfigDict(extra="allow", protected_namespaces=()) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class ChatCompletionMessageToolCall(OpenAIObject): - def __init__( - self, - function: Union[Dict, Function], - id: Optional[str] = None, - type: Optional[str] = None, - **params, - ): - super(ChatCompletionMessageToolCall, self).__init__(**params) - if isinstance(function, Dict): - self.function = Function(**function) - else: - self.function = function - - if id is not None: - self.id = id - else: - self.id = f"{uuid.uuid4()}" - - if type is not None: - self.type = type - else: - self.type = "function" - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - 
setattr(self, key, value) - - -class Message(OpenAIObject): - def __init__( - self, - content: Optional[str] = "default", - role="assistant", - logprobs=None, - function_call=None, - tool_calls=None, - **params, - ): - super(Message, self).__init__(**params) - self.content = content - self.role = role - if function_call is not None: - self.function_call = FunctionCall(**function_call) - - if tool_calls is not None: - self.tool_calls = [] - for tool_call in tool_calls: - self.tool_calls.append(ChatCompletionMessageToolCall(**tool_call)) - - if logprobs is not None: - self._logprobs = ChoiceLogprobs(**logprobs) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class Delta(OpenAIObject): - def __init__( - self, - content=None, - role=None, - function_call=None, - tool_calls=None, - **params, - ): - super(Delta, self).__init__(**params) - self.content = content - self.role = role - - if function_call is not None and isinstance(function_call, dict): - self.function_call = FunctionCall(**function_call) - else: - self.function_call = function_call - if tool_calls is not None and isinstance(tool_calls, list): - self.tool_calls = [] - for tool_call in tool_calls: - if isinstance(tool_call, dict): - if tool_call.get("index", None) is None: - tool_call["index"] = 0 - self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) - elif isinstance(tool_call, ChatCompletionDeltaToolCall): - self.tool_calls.append(tool_call) - else: - self.tool_calls = tool_calls - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class Choices(OpenAIObject): - def __init__( - self, - finish_reason=None, - index=0, - message: Optional[Union[Message, dict]] = None, - logprobs=None, - enhancements=None, - **params, - ): - super(Choices, self).__init__(**params) - if finish_reason is not None: - self.finish_reason = map_finish_reason( - finish_reason - ) # set finish_reason for all responses - else: - self.finish_reason = "stop" - self.index = index - if message is None: - self.message = Message() - else: - if isinstance(message, Message): - self.message = message - elif isinstance(message, dict): - self.message = Message(**message) - if logprobs is not None: - self.logprobs = logprobs - if enhancements is not None: - self.enhancements = enhancements - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access 
to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class Usage(OpenAIObject): - def __init__( - self, prompt_tokens=None, completion_tokens=None, total_tokens=None, **params - ): - super(Usage, self).__init__(**params) - if prompt_tokens: - self.prompt_tokens = prompt_tokens - if completion_tokens: - self.completion_tokens = completion_tokens - if total_tokens: - self.total_tokens = total_tokens - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class StreamingChoices(OpenAIObject): - def __init__( - self, - finish_reason=None, - index=0, - delta: Optional[Delta] = None, - logprobs=None, - enhancements=None, - **params, - ): - super(StreamingChoices, self).__init__(**params) - if finish_reason: - self.finish_reason = finish_reason - else: - self.finish_reason = None - self.index = index - if delta is not None: - if isinstance(delta, Delta): - self.delta = delta - elif isinstance(delta, dict): - self.delta = Delta(**delta) - else: - self.delta = Delta() - if enhancements is not None: - self.enhancements = enhancements - - if logprobs is not None and isinstance(logprobs, dict): - self.logprobs = ChoiceLogprobs(**logprobs) - else: - self.logprobs = logprobs # type: ignore - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class ModelResponse(OpenAIObject): - id: str - """A unique identifier for the completion.""" - - choices: List[Union[Choices, StreamingChoices]] - """The list of completion choices the model generated for the input prompt.""" - - created: int - """The Unix timestamp (in seconds) of when the completion was created.""" - - model: Optional[str] = None - """The model used for completion.""" - - object: str - """The object type, which is always "text_completion" """ - - system_fingerprint: Optional[str] = None - """This fingerprint represents the backend configuration that the model runs with. - - Can be used in conjunction with the `seed` request parameter to understand when - backend changes have been made that might impact determinism. 
- """ - - _hidden_params: dict = {} - - def __init__( - self, - id=None, - choices=None, - created=None, - model=None, - object=None, - system_fingerprint=None, - usage=None, - stream=None, - stream_options=None, - response_ms=None, - hidden_params=None, - **params, - ): - if stream is not None and stream == True: - object = "chat.completion.chunk" - if choices is not None and isinstance(choices, list): - new_choices = [] - for choice in choices: - if isinstance(choice, StreamingChoices): - _new_choice = choice - elif isinstance(choice, dict): - _new_choice = StreamingChoices(**choice) - new_choices.append(_new_choice) - choices = new_choices - else: - choices = [StreamingChoices()] - else: - if model in litellm.open_ai_embedding_models: - object = "embedding" - else: - object = "chat.completion" - if choices is not None and isinstance(choices, list): - new_choices = [] - for choice in choices: - if isinstance(choice, Choices): - _new_choice = choice - elif isinstance(choice, dict): - _new_choice = Choices(**choice) - new_choices.append(_new_choice) - choices = new_choices - else: - choices = [Choices()] - if id is None: - id = _generate_id() - else: - id = id - if created is None: - created = int(time.time()) - else: - created = created - model = model - if usage is not None: - if isinstance(usage, dict): - usage = Usage(**usage) - else: - usage = usage - elif stream is None or stream == False: - usage = Usage() - if hidden_params: - self._hidden_params = hidden_params - - init_values = { - "id": id, - "choices": choices, - "created": created, - "model": model, - "object": object, - "system_fingerprint": system_fingerprint, - } - - if usage is not None: - init_values["usage"] = usage - - super().__init__( - **init_values, - **params, - ) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class Embedding(OpenAIObject): - embedding: Union[list, str] = [] - index: int - object: str - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class EmbeddingResponse(OpenAIObject): - model: Optional[str] = None - """The model used for embedding.""" - - data: Optional[List] = None - """The actual embedding value""" - - object: str - """The object type, which is always "embedding" """ - - usage: Optional[Usage] = None - """Usage statistics for the embedding request.""" - - _hidden_params: dict = {} - - def __init__( - self, - model=None, - usage=None, - stream=False, - response_ms=None, - data=None, - **params, - ): - object = "list" - if response_ms: - _response_ms = response_ms - else: - _response_ms = None - if data: - data = data - else: - data = None - - if usage: - usage = 
usage - else: - usage = Usage() - - model = model - super().__init__(model=model, object=object, data=data, usage=usage) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class Logprobs(OpenAIObject): - text_offset: List[int] - token_logprobs: List[float] - tokens: List[str] - top_logprobs: List[Dict[str, float]] - - -class TextChoices(OpenAIObject): - def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params): - super(TextChoices, self).__init__(**params) - if finish_reason: - self.finish_reason = map_finish_reason(finish_reason) - else: - self.finish_reason = None - self.index = index - if text is not None: - self.text = text - else: - self.text = None - if logprobs is None: - self.logprobs = None - else: - if isinstance(logprobs, dict): - self.logprobs = Logprobs(**logprobs) - else: - self.logprobs = logprobs - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class TextCompletionResponse(OpenAIObject): - """ - { - "id": response["id"], - "object": "text_completion", - "created": response["created"], - "model": response["model"], - "choices": [ - { - "text": response["choices"][0]["message"]["content"], - "index": response["choices"][0]["index"], - "logprobs": transformed_logprobs, - "finish_reason": response["choices"][0]["finish_reason"] - } - ], - "usage": response["usage"] - } - """ - - id: str - object: str - created: int - model: Optional[str] - choices: List[TextChoices] - usage: Optional[Usage] - _response_ms: Optional[int] = None - _hidden_params: HiddenParams - - def __init__( - self, - id=None, - choices=None, - created=None, - model=None, - usage=None, - stream=False, - response_ms=None, - object=None, - **params, - ): - if stream: - object = "text_completion.chunk" - choices = [TextChoices()] - else: - object = "text_completion" - if choices is not None and isinstance(choices, list): - new_choices = [] - for choice in choices: - if isinstance(choice, TextChoices): - _new_choice = choice - elif isinstance(choice, dict): - _new_choice = TextChoices(**choice) - new_choices.append(_new_choice) - choices = new_choices - else: - choices = [TextChoices()] - if object is not None: - object = object - if id is None: - id = _generate_id() - else: - id = id - if created is None: - created = int(time.time()) - else: - created = created - - model = model - if usage: - usage = usage - else: - usage = 
Usage() - - super(TextCompletionResponse, self).__init__( - id=id, - object=object, - created=created, - model=model, - choices=choices, - usage=usage, - **params, - ) - - if response_ms: - self._response_ms = response_ms - else: - self._response_ms = None - self._hidden_params = HiddenParams() - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class ImageObject(OpenAIObject): - """ - Represents the url or the content of an image generated by the OpenAI API. - - Attributes: - b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json. - url: The URL of the generated image, if response_format is url (default). - revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt. - - https://platform.openai.com/docs/api-reference/images/object - """ - - b64_json: Optional[str] = None - url: Optional[str] = None - revised_prompt: Optional[str] = None - - def __init__(self, b64_json=None, url=None, revised_prompt=None): - super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class ImageResponse(OpenAIObject): - created: Optional[int] = None - - data: Optional[List[ImageObject]] = None - - usage: Optional[dict] = None - - _hidden_params: dict = {} - - def __init__(self, created=None, data=None, response_ms=None): - if response_ms: - _response_ms = response_ms - else: - _response_ms = None - if data: - data = data - else: - data = None - - if created: - created = created - else: - created = None - - super().__init__(data=data, created=created) - self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class TranscriptionResponse(OpenAIObject): - text: Optional[str] = None - - _hidden_params: dict = {} - - def __init__(self, text=None): - super().__init__(text=text) - - def 
__contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - ############################################################ def print_verbose( print_statement, @@ -1156,1602 +231,6 @@ def print_verbose( ####### LOGGING ################### -from enum import Enum - - -class CallTypes(Enum): - embedding = "embedding" - aembedding = "aembedding" - completion = "completion" - acompletion = "acompletion" - atext_completion = "atext_completion" - text_completion = "text_completion" - image_generation = "image_generation" - aimage_generation = "aimage_generation" - moderation = "moderation" - amoderation = "amoderation" - atranscription = "atranscription" - transcription = "transcription" - aspeech = "aspeech" - speech = "speech" - - -# Logging function -> log the exact model details + what's being sent | Non-BlockingP -class Logging: - global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger - - custom_pricing: bool = False - stream_options = None - - def __init__( - self, - model, - messages, - stream, - call_type, - start_time, - litellm_call_id, - function_id, - dynamic_success_callbacks=None, - dynamic_failure_callbacks=None, - dynamic_async_success_callbacks=None, - langfuse_public_key=None, - langfuse_secret=None, - ): - if call_type not in [item.value for item in CallTypes]: - allowed_values = ", ".join([item.value for item in CallTypes]) - raise ValueError( - f"Invalid call_type {call_type}. 
Allowed values: {allowed_values}" - ) - if messages is not None: - if isinstance(messages, str): - messages = [ - {"role": "user", "content": messages} - ] # convert text completion input to the chat completion format - elif ( - isinstance(messages, list) - and len(messages) > 0 - and isinstance(messages[0], str) - ): - new_messages = [] - for m in messages: - new_messages.append({"role": "user", "content": m}) - messages = new_messages - self.model = model - self.messages = messages - self.stream = stream - self.start_time = start_time # log the call start time - self.call_type = call_type - self.litellm_call_id = litellm_call_id - self.function_id = function_id - self.streaming_chunks = [] # for generating complete stream response - self.sync_streaming_chunks = [] # for generating complete stream response - self.model_call_details = {} - self.dynamic_input_callbacks = [] # [TODO] callbacks set for just that call - self.dynamic_failure_callbacks = dynamic_failure_callbacks - self.dynamic_success_callbacks = ( - dynamic_success_callbacks # callbacks set for just that call - ) - self.dynamic_async_success_callbacks = ( - dynamic_async_success_callbacks # callbacks set for just that call - ) - ## DYNAMIC LANGFUSE KEYS ## - self.langfuse_public_key = langfuse_public_key - self.langfuse_secret = langfuse_secret - ## TIME TO FIRST TOKEN LOGGING ## - self.completion_start_time: Optional[datetime.datetime] = None - - def update_environment_variables( - self, model, user, optional_params, litellm_params, **additional_params - ): - self.optional_params = optional_params - self.model = model - self.user = user - self.litellm_params = litellm_params - self.logger_fn = litellm_params.get("logger_fn", None) - print_verbose(f"self.optional_params: {self.optional_params}") - - self.model_call_details = { - "model": self.model, - "messages": self.messages, - "optional_params": self.optional_params, - "litellm_params": self.litellm_params, - "start_time": self.start_time, - "stream": self.stream, - "user": user, - "call_type": str(self.call_type), - "litellm_call_id": self.litellm_call_id, - "completion_start_time": self.completion_start_time, - **self.optional_params, - **additional_params, - } - - ## check if stream options is set ## - used by CustomStreamWrapper for easy instrumentation - if "stream_options" in additional_params: - self.stream_options = additional_params["stream_options"] - ## check if custom pricing set ## - if ( - litellm_params.get("input_cost_per_token") is not None - or litellm_params.get("input_cost_per_second") is not None - or litellm_params.get("output_cost_per_token") is not None - or litellm_params.get("output_cost_per_second") is not None - ): - self.custom_pricing = True - - def _pre_call(self, input, api_key, model=None, additional_args={}): - """ - Common helper function across the sync + async pre-call function - """ - # print_verbose(f"logging pre call for model: {self.model} with call type: {self.call_type}") - self.model_call_details["input"] = input - self.model_call_details["api_key"] = api_key - self.model_call_details["additional_args"] = additional_args - self.model_call_details["log_event_type"] = "pre_api_call" - if ( - model - ): # if model name was changes pre-call, overwrite the initial model call name with the new one - self.model_call_details["model"] = model - - def pre_call(self, input, api_key, model=None, additional_args={}): - # Log the exact input to the LLM API - litellm.error_logs["PRE_CALL"] = locals() - try: - self._pre_call( - input=input, - 
api_key=api_key, - model=model, - additional_args=additional_args, - ) - - # User Logging -> if you pass in a custom logging function - headers = additional_args.get("headers", {}) - if headers is None: - headers = {} - data = additional_args.get("complete_input_dict", {}) - api_base = additional_args.get("api_base", "") - self.model_call_details["litellm_params"]["api_base"] = str( - api_base - ) # used for alerting - masked_headers = { - k: ( - (v[:-44] + "*" * 44) - if (isinstance(v, str) and len(v) > 44) - else "*****" - ) - for k, v in headers.items() - } - formatted_headers = " ".join( - [f"-H '{k}: {v}'" for k, v in masked_headers.items()] - ) - - verbose_logger.debug(f"PRE-API-CALL ADDITIONAL ARGS: {additional_args}") - - curl_command = "\n\nPOST Request Sent from LiteLLM:\n" - curl_command += "curl -X POST \\\n" - curl_command += f"{api_base} \\\n" - curl_command += ( - f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else "" - ) - curl_command += f"-d '{str(data)}'\n" - if additional_args.get("request_str", None) is not None: - # print the sagemaker / bedrock client request - curl_command = "\nRequest Sent from LiteLLM:\n" - curl_command += additional_args.get("request_str", None) - elif api_base == "": - curl_command = self.model_call_details - - # only print verbose if verbose logger is not set - if verbose_logger.level == 0: - # this means verbose logger was not switched on - user is in litellm.set_verbose=True - print_verbose(f"\033[92m{curl_command}\033[0m\n") - - if litellm.json_logs: - verbose_logger.debug( - "POST Request Sent from LiteLLM", - extra={"api_base": {api_base}, **masked_headers}, - ) - else: - verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n") - # log raw request to provider (like LangFuse) -- if opted in. - if litellm.log_raw_request_response is True: - try: - # [Non-blocking Extra Debug Information in metadata] - _litellm_params = self.model_call_details.get("litellm_params", {}) - _metadata = _litellm_params.get("metadata", {}) or {} - if ( - litellm.turn_off_message_logging is not None - and litellm.turn_off_message_logging is True - ): - _metadata["raw_request"] = ( - "redacted by litellm. 
\ - 'litellm.turn_off_message_logging=True'" - ) - else: - _metadata["raw_request"] = str(curl_command) - except Exception as e: - _metadata["raw_request"] = ( - "Unable to Log \ - raw request: {}".format( - str(e) - ) - ) - if self.logger_fn and callable(self.logger_fn): - try: - self.logger_fn( - self.model_call_details - ) # Expectation: any logger function passed in by the user should accept a dict object - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made - callbacks = litellm.input_callback + self.dynamic_input_callbacks - for callback in callbacks: - try: - if callback == "supabase": - print_verbose("reaches supabase for logging!") - model = self.model_call_details["model"] - messages = self.model_call_details["input"] - print_verbose(f"supabaseClient: {supabaseClient}") - supabaseClient.input_log_event( - model=model, - messages=messages, - end_user=self.model_call_details.get("user", "default"), - litellm_call_id=self.litellm_params["litellm_call_id"], - print_verbose=print_verbose, - ) - elif callback == "sentry" and add_breadcrumb: - try: - details_to_log = copy.deepcopy(self.model_call_details) - except: - details_to_log = self.model_call_details - if litellm.turn_off_message_logging: - # make a copy of the _model_Call_details and log it - details_to_log.pop("messages", None) - details_to_log.pop("input", None) - details_to_log.pop("prompt", None) - - add_breadcrumb( - category="litellm.llm_call", - message=f"Model Call Details pre-call: {details_to_log}", - level="info", - ) - elif isinstance(callback, CustomLogger): # custom logger class - callback.log_pre_api_call( - model=self.model, - messages=self.messages, - kwargs=self.model_call_details, - ) - elif callable(callback): # custom logger functions - customLogger.log_input_event( - model=self.model, - messages=self.messages, - kwargs=self.model_call_details, - print_verbose=print_verbose, - callback_func=callback, - ) - except Exception as e: - verbose_logger.error( - "litellm.Logging.pre_call(): Exception occured - {}".format( - str(e) - ) - ) - verbose_logger.debug( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - - def post_call( - self, original_response, input=None, api_key=None, additional_args={} - ): - # Log the exact result from the LLM API, for streaming - log the type of response received - litellm.error_logs["POST_CALL"] = locals() - if isinstance(original_response, dict): - original_response = json.dumps(original_response) - try: - self.model_call_details["input"] = input - self.model_call_details["api_key"] = api_key - self.model_call_details["original_response"] = original_response - self.model_call_details["additional_args"] = additional_args - self.model_call_details["log_event_type"] = "post_api_call" - # User Logging -> if you 
pass in a custom logging function - print_verbose( - f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n", - log_level="DEBUG", - ) - if self.logger_fn and callable(self.logger_fn): - try: - self.logger_fn( - self.model_call_details - ) # Expectation: any logger function passed in by the user should accept a dict object - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - original_response = redact_message_input_output_from_logging( - litellm_logging_obj=self, result=original_response - ) - # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made - - callbacks = litellm.input_callback + self.dynamic_input_callbacks - for callback in callbacks: - try: - if callback == "lite_debugger": - print_verbose("reaches litedebugger for post-call logging!") - print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") - liteDebuggerClient.post_call_log_event( - original_response=original_response, - litellm_call_id=self.litellm_params["litellm_call_id"], - print_verbose=print_verbose, - call_type=self.call_type, - stream=self.stream, - ) - elif callback == "sentry" and add_breadcrumb: - print_verbose("reaches sentry breadcrumbing") - try: - details_to_log = copy.deepcopy(self.model_call_details) - except: - details_to_log = self.model_call_details - if litellm.turn_off_message_logging: - # make a copy of the _model_Call_details and log it - details_to_log.pop("messages", None) - details_to_log.pop("input", None) - details_to_log.pop("prompt", None) - - add_breadcrumb( - category="litellm.llm_call", - message=f"Model Call Details post-call: {details_to_log}", - level="info", - ) - elif isinstance(callback, CustomLogger): # custom logger class - callback.log_post_api_call( - kwargs=self.model_call_details, - response_obj=None, - start_time=self.start_time, - end_time=None, - ) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - pass - - def _success_handler_helper_fn( - self, result=None, start_time=None, end_time=None, cache_hit=None - ): - try: - if start_time is None: - start_time = self.start_time - if end_time is None: - end_time = datetime.datetime.now() - if self.completion_start_time is None: - self.completion_start_time = end_time - self.model_call_details["completion_start_time"] = ( - self.completion_start_time - ) - self.model_call_details["log_event_type"] = "successful_api_call" - self.model_call_details["end_time"] = end_time - self.model_call_details["cache_hit"] = cache_hit - ## if model in model cost map - log the response cost - ## else set cost to None - verbose_logger.debug(f"Model={self.model};") - if ( - result is not None - and ( - isinstance(result, ModelResponse) - or isinstance(result, EmbeddingResponse) - or isinstance(result, ImageResponse) - or isinstance(result, TranscriptionResponse) - or isinstance(result, TextCompletionResponse) - ) - and self.stream != True - ): # handle streaming separately - self.model_call_details["response_cost"] = ( - 
litellm.response_cost_calculator( - response_object=result, - model=self.model, - cache_hit=self.model_call_details.get("cache_hit", False), - custom_llm_provider=self.model_call_details.get( - "custom_llm_provider", None - ), - base_model=_get_base_model_from_metadata( - model_call_details=self.model_call_details - ), - call_type=self.call_type, - optional_params=self.optional_params, - ) - ) - else: # streaming chunks + image gen. - self.model_call_details["response_cost"] = None - - if ( - litellm.max_budget - and self.stream == False - and result is not None - and "content" in result - ): - time_diff = (end_time - start_time).total_seconds() - float_diff = float(time_diff) - litellm._current_cost += litellm.completion_cost( - model=self.model, - prompt="", - completion=result["content"], - total_time=float_diff, - ) - - return start_time, end_time, result - except Exception as e: - raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}") - - def success_handler( - self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs - ): - print_verbose(f"Logging Details LiteLLM-Success Call: {cache_hit}") - start_time, end_time, result = self._success_handler_helper_fn( - start_time=start_time, - end_time=end_time, - result=result, - cache_hit=cache_hit, - ) - # print(f"original response in success handler: {self.model_call_details['original_response']}") - try: - print_verbose(f"success callbacks: {litellm.success_callback}") - ## BUILD COMPLETE STREAMED RESPONSE - complete_streaming_response = None - if self.stream and isinstance(result, ModelResponse): - if ( - result.choices[0].finish_reason is not None - ): # if it's the last chunk - self.sync_streaming_chunks.append(result) - # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}") - try: - complete_streaming_response = litellm.stream_chunk_builder( - self.sync_streaming_chunks, - messages=self.model_call_details.get("messages", None), - start_time=start_time, - end_time=end_time, - ) - except Exception as e: - print_verbose( - "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format( - str(e), traceback.format_exc() - ), - log_level="ERROR", - ) - complete_streaming_response = None - else: - self.sync_streaming_chunks.append(result) - - if complete_streaming_response is not None: - print_verbose( - f"Logging Details LiteLLM-Success Call streaming complete" - ) - self.model_call_details["complete_streaming_response"] = ( - complete_streaming_response - ) - self.model_call_details["response_cost"] = ( - litellm.response_cost_calculator( - response_object=complete_streaming_response, - model=self.model, - cache_hit=self.model_call_details.get("cache_hit", False), - custom_llm_provider=self.model_call_details.get( - "custom_llm_provider", None - ), - base_model=_get_base_model_from_metadata( - model_call_details=self.model_call_details - ), - call_type=self.call_type, - optional_params=self.optional_params, - ) - ) - if self.dynamic_success_callbacks is not None and isinstance( - self.dynamic_success_callbacks, list - ): - callbacks = self.dynamic_success_callbacks - ## keep the internal functions ## - for callback in litellm.success_callback: - if ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - callbacks.append(callback) - else: - callbacks = litellm.success_callback - - result = redact_message_input_output_from_logging( - result=result, litellm_logging_obj=self - ) - 
- for callback in callbacks: - try: - litellm_params = self.model_call_details.get("litellm_params", {}) - if litellm_params.get("no-log", False) == True: - # proxy cost tracking cal backs should run - if not ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - print_verbose("no-log request, skipping logging") - continue - if callback == "lite_debugger": - print_verbose("reaches lite_debugger for logging!") - print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") - print_verbose( - f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}" - ) - liteDebuggerClient.log_event( - end_user=kwargs.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=self.litellm_call_id, - print_verbose=print_verbose, - call_type=self.call_type, - stream=self.stream, - ) - if callback == "promptlayer": - print_verbose("reaches promptlayer for logging!") - promptLayerLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "supabase": - print_verbose("reaches supabase for logging!") - kwargs = self.model_call_details - - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - print_verbose("reaches supabase for streaming logging!") - result = kwargs["complete_streaming_response"] - - model = kwargs["model"] - messages = kwargs["messages"] - optional_params = kwargs.get("optional_params", {}) - litellm_params = kwargs.get("litellm_params", {}) - supabaseClient.log_event( - model=model, - messages=messages, - end_user=optional_params.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=litellm_params.get( - "litellm_call_id", str(uuid.uuid4()) - ), - print_verbose=print_verbose, - ) - if callback == "wandb": - print_verbose("reaches wandb for logging!") - weightsBiasesLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "langsmith": - print_verbose("reaches langsmith for logging!") - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - print_verbose( - "reaches langsmith for streaming logging!" 
- ) - result = kwargs["complete_streaming_response"] - langsmithLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "logfire": - global logfireLogger - verbose_logger.debug("reaches logfire for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - print_verbose("reaches logfire for streaming logging!") - result = kwargs["complete_streaming_response"] - - logfireLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - level=LogfireLevel.INFO.value, - ) - - if callback == "lunary": - print_verbose("reaches lunary for logging!") - model = self.model - kwargs = self.model_call_details - - input = kwargs.get("messages", kwargs.get("input", None)) - - type = ( - "embed" - if self.call_type == CallTypes.embedding.value - else "llm" - ) - - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - result = kwargs["complete_streaming_response"] - - lunaryLogger.log_event( - type=type, - kwargs=kwargs, - event="end", - model=model, - input=input, - user_id=kwargs.get("user", None), - # user_props=self.model_call_details.get("user_props", None), - extra=kwargs.get("optional_params", {}), - response_obj=result, - start_time=start_time, - end_time=end_time, - run_id=self.litellm_call_id, - print_verbose=print_verbose, - ) - if callback == "helicone": - print_verbose("reaches helicone for logging!") - model = self.model - messages = self.model_call_details["input"] - heliconeLogger.log_success( - model=model, - messages=messages, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "langfuse": - global langFuseLogger - verbose_logger.debug("reaches langfuse for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose("reaches langfuse for streaming logging!") - result = kwargs["complete_streaming_response"] - if langFuseLogger is None or ( - ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - and ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - ): - langFuseLogger = LangFuseLogger( - langfuse_public_key=self.langfuse_public_key, - langfuse_secret=self.langfuse_secret, - ) - langFuseLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "datadog": - global dataDogLogger - verbose_logger.debug("reaches 
datadog for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"datadog: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose("reaches datadog for streaming logging!") - result = kwargs["complete_streaming_response"] - dataDogLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "prometheus": - global prometheusLogger - verbose_logger.debug("reaches prometheus for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose( - "reaches prometheus for streaming logging!" - ) - result = kwargs["complete_streaming_response"] - prometheusLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "generic": - global genericAPILogger - verbose_logger.debug("reaches langfuse for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose("reaches langfuse for streaming logging!") - result = kwargs["complete_streaming_response"] - if genericAPILogger is None: - genericAPILogger = GenericAPILogger() - genericAPILogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "clickhouse": - global clickHouseLogger - verbose_logger.debug("reaches clickhouse for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose( - "reaches clickhouse for streaming logging!" 
- ) - result = kwargs["complete_streaming_response"] - if clickHouseLogger is None: - clickHouseLogger = ClickhouseLogger() - clickHouseLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "greenscale": - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose( - "reaches greenscale for streaming logging!" - ) - result = kwargs["complete_streaming_response"] - - greenscaleLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "cache" and litellm.cache is not None: - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - print_verbose("success_callback: reaches cache for logging!") - kwargs = self.model_call_details - if self.stream: - if "complete_streaming_response" not in kwargs: - print_verbose( - f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" - ) - pass - else: - print_verbose( - "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" - ) - result = kwargs["complete_streaming_response"] - # only add to cache once we have a complete streaming response - litellm.cache.add_cache(result, **kwargs) - if callback == "athina": - deep_copy = {} - for k, v in self.model_call_details.items(): - deep_copy[k] = v - athinaLogger.log_event( - kwargs=deep_copy, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "traceloop": - deep_copy = {} - for k, v in self.model_call_details.items(): - if k != "original_response": - deep_copy[k] = v - traceloopLogger.log_event( - kwargs=deep_copy, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "s3": - global s3Logger - if s3Logger is None: - s3Logger = S3Logger() - if self.stream: - if "complete_streaming_response" in self.model_call_details: - print_verbose( - "S3Logger Logger: Got Stream Event - Completed Stream Response" - ) - s3Logger.log_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - else: - print_verbose( - "S3Logger Logger: Got Stream Event - No complete stream response as yet" - ) - else: - s3Logger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if ( - callback == "openmeter" - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aimage_generation", False - ) - == False - and self.model_call_details.get("litellm_params", 
{}).get( - "atranscription", False - ) - == False - ): - global openMeterLogger - if openMeterLogger is None: - print_verbose("Instantiates openmeter client") - openMeterLogger = OpenMeterLogger() - if self.stream and complete_streaming_response is None: - openMeterLogger.log_stream_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - if self.stream and complete_streaming_response: - self.model_call_details["complete_response"] = ( - self.model_call_details.get( - "complete_streaming_response", {} - ) - ) - result = self.model_call_details["complete_response"] - openMeterLogger.log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - - if ( - isinstance(callback, CustomLogger) - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aimage_generation", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "atranscription", False - ) - == False - ): # custom logger class - if self.stream and complete_streaming_response is None: - callback.log_stream_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - if self.stream and complete_streaming_response: - self.model_call_details["complete_response"] = ( - self.model_call_details.get( - "complete_streaming_response", {} - ) - ) - result = self.model_call_details["complete_response"] - callback.log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - if ( - callable(callback) == True - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aimage_generation", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "atranscription", False - ) - == False - ): # custom logger functions - print_verbose( - f"success callbacks: Running Custom Callback Function" - ) - customLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging with integrations {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except: - print_verbose( - "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format( - str(e), traceback.format_exc() - ), - log_level="ERROR", - ) - pass - - async def async_success_handler( - self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs - ): - """ - Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. 
- """ - print_verbose("Logging Details LiteLLM-Async Success Call") - start_time, end_time, result = self._success_handler_helper_fn( - start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit - ) - ## BUILD COMPLETE STREAMED RESPONSE - complete_streaming_response = None - if self.stream: - if result.choices[0].finish_reason is not None: # if it's the last chunk - self.streaming_chunks.append(result) - # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}") - try: - complete_streaming_response = litellm.stream_chunk_builder( - self.streaming_chunks, - messages=self.model_call_details.get("messages", None), - start_time=start_time, - end_time=end_time, - ) - except Exception as e: - print_verbose( - "Error occurred building stream chunk in success logging: {}\n{}".format( - str(e), traceback.format_exc() - ), - log_level="ERROR", - ) - complete_streaming_response = None - else: - self.streaming_chunks.append(result) - if complete_streaming_response is not None: - print_verbose("Async success callbacks: Got a complete streaming response") - self.model_call_details["async_complete_streaming_response"] = ( - complete_streaming_response - ) - try: - if self.model_call_details.get("cache_hit", False) is True: - self.model_call_details["response_cost"] = 0.0 - else: - # check if base_model set on azure - base_model = _get_base_model_from_metadata( - model_call_details=self.model_call_details - ) - # base_model defaults to None if not set on model_info - self.model_call_details["response_cost"] = litellm.completion_cost( - completion_response=complete_streaming_response, - model=base_model, - ) - verbose_logger.debug( - f"Model={self.model}; cost={self.model_call_details['response_cost']}" - ) - except litellm.NotFoundError as e: - verbose_logger.error( - f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None" - ) - self.model_call_details["response_cost"] = None - - if self.dynamic_async_success_callbacks is not None and isinstance( - self.dynamic_async_success_callbacks, list - ): - callbacks = self.dynamic_async_success_callbacks - ## keep the internal functions ## - for callback in litellm._async_success_callback: - callback_name = "" - if isinstance(callback, CustomLogger): - callback_name = callback.__class__.__name__ - if callable(callback): - callback_name = callback.__name__ - if "_PROXY_" in callback_name: - callbacks.append(callback) - else: - callbacks = litellm._async_success_callback - - result = redact_message_input_output_from_logging( - result=result, litellm_logging_obj=self - ) - - for callback in callbacks: - # check if callback can run for this request - litellm_params = self.model_call_details.get("litellm_params", {}) - if litellm_params.get("no-log", False) == True: - # proxy cost tracking cal backs should run - if not ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - print_verbose("no-log request, skipping logging") - continue - try: - if kwargs.get("no-log", False) == True: - print_verbose("no-log request, skipping logging") - continue - if callback == "cache" and litellm.cache is not None: - # set_cache once complete streaming response is built - print_verbose("async success_callback: reaches cache for logging!") - kwargs = self.model_call_details - if self.stream: - if "async_complete_streaming_response" not in kwargs: - print_verbose( - f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. 
Kwargs={kwargs}\n\n" - ) - pass - else: - print_verbose( - "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache" - ) - result = kwargs["async_complete_streaming_response"] - # only add to cache once we have a complete streaming response - if litellm.cache is not None and not isinstance( - litellm.cache.cache, S3Cache - ): - await litellm.cache.async_add_cache(result, **kwargs) - else: - litellm.cache.add_cache(result, **kwargs) - if callback == "openmeter": - global openMeterLogger - if self.stream == True: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - await openMeterLogger.async_log_success_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - ) - else: - await openMeterLogger.async_log_stream_event( # [TODO]: move this to being an async log stream event function - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - await openMeterLogger.async_log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - if isinstance(callback, CustomLogger): # custom logger class - if self.stream == True: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - await callback.async_log_success_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - ) - else: - await callback.async_log_stream_event( # [TODO]: move this to being an async log stream event function - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - await callback.async_log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - if callable(callback): # custom logger functions - if self.stream: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - await customLogger.async_log_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - else: - await customLogger.async_log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - if callback == "dynamodb": - global dynamoLogger - if dynamoLogger is None: - dynamoLogger = DyanmoDBLogger() - if self.stream: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - print_verbose( - "DynamoDB Logger: Got Stream Event - Completed Stream Response" - ) - await dynamoLogger._async_log_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - else: - print_verbose( - "DynamoDB Logger: Got Stream Event - No complete stream response as yet" - ) - else: - await dynamoLogger._async_log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - except Exception as e: - verbose_logger.error( - f"LiteLLM.LoggingError: [Non-Blocking] 
Exception occurred while success logging {traceback.format_exc()}" - ) - pass - - def _failure_handler_helper_fn( - self, exception, traceback_exception, start_time=None, end_time=None - ): - if start_time is None: - start_time = self.start_time - if end_time is None: - end_time = datetime.datetime.now() - - # on some exceptions, model_call_details is not always initialized, this ensures that we still log those exceptions - if not hasattr(self, "model_call_details"): - self.model_call_details = {} - - self.model_call_details["log_event_type"] = "failed_api_call" - self.model_call_details["exception"] = exception - self.model_call_details["traceback_exception"] = traceback_exception - self.model_call_details["end_time"] = end_time - self.model_call_details.setdefault("original_response", None) - return start_time, end_time - - def failure_handler( - self, exception, traceback_exception, start_time=None, end_time=None - ): - print_verbose( - f"Logging Details LiteLLM-Failure Call: {litellm.failure_callback}" - ) - try: - start_time, end_time = self._failure_handler_helper_fn( - exception=exception, - traceback_exception=traceback_exception, - start_time=start_time, - end_time=end_time, - ) - callbacks = [] # init this to empty incase it's not created - - if self.dynamic_failure_callbacks is not None and isinstance( - self.dynamic_failure_callbacks, list - ): - callbacks = self.dynamic_failure_callbacks - ## keep the internal functions ## - for callback in litellm.failure_callback: - if ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - callbacks.append(callback) - else: - callbacks = litellm.failure_callback - - result = None # result sent to all loggers, init this to None incase it's not created - - result = redact_message_input_output_from_logging( - result=result, litellm_logging_obj=self - ) - for callback in callbacks: - try: - if callback == "lite_debugger": - print_verbose("reaches lite_debugger for logging!") - print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") - result = { - "model": self.model, - "created": time.time(), - "error": traceback_exception, - "usage": { - "prompt_tokens": prompt_token_calculator( - self.model, messages=self.messages - ), - "completion_tokens": 0, - }, - } - liteDebuggerClient.log_event( - model=self.model, - messages=self.messages, - end_user=self.model_call_details.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=self.litellm_call_id, - print_verbose=print_verbose, - call_type=self.call_type, - stream=self.stream, - ) - if callback == "lunary": - print_verbose("reaches lunary for logging error!") - - model = self.model - - input = self.model_call_details["input"] - - _type = ( - "embed" - if self.call_type == CallTypes.embedding.value - else "llm" - ) - - lunaryLogger.log_event( - type=_type, - event="error", - user_id=self.model_call_details.get("user", "default"), - model=model, - input=input, - error=traceback_exception, - run_id=self.litellm_call_id, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "sentry": - print_verbose("sending exception to sentry") - if capture_exception: - capture_exception(exception) - else: - print_verbose( - f"capture exception not initialized: {capture_exception}" - ) - if callable(callback): # custom logger functions - customLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - 
print_verbose=print_verbose, - callback_func=callback, - ) - if ( - isinstance(callback, CustomLogger) - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - ): # custom logger class - callback.log_failure_event( - start_time=start_time, - end_time=end_time, - response_obj=result, - kwargs=self.model_call_details, - ) - if callback == "langfuse": - global langFuseLogger - verbose_logger.debug("reaches langfuse for logging failure") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if langFuseLogger is None or ( - ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - and ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - ): - langFuseLogger = LangFuseLogger( - langfuse_public_key=self.langfuse_public_key, - langfuse_secret=self.langfuse_secret, - ) - langFuseLogger.log_event( - start_time=start_time, - end_time=end_time, - response_obj=None, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - status_message=str(exception), - level="ERROR", - kwargs=self.model_call_details, - ) - if callback == "traceloop": - traceloopLogger.log_event( - start_time=start_time, - end_time=end_time, - response_obj=None, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - status_message=str(exception), - level="ERROR", - kwargs=self.model_call_details, - ) - if callback == "prometheus": - global prometheusLogger - verbose_logger.debug("reaches prometheus for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - kwargs["exception"] = str(exception) - prometheusLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - - if callback == "logfire": - global logfireLogger - verbose_logger.debug("reaches logfire for failure logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - kwargs["exception"] = exception - - logfireLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - level=LogfireLevel.ERROR.value, - print_verbose=print_verbose, - ) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}" - ) - pass - - async def async_failure_handler( - self, exception, traceback_exception, start_time=None, end_time=None - ): - """ - Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. 
- """ - start_time, end_time = self._failure_handler_helper_fn( - exception=exception, - traceback_exception=traceback_exception, - start_time=start_time, - end_time=end_time, - ) - result = None # result sent to all loggers, init this to None incase it's not created - for callback in litellm._async_failure_callback: - try: - if isinstance(callback, CustomLogger): # custom logger class - await callback.async_log_failure_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) # type: ignore - if callable(callback): # custom logger functions - await customLogger.async_log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" - ) def exception_logging( @@ -2848,6 +327,11 @@ def _init_custom_logger_compatible_class( def function_setup( original_function: str, rules_obj, start_time, *args, **kwargs ): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc. + ### NOTICES ### + if litellm.set_verbose is True: + verbose_logger.warning( + "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs." + ) try: global callback_list, add_breadcrumb, user_logger_fn, Logging function_id = kwargs["id"] if "id" in kwargs else None @@ -2888,7 +372,9 @@ def function_setup( + litellm.failure_callback ) ) - set_callbacks(callback_list=callback_list, function_id=function_id) + litellm.litellm_core_utils.litellm_logging.set_callbacks( + callback_list=callback_list, function_id=function_id + ) ## ASYNC CALLBACKS if len(litellm.input_callback) > 0: removed_async_items = [] @@ -3033,7 +519,7 @@ def function_setup( ): messages = kwargs.get("input", "speech") stream = True if "stream" in kwargs and kwargs["stream"] == True else False - logging_obj = Logging( + logging_obj = litellm.litellm_core_utils.litellm_logging.Logging( model=model, messages=messages, stream=stream, @@ -3191,7 +677,7 @@ def client(original_function): ) if previous_models is not None: if litellm.num_retries_per_request <= len(previous_models): - raise Exception(f"Max retries per request hit!") + raise Exception("Max retries per request hit!") # [OPTIONAL] CHECK CACHE print_verbose( @@ -3451,11 +937,6 @@ def client(original_function): logging_obj.failure_handler( e, traceback_exception, start_time, end_time ) # DO NOT MAKE THREADED - router retry fallback relies on this! 
- my_thread = threading.Thread( - target=handle_failure, - args=(e, traceback_exception, start_time, end_time, args, kwargs), - ) # don't interrupt execution of main thread - my_thread.start() if hasattr(e, "message"): if ( liteDebuggerClient and liteDebuggerClient.dashboard_url != None @@ -4323,229 +1804,6 @@ def token_counter( return num_tokens -def _cost_per_token_custom_pricing_helper( - prompt_tokens=0, - completion_tokens=0, - response_time_ms=None, - ### CUSTOM PRICING ### - custom_cost_per_token: Optional[CostPerToken] = None, - custom_cost_per_second: Optional[float] = None, -) -> Optional[Tuple[float, float]]: - """Internal helper function for calculating cost, if custom pricing given""" - if custom_cost_per_token is None and custom_cost_per_second is None: - return None - - if custom_cost_per_token is not None: - input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens - output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens - return input_cost, output_cost - elif custom_cost_per_second is not None: - output_cost = custom_cost_per_second * response_time_ms / 1000 # type: ignore - return 0, output_cost - - return None - - -def cost_per_token( - model: str = "", - prompt_tokens=0, - completion_tokens=0, - response_time_ms=None, - custom_llm_provider=None, - region_name=None, - ### CUSTOM PRICING ### - custom_cost_per_token: Optional[CostPerToken] = None, - custom_cost_per_second: Optional[float] = None, -) -> Tuple[float, float]: - """ - Calculates the cost per token for a given model, prompt tokens, and completion tokens. - - Parameters: - model (str): The name of the model to use. Default is "" - prompt_tokens (int): The number of tokens in the prompt. - completion_tokens (int): The number of tokens in the completion. - response_time (float): The amount of time, in milliseconds, it took the call to complete. - custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list) - custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call. - custom_cost_per_second: Optional[float]: the cost per second for the llm api call. - - Returns: - tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively. - """ - if model is None: - raise Exception("Invalid arg. Model cannot be none.") - ## CUSTOM PRICING ## - response_cost = _cost_per_token_custom_pricing_helper( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - response_time_ms=response_time_ms, - custom_cost_per_second=custom_cost_per_second, - custom_cost_per_token=custom_cost_per_token, - ) - if response_cost is not None: - return response_cost[0], response_cost[1] - - # given - prompt_tokens_cost_usd_dollar: float = 0 - completion_tokens_cost_usd_dollar: float = 0 - model_cost_ref = litellm.model_cost - model_with_provider = model - if custom_llm_provider is not None: - model_with_provider = custom_llm_provider + "/" + model - if region_name is not None: - model_with_provider_and_region = ( - f"{custom_llm_provider}/{region_name}/{model}" - ) - if ( - model_with_provider_and_region in model_cost_ref - ): # use region based pricing, if it's available - model_with_provider = model_with_provider_and_region - - model_without_prefix = model - model_parts = model.split("/") - if len(model_parts) > 1: - model_without_prefix = model_parts[1] - else: - model_without_prefix = model - """ - Code block that formats model to lookup in litellm.model_cost - Option1. 
model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1 - Option2. model = "openai/gpt-4" - model = provider/model - Option3. model = "anthropic.claude-3" - model = model - """ - if ( - model_with_provider in model_cost_ref - ): # Option 2. use model with provider, model = "openai/gpt-4" - model = model_with_provider - elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4" - model = model - elif ( - model_without_prefix in model_cost_ref - ): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3" - model = model_without_prefix - - # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models - print_verbose(f"Looking up model={model} in model_cost_map") - if model in model_cost_ref: - print_verbose(f"Success: model={model} in model_cost_map") - print_verbose( - f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}" - ) - if ( - model_cost_ref[model].get("input_cost_per_token", None) is not None - and model_cost_ref[model].get("output_cost_per_token", None) is not None - ): - ## COST PER TOKEN ## - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_token"] * completion_tokens - ) - elif ( - model_cost_ref[model].get("output_cost_per_second", None) is not None - and response_time_ms is not None - ): - print_verbose( - f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}" - ) - ## COST PER SECOND ## - prompt_tokens_cost_usd_dollar = 0 - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_second"] - * response_time_ms - / 1000 - ) - elif ( - model_cost_ref[model].get("input_cost_per_second", None) is not None - and response_time_ms is not None - ): - print_verbose( - f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}" - ) - ## COST PER SECOND ## - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000 - ) - completion_tokens_cost_usd_dollar = 0.0 - print_verbose( - f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}" - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif "ft:gpt-3.5-turbo" in model: - print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") - # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm - prompt_tokens_cost_usd_dollar = ( - model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] - * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif "ft:davinci-002" in model: - print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") - # fuzzy match ft:davinci-002:abcd-id-cool-litellm - prompt_tokens_cost_usd_dollar = ( - model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref["ft:davinci-002"]["output_cost_per_token"] - * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, 
completion_tokens_cost_usd_dollar - elif "ft:babbage-002" in model: - print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") - # fuzzy match ft:babbage-002:abcd-id-cool-litellm - prompt_tokens_cost_usd_dollar = ( - model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref["ft:babbage-002"]["output_cost_per_token"] - * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif model in litellm.azure_llms: - verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM") - model = litellm.azure_llms[model] - verbose_logger.debug( - f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}" - ) - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_token"] * prompt_tokens - ) - verbose_logger.debug( - f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}" - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_token"] * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif model in litellm.azure_embedding_models: - verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model") - model = litellm.azure_embedding_models[model] - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_token"] * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - else: - # if model is not in model_prices_and_context_window.json. Raise an exception-let users know - error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n" - raise litellm.exceptions.NotFoundError( # type: ignore - message=error_str, - model=model, - response=httpx.Response( - status_code=404, - content=error_str, - request=httpx.Request(method="cost_per_token", url="https://github.com/BerriAI/litellm"), # type: ignore - ), - llm_provider="", - ) - - def supports_httpx_timeout(custom_llm_provider: str) -> bool: """ Helper function to know if a provider implementation supports httpx timeout @@ -7513,264 +4771,6 @@ def validate_environment(model: Optional[str] = None) -> dict: return {"keys_in_environment": keys_in_environment, "missing_keys": missing_keys} -def set_callbacks(callback_list, function_id=None): - global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger - - try: - for callback in callback_list: - print_verbose(f"init callback list: {callback}") - if callback == "sentry": - try: - import sentry_sdk - except ImportError: - print_verbose("Package 'sentry_sdk' is missing. 
Installing it...") - subprocess.check_call( - [sys.executable, "-m", "pip", "install", "sentry_sdk"] - ) - import sentry_sdk - sentry_sdk_instance = sentry_sdk - sentry_trace_rate = ( - os.environ.get("SENTRY_API_TRACE_RATE") - if "SENTRY_API_TRACE_RATE" in os.environ - else "1.0" - ) - sentry_sdk_instance.init( - dsn=os.environ.get("SENTRY_DSN"), - traces_sample_rate=float(sentry_trace_rate), - ) - capture_exception = sentry_sdk_instance.capture_exception - add_breadcrumb = sentry_sdk_instance.add_breadcrumb - elif callback == "posthog": - try: - from posthog import Posthog - except ImportError: - print_verbose("Package 'posthog' is missing. Installing it...") - subprocess.check_call( - [sys.executable, "-m", "pip", "install", "posthog"] - ) - from posthog import Posthog - posthog = Posthog( - project_api_key=os.environ.get("POSTHOG_API_KEY"), - host=os.environ.get("POSTHOG_API_URL"), - ) - elif callback == "slack": - try: - from slack_bolt import App - except ImportError: - print_verbose("Package 'slack_bolt' is missing. Installing it...") - subprocess.check_call( - [sys.executable, "-m", "pip", "install", "slack_bolt"] - ) - from slack_bolt import App - slack_app = App( - token=os.environ.get("SLACK_API_TOKEN"), - signing_secret=os.environ.get("SLACK_API_SECRET"), - ) - alerts_channel = os.environ["SLACK_API_CHANNEL"] - print_verbose(f"Initialized Slack App: {slack_app}") - elif callback == "traceloop": - traceloopLogger = TraceloopLogger() - elif callback == "athina": - athinaLogger = AthinaLogger() - print_verbose("Initialized Athina Logger") - elif callback == "helicone": - heliconeLogger = HeliconeLogger() - elif callback == "lunary": - lunaryLogger = LunaryLogger() - elif callback == "promptlayer": - promptLayerLogger = PromptLayerLogger() - elif callback == "langfuse": - langFuseLogger = LangFuseLogger() - elif callback == "openmeter": - openMeterLogger = OpenMeterLogger() - elif callback == "datadog": - dataDogLogger = DataDogLogger() - elif callback == "prometheus": - if prometheusLogger is None: - prometheusLogger = PrometheusLogger() - elif callback == "dynamodb": - dynamoLogger = DyanmoDBLogger() - elif callback == "s3": - s3Logger = S3Logger() - elif callback == "wandb": - weightsBiasesLogger = WeightsBiasesLogger() - elif callback == "langsmith": - langsmithLogger = LangsmithLogger() - elif callback == "logfire": - logfireLogger = LogfireLogger() - elif callback == "aispend": - aispendLogger = AISpendLogger() - elif callback == "berrispend": - berrispendLogger = BerriSpendLogger() - elif callback == "supabase": - print_verbose(f"instantiating supabase") - supabaseClient = Supabase() - elif callback == "greenscale": - greenscaleLogger = GreenscaleLogger() - print_verbose("Initialized Greenscale Logger") - elif callback == "lite_debugger": - print_verbose(f"instantiating lite_debugger") - if function_id: - liteDebuggerClient = LiteDebugger(email=function_id) - elif litellm.token: - liteDebuggerClient = LiteDebugger(email=litellm.token) - elif litellm.email: - liteDebuggerClient = LiteDebugger(email=litellm.email) - else: - liteDebuggerClient = LiteDebugger(email=str(uuid.uuid4())) - elif callable(callback): - customLogger = CustomLogger() - except Exception as e: - raise e - - -# NOTE: DEPRECATING this in favor of using failure_handler() in Logging: -def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs): - global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger, 
supabaseClient, liteDebuggerClient, lunaryLogger - try: - # print_verbose(f"handle_failure args: {args}") - # print_verbose(f"handle_failure kwargs: {kwargs}") - - success_handler = additional_details.pop("success_handler", None) - failure_handler = additional_details.pop("failure_handler", None) - - additional_details["Event_Name"] = additional_details.pop( - "failed_event_name", "litellm.failed_query" - ) - print_verbose(f"self.failure_callback: {litellm.failure_callback}") - for callback in litellm.failure_callback: - try: - if callback == "slack": - slack_msg = "" - if len(kwargs) > 0: - for key in kwargs: - slack_msg += f"{key}: {kwargs[key]}\n" - if len(args) > 0: - for i, arg in enumerate(args): - slack_msg += f"LiteLLM_Args_{str(i)}: {arg}" - for detail in additional_details: - slack_msg += f"{detail}: {additional_details[detail]}\n" - slack_msg += f"Traceback: {traceback_exception}" - truncated_slack_msg = textwrap.shorten( - slack_msg, width=512, placeholder="..." - ) - slack_app.client.chat_postMessage( - channel=alerts_channel, text=truncated_slack_msg - ) - elif callback == "sentry": - capture_exception(exception) - elif callback == "posthog": - print_verbose( - f"inside posthog, additional_details: {len(additional_details.keys())}" - ) - ph_obj = {} - if len(kwargs) > 0: - ph_obj = kwargs - if len(args) > 0: - for i, arg in enumerate(args): - ph_obj["litellm_args_" + str(i)] = arg - for detail in additional_details: - ph_obj[detail] = additional_details[detail] - event_name = additional_details["Event_Name"] - print_verbose(f"ph_obj: {ph_obj}") - print_verbose(f"PostHog Event Name: {event_name}") - if "user_id" in additional_details: - posthog.capture( - additional_details["user_id"], event_name, ph_obj - ) - else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python - unique_id = str(uuid.uuid4()) - posthog.capture(unique_id, event_name) - print_verbose(f"successfully logged to PostHog!") - elif callback == "berrispend": - print_verbose("reaches berrispend for logging!") - model = args[0] if len(args) > 0 else kwargs["model"] - messages = args[1] if len(args) > 1 else kwargs["messages"] - result = { - "model": model, - "created": time.time(), - "error": traceback_exception, - "usage": { - "prompt_tokens": prompt_token_calculator( - model, messages=messages - ), - "completion_tokens": 0, - }, - } - berrispendLogger.log_event( - model=model, - messages=messages, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - elif callback == "aispend": - print_verbose("reaches aispend for logging!") - model = args[0] if len(args) > 0 else kwargs["model"] - messages = args[1] if len(args) > 1 else kwargs["messages"] - result = { - "model": model, - "created": time.time(), - "usage": { - "prompt_tokens": prompt_token_calculator( - model, messages=messages - ), - "completion_tokens": 0, - }, - } - aispendLogger.log_event( - model=model, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - elif callback == "supabase": - print_verbose("reaches supabase for logging!") - print_verbose(f"supabaseClient: {supabaseClient}") - model = args[0] if len(args) > 0 else kwargs["model"] - messages = args[1] if len(args) > 1 else kwargs["messages"] - result = { - "model": model, - "created": time.time(), - "error": traceback_exception, - "usage": { - "prompt_tokens": prompt_token_calculator( - model, messages=messages - ), - "completion_tokens": 0, - }, - } 
- supabaseClient.log_event( - model=model, - messages=messages, - end_user=kwargs.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=kwargs["litellm_call_id"], - print_verbose=print_verbose, - ) - except: - print_verbose( - f"Error Occurred while logging failure: {traceback.format_exc()}" - ) - pass - - if failure_handler and callable(failure_handler): - call_details = { - "exception": exception, - "additional_details": additional_details, - } - failure_handler(call_details) - pass - except Exception as e: - # LOGGING - exception_logging(logger_fn=user_logger_fn, exception=e) - pass - - async def convert_to_streaming_response_async(response_object: Optional[dict] = None): """ Asynchronously converts a response object to a streaming response. diff --git a/poetry.lock b/poetry.lock index ac946690c..750def101 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2174,7 +2174,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -3198,4 +3197,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi- [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0, !=3.9.7" -content-hash = "73054c657782120d170dc168ef07b494a916f1f810ff9c2b0ac878bd857a9dac" +content-hash = "62156f0fa65f39f36576ef6ed91d773658399757111dd4b0660e1ce2a58ea7b2"
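
Caller-side usage sketch. The hunks above remove cost_per_token(), _cost_per_token_custom_pricing_helper(), set_callbacks() and the legacy handle_failure() path from litellm/utils.py, and function_setup() now warns that `litellm.set_verbose` is deprecated in favour of the LITELLM_LOG environment variable. The snippet below is a minimal sketch only: it assumes the relocated cost_per_token keeps the signature documented in the removed code and stays importable from the top-level litellm package, and the model name and prices in the custom-pricing call are illustrative, not part of this patch.

    import os

    # Replaces `litellm.set_verbose`, per the deprecation notice added in
    # function_setup(); set before importing litellm so the log level applies.
    os.environ.setdefault("LITELLM_LOG", "DEBUG")

    import litellm

    # cost_per_token() returns a (prompt_cost_usd, completion_cost_usd) tuple,
    # looked up from litellm.model_cost for known models.
    prompt_cost, completion_cost = litellm.cost_per_token(
        model="gpt-3.5-turbo",
        prompt_tokens=1500,
        completion_tokens=300,
    )
    print(f"prompt=${prompt_cost:.6f} completion=${completion_cost:.6f}")

    # Custom pricing: a CostPerToken dict bypasses the model_cost lookup via
    # _cost_per_token_custom_pricing_helper(). The model name and rates here
    # are hypothetical, for illustration only.
    prompt_cost, completion_cost = litellm.cost_per_token(
        model="my-finetuned-model",
        prompt_tokens=1000,
        completion_tokens=200,
        custom_cost_per_token={
            "input_cost_per_token": 8e-7,
            "output_cost_per_token": 1.6e-6,
        },
    )

For models absent from model_prices_and_context_window.json and without custom pricing, the removed implementation raises litellm.exceptions.NotFoundError pointing at https://docs.litellm.ai/docs/proxy/custom_pricing, so callers should be prepared to catch that case.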