From 4f9120553058f620000b9e1b5e506462f44dd3f7 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 15 Jun 2024 10:57:20 -0700 Subject: [PATCH] refactor(utils.py): refactor Logging to it's own class. Cut down utils.py to <10k lines. Easier debugging Reference: https://github.com/BerriAI/litellm/issues/4206 --- .pre-commit-config.yaml | 14 +- litellm/__init__.py | 8 +- litellm/_logging.py | 9 +- litellm/cost_calculator.py | 221 +- litellm/litellm_core_utils/core_helpers.py | 41 + litellm/litellm_core_utils/litellm_logging.py | 3215 +++++++++++++++++ litellm/litellm_core_utils/redact_messages.py | 4 +- litellm/llms/anthropic.py | 8 +- litellm/llms/base.py | 2 +- litellm/llms/bedrock.py | 16 +- litellm/llms/bedrock_httpx.py | 72 +- litellm/llms/databricks.py | 4 +- litellm/llms/predibase.py | 4 +- litellm/llms/triton.py | 1 - litellm/llms/vertex_ai.py | 3 +- litellm/llms/vertex_ai_anthropic.py | 3 +- litellm/llms/vertex_httpx.py | 7 +- litellm/proxy/utils.py | 6 +- litellm/types/utils.py | 910 +++++ litellm/utils.py | 2932 +-------------- 20 files changed, 4517 insertions(+), 2963 deletions(-) create mode 100644 litellm/litellm_core_utils/core_helpers.py create mode 100644 litellm/litellm_core_utils/litellm_logging.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2d85031b5..74f165bdd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,10 +24,10 @@ repos: language: system types: [python] files: ^litellm/ - # - id: check-file-length - # name: Check file length - # entry: python check_file_length.py - # args: ["10000"] # set your desired maximum number of lines - # language: python - # files: litellm/.*\.py - # exclude: ^litellm/tests/ \ No newline at end of file + - id: check-file-length + name: Check file length + entry: python check_file_length.py + args: ["10000"] # set your desired maximum number of lines + language: python + files: litellm/.*\.py + exclude: ^litellm/tests/ \ No newline at end of file diff --git a/litellm/__init__.py b/litellm/__init__.py index 6ecf70d0d..4c9baac19 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -723,12 +723,10 @@ from .utils import ( token_counter, create_pretrained_tokenizer, create_tokenizer, - cost_per_token, supports_function_calling, supports_parallel_function_calling, supports_vision, get_litellm_params, - Logging, acreate, get_model_list, get_max_tokens, @@ -748,9 +746,10 @@ from .utils import ( get_first_chars_messages, ModelResponse, ImageResponse, - ImageObject, get_provider_fields, ) + +from .types.utils import ImageObject from .llms.huggingface_restapi import HuggingfaceConfig from .llms.anthropic import AnthropicConfig from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig @@ -827,4 +826,5 @@ from .router import Router from .assistants.main import * from .batches.main import * from .scheduler import * -from .cost_calculator import response_cost_calculator +from .cost_calculator import response_cost_calculator, cost_per_token +from litellm.litellm_core_utils.litellm_logging import Logging diff --git a/litellm/_logging.py b/litellm/_logging.py index ab7a08f97..52a445b49 100644 --- a/litellm/_logging.py +++ b/litellm/_logging.py @@ -3,10 +3,17 @@ from logging import Formatter import traceback set_verbose = False + +if set_verbose is True: + logging.warning( + "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs." 
+ ) json_logs = bool(os.getenv("JSON_LOGS", False)) # Create a handler for the logger (you may need to adapt this based on your needs) +log_level = os.getenv("LITELLM_LOG", "ERROR") +numeric_level: str = getattr(logging, log_level.upper()) handler = logging.StreamHandler() -handler.setLevel(logging.DEBUG) +handler.setLevel(numeric_level) class JsonFormatter(Formatter): diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index d1e2dab52..c84df53e8 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -1,6 +1,6 @@ # What is this? ## File for 'response_cost' calculation in Logging -from typing import Optional, Union, Literal, List +from typing import Optional, Union, Literal, List, Tuple import litellm._logging from litellm.utils import ( ModelResponse, @@ -9,7 +9,6 @@ from litellm.utils import ( TranscriptionResponse, TextCompletionResponse, CallTypes, - cost_per_token, print_verbose, CostPerToken, token_counter, @@ -18,6 +17,224 @@ import litellm from litellm import verbose_logger +def _cost_per_token_custom_pricing_helper( + prompt_tokens=0, + completion_tokens=0, + response_time_ms=None, + ### CUSTOM PRICING ### + custom_cost_per_token: Optional[CostPerToken] = None, + custom_cost_per_second: Optional[float] = None, +) -> Optional[Tuple[float, float]]: + """Internal helper function for calculating cost, if custom pricing given""" + if custom_cost_per_token is None and custom_cost_per_second is None: + return None + + if custom_cost_per_token is not None: + input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens + output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens + return input_cost, output_cost + elif custom_cost_per_second is not None: + output_cost = custom_cost_per_second * response_time_ms / 1000 # type: ignore + return 0, output_cost + + return None + + +def cost_per_token( + model: str = "", + prompt_tokens=0, + completion_tokens=0, + response_time_ms=None, + custom_llm_provider=None, + region_name=None, + ### CUSTOM PRICING ### + custom_cost_per_token: Optional[CostPerToken] = None, + custom_cost_per_second: Optional[float] = None, +) -> Tuple[float, float]: + """ + Calculates the cost per token for a given model, prompt tokens, and completion tokens. + + Parameters: + model (str): The name of the model to use. Default is "" + prompt_tokens (int): The number of tokens in the prompt. + completion_tokens (int): The number of tokens in the completion. + response_time (float): The amount of time, in milliseconds, it took the call to complete. + custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list) + custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call. + custom_cost_per_second: Optional[float]: the cost per second for the llm api call. + + Returns: + tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively. + """ + if model is None: + raise Exception("Invalid arg. 
Model cannot be none.") + ## CUSTOM PRICING ## + response_cost = _cost_per_token_custom_pricing_helper( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + response_time_ms=response_time_ms, + custom_cost_per_second=custom_cost_per_second, + custom_cost_per_token=custom_cost_per_token, + ) + if response_cost is not None: + return response_cost[0], response_cost[1] + + # given + prompt_tokens_cost_usd_dollar: float = 0 + completion_tokens_cost_usd_dollar: float = 0 + model_cost_ref = litellm.model_cost + model_with_provider = model + if custom_llm_provider is not None: + model_with_provider = custom_llm_provider + "/" + model + if region_name is not None: + model_with_provider_and_region = ( + f"{custom_llm_provider}/{region_name}/{model}" + ) + if ( + model_with_provider_and_region in model_cost_ref + ): # use region based pricing, if it's available + model_with_provider = model_with_provider_and_region + + model_without_prefix = model + model_parts = model.split("/") + if len(model_parts) > 1: + model_without_prefix = model_parts[1] + else: + model_without_prefix = model + """ + Code block that formats model to lookup in litellm.model_cost + Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1 + Option2. model = "openai/gpt-4" - model = provider/model + Option3. model = "anthropic.claude-3" - model = model + """ + if ( + model_with_provider in model_cost_ref + ): # Option 2. use model with provider, model = "openai/gpt-4" + model = model_with_provider + elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4" + model = model + elif ( + model_without_prefix in model_cost_ref + ): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3" + model = model_without_prefix + + # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models + print_verbose(f"Looking up model={model} in model_cost_map") + if model in model_cost_ref: + print_verbose(f"Success: model={model} in model_cost_map") + print_verbose( + f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}" + ) + if ( + model_cost_ref[model].get("input_cost_per_token", None) is not None + and model_cost_ref[model].get("output_cost_per_token", None) is not None + ): + ## COST PER TOKEN ## + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_token"] * completion_tokens + ) + elif ( + model_cost_ref[model].get("output_cost_per_second", None) is not None + and response_time_ms is not None + ): + print_verbose( + f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}" + ) + ## COST PER SECOND ## + prompt_tokens_cost_usd_dollar = 0 + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_second"] + * response_time_ms + / 1000 + ) + elif ( + model_cost_ref[model].get("input_cost_per_second", None) is not None + and response_time_ms is not None + ): + print_verbose( + f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}" + ) + ## COST PER SECOND ## + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000 + ) + completion_tokens_cost_usd_dollar = 0.0 + print_verbose( + f"Returned 
custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}" + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif "ft:gpt-3.5-turbo" in model: + print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") + # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm + prompt_tokens_cost_usd_dollar = ( + model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] + * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif "ft:davinci-002" in model: + print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") + # fuzzy match ft:davinci-002:abcd-id-cool-litellm + prompt_tokens_cost_usd_dollar = ( + model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref["ft:davinci-002"]["output_cost_per_token"] + * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif "ft:babbage-002" in model: + print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") + # fuzzy match ft:babbage-002:abcd-id-cool-litellm + prompt_tokens_cost_usd_dollar = ( + model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref["ft:babbage-002"]["output_cost_per_token"] + * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif model in litellm.azure_llms: + verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM") + model = litellm.azure_llms[model] + verbose_logger.debug( + f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}" + ) + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + ) + verbose_logger.debug( + f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}" + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_token"] * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + elif model in litellm.azure_embedding_models: + verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model") + model = litellm.azure_embedding_models[model] + prompt_tokens_cost_usd_dollar = ( + model_cost_ref[model]["input_cost_per_token"] * prompt_tokens + ) + completion_tokens_cost_usd_dollar = ( + model_cost_ref[model]["output_cost_per_token"] * completion_tokens + ) + return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar + else: + # if model is not in model_prices_and_context_window.json. Raise an exception-let users know + error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. 
Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n" + raise litellm.exceptions.NotFoundError( # type: ignore + message=error_str, + model=model, + llm_provider="", + ) + + # Extract the number of billion parameters from the model name # only used for together_computer LLMs def get_model_params_and_category(model_name) -> str: diff --git a/litellm/litellm_core_utils/core_helpers.py b/litellm/litellm_core_utils/core_helpers.py new file mode 100644 index 000000000..7b911895d --- /dev/null +++ b/litellm/litellm_core_utils/core_helpers.py @@ -0,0 +1,41 @@ +# What is this? +## Helper utilities for the model response objects + + +def map_finish_reason( + finish_reason: str, +): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null' + # anthropic mapping + if finish_reason == "stop_sequence": + return "stop" + # cohere mapping - https://docs.cohere.com/reference/generate + elif finish_reason == "COMPLETE": + return "stop" + elif finish_reason == "MAX_TOKENS": # cohere + vertex ai + return "length" + elif finish_reason == "ERROR_TOXIC": + return "content_filter" + elif ( + finish_reason == "ERROR" + ): # openai currently doesn't support an 'error' finish reason + return "stop" + # huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream + elif finish_reason == "eos_token" or finish_reason == "stop_sequence": + return "stop" + elif ( + finish_reason == "FINISH_REASON_UNSPECIFIED" or finish_reason == "STOP" + ): # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',] + return "stop" + elif finish_reason == "SAFETY": # vertex ai + return "content_filter" + elif finish_reason == "STOP": # vertex ai + return "stop" + elif finish_reason == "end_turn" or finish_reason == "stop_sequence": # anthropic + return "stop" + elif finish_reason == "max_tokens": # anthropic + return "length" + elif finish_reason == "tool_use": # anthropic + return "tool_calls" + elif finish_reason == "content_filtered": + return "content_filter" + return finish_reason diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py new file mode 100644 index 000000000..ab9874fdc --- /dev/null +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -0,0 +1,3215 @@ +# What is this? 
+## Common Utility file for Logging handler +# Logging function -> log the exact model details + what's being sent | Non-Blocking +from litellm.types.utils import CallTypes +from typing import Optional +import datetime +from litellm import ( + verbose_logger, + json_logs, + log_raw_request_response, + turn_off_message_logging, +) +import traceback +import litellm +import copy +from litellm.integrations.custom_logger import CustomLogger +import json +import time +from litellm.utils import ( + redact_message_input_output_from_logging, + _get_base_model_from_metadata, + supabaseClient, + liteDebuggerClient, + promptLayerLogger, + weightsBiasesLogger, + langsmithLogger, + logfireLogger, + capture_exception, + add_breadcrumb, + lunaryLogger, + prometheusLogger, + LogfireLevel, + print_verbose, + customLogger, + prompt_token_calculator, +) +from litellm.types.utils import ( + ModelResponse, + EmbeddingResponse, + ImageResponse, + TranscriptionResponse, + TextCompletionResponse, +) + + +class Logging: + global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app + custom_pricing: bool = False + stream_options = None + + def __init__( + self, + model, + messages, + stream, + call_type, + start_time, + litellm_call_id, + function_id, + dynamic_success_callbacks=None, + dynamic_failure_callbacks=None, + dynamic_async_success_callbacks=None, + langfuse_public_key=None, + langfuse_secret=None, + ): + if call_type not in [item.value for item in CallTypes]: + allowed_values = ", ".join([item.value for item in CallTypes]) + raise ValueError( + f"Invalid call_type {call_type}. Allowed values: {allowed_values}" + ) + if messages is not None: + if isinstance(messages, str): + messages = [ + {"role": "user", "content": messages} + ] # convert text completion input to the chat completion format + elif ( + isinstance(messages, list) + and len(messages) > 0 + and isinstance(messages[0], str) + ): + new_messages = [] + for m in messages: + new_messages.append({"role": "user", "content": m}) + messages = new_messages + self.model = model + self.messages = messages + self.stream = stream + self.start_time = start_time # log the call start time + self.call_type = call_type + self.litellm_call_id = litellm_call_id + self.function_id = function_id + self.streaming_chunks = [] # for generating complete stream response + self.sync_streaming_chunks = [] # for generating complete stream response + self.model_call_details = {} + self.dynamic_input_callbacks = [] # [TODO] callbacks set for just that call + self.dynamic_failure_callbacks = dynamic_failure_callbacks + self.dynamic_success_callbacks = ( + dynamic_success_callbacks # callbacks set for just that call + ) + self.dynamic_async_success_callbacks = ( + dynamic_async_success_callbacks # callbacks set for just that call + ) + ## DYNAMIC LANGFUSE KEYS ## + self.langfuse_public_key = langfuse_public_key + self.langfuse_secret = langfuse_secret + ## TIME TO FIRST TOKEN LOGGING ## + self.completion_start_time: Optional[datetime.datetime] = None + + def update_environment_variables( + self, model, user, optional_params, litellm_params, **additional_params + ): + self.optional_params = optional_params + self.model = model + self.user = user + self.litellm_params = litellm_params + self.logger_fn = litellm_params.get("logger_fn", None) + verbose_logger.debug(f"self.optional_params: {self.optional_params}") + + 
self.model_call_details = { + "model": self.model, + "messages": self.messages, + "optional_params": self.optional_params, + "litellm_params": self.litellm_params, + "start_time": self.start_time, + "stream": self.stream, + "user": user, + "call_type": str(self.call_type), + "litellm_call_id": self.litellm_call_id, + "completion_start_time": self.completion_start_time, + **self.optional_params, + **additional_params, + } + + ## check if stream options is set ## - used by CustomStreamWrapper for easy instrumentation + if "stream_options" in additional_params: + self.stream_options = additional_params["stream_options"] + ## check if custom pricing set ## + if ( + litellm_params.get("input_cost_per_token") is not None + or litellm_params.get("input_cost_per_second") is not None + or litellm_params.get("output_cost_per_token") is not None + or litellm_params.get("output_cost_per_second") is not None + ): + self.custom_pricing = True + + def _pre_call(self, input, api_key, model=None, additional_args={}): + """ + Common helper function across the sync + async pre-call function + """ + self.model_call_details["input"] = input + self.model_call_details["api_key"] = api_key + self.model_call_details["additional_args"] = additional_args + self.model_call_details["log_event_type"] = "pre_api_call" + if ( + model + ): # if model name was changes pre-call, overwrite the initial model call name with the new one + self.model_call_details["model"] = model + + def pre_call(self, input, api_key, model=None, additional_args={}): + # Log the exact input to the LLM API + litellm.error_logs["PRE_CALL"] = locals() + try: + self._pre_call( + input=input, + api_key=api_key, + model=model, + additional_args=additional_args, + ) + + # User Logging -> if you pass in a custom logging function + headers = additional_args.get("headers", {}) + if headers is None: + headers = {} + data = additional_args.get("complete_input_dict", {}) + api_base = additional_args.get("api_base", "") + self.model_call_details["litellm_params"]["api_base"] = str( + api_base + ) # used for alerting + masked_headers = { + k: ( + (v[:-44] + "*" * 44) + if (isinstance(v, str) and len(v) > 44) + else "*****" + ) + for k, v in headers.items() + } + formatted_headers = " ".join( + [f"-H '{k}: {v}'" for k, v in masked_headers.items()] + ) + + verbose_logger.debug(f"PRE-API-CALL ADDITIONAL ARGS: {additional_args}") + + curl_command = "\n\nPOST Request Sent from LiteLLM:\n" + curl_command += "curl -X POST \\\n" + curl_command += f"{api_base} \\\n" + curl_command += ( + f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else "" + ) + curl_command += f"-d '{str(data)}'\n" + if additional_args.get("request_str", None) is not None: + # print the sagemaker / bedrock client request + curl_command = "\nRequest Sent from LiteLLM:\n" + curl_command += additional_args.get("request_str", None) + elif api_base == "": + curl_command = self.model_call_details + + if json_logs: + verbose_logger.debug( + "POST Request Sent from LiteLLM", + extra={"api_base": {api_base}, **masked_headers}, + ) + else: + verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n") + # log raw request to provider (like LangFuse) -- if opted in. 
+ if log_raw_request_response is True: + try: + # [Non-blocking Extra Debug Information in metadata] + _litellm_params = self.model_call_details.get("litellm_params", {}) + _metadata = _litellm_params.get("metadata", {}) or {} + if ( + turn_off_message_logging is not None + and turn_off_message_logging is True + ): + _metadata["raw_request"] = ( + "redacted by litellm. \ + 'litellm.turn_off_message_logging=True'" + ) + else: + _metadata["raw_request"] = str(curl_command) + except Exception as e: + _metadata["raw_request"] = ( + "Unable to Log \ + raw request: {}".format( + str(e) + ) + ) + if self.logger_fn and callable(self.logger_fn): + try: + self.logger_fn( + self.model_call_details + ) # Expectation: any logger function passed in by the user should accept a dict object + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made + callbacks = litellm.input_callback + self.dynamic_input_callbacks + for callback in callbacks: + try: + if callback == "supabase": + verbose_logger.debug("reaches supabase for logging!") + model = self.model_call_details["model"] + messages = self.model_call_details["input"] + verbose_logger.debug(f"supabaseClient: {supabaseClient}") + supabaseClient.input_log_event( + model=model, + messages=messages, + end_user=self.model_call_details.get("user", "default"), + litellm_call_id=self.litellm_params["litellm_call_id"], + print_verbose=print_verbose, + ) + elif callback == "sentry" and add_breadcrumb: + try: + details_to_log = copy.deepcopy(self.model_call_details) + except: + details_to_log = self.model_call_details + if litellm.turn_off_message_logging: + # make a copy of the _model_Call_details and log it + details_to_log.pop("messages", None) + details_to_log.pop("input", None) + details_to_log.pop("prompt", None) + + add_breadcrumb( + category="litellm.llm_call", + message=f"Model Call Details pre-call: {details_to_log}", + level="info", + ) + elif isinstance(callback, CustomLogger): # custom logger class + callback.log_pre_api_call( + model=self.model, + messages=self.messages, + kwargs=self.model_call_details, + ) + elif callable(callback): # custom logger functions + customLogger.log_input_event( + model=self.model, + messages=self.messages, + kwargs=self.model_call_details, + print_verbose=print_verbose, + callback_func=callback, + ) + except Exception as e: + verbose_logger.error( + "litellm.Logging.pre_call(): Exception occured - {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + verbose_logger.debug( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except Exception: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + verbose_logger.error( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + + def post_call( + self, original_response, input=None, api_key=None, additional_args={} + ): + # Log the exact result from the LLM API, for streaming - log the type of response received + litellm.error_logs["POST_CALL"] = locals() + if isinstance(original_response, dict): + 
original_response = json.dumps(original_response) + try: + self.model_call_details["input"] = input + self.model_call_details["api_key"] = api_key + self.model_call_details["original_response"] = original_response + self.model_call_details["additional_args"] = additional_args + self.model_call_details["log_event_type"] = "post_api_call" + + verbose_logger.debug( + "RAW RESPONSE:\n{}\n\n".format( + self.model_call_details.get( + "original_response", self.model_call_details + ) + ), + ) + if self.logger_fn and callable(self.logger_fn): + try: + self.logger_fn( + self.model_call_details + ) # Expectation: any logger function passed in by the user should accept a dict object + except Exception as e: + verbose_logger.debug( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + original_response = redact_message_input_output_from_logging( + litellm_logging_obj=self, result=original_response + ) + # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made + + callbacks = litellm.input_callback + self.dynamic_input_callbacks + for callback in callbacks: + try: + if callback == "sentry" and add_breadcrumb: + verbose_logger.debug("reaches sentry breadcrumbing") + try: + details_to_log = copy.deepcopy(self.model_call_details) + except: + details_to_log = self.model_call_details + if litellm.turn_off_message_logging: + # make a copy of the _model_Call_details and log it + details_to_log.pop("messages", None) + details_to_log.pop("input", None) + details_to_log.pop("prompt", None) + + add_breadcrumb( + category="litellm.llm_call", + message=f"Model Call Details post-call: {details_to_log}", + level="info", + ) + elif isinstance(callback, CustomLogger): # custom logger class + callback.log_post_api_call( + kwargs=self.model_call_details, + response_obj=None, + start_time=self.start_time, + end_time=None, + ) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + verbose_logger.debug( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + + def _success_handler_helper_fn( + self, result=None, start_time=None, end_time=None, cache_hit=None + ): + try: + if start_time is None: + start_time = self.start_time + if end_time is None: + end_time = datetime.datetime.now() + if self.completion_start_time is None: + self.completion_start_time = end_time + self.model_call_details["completion_start_time"] = ( + self.completion_start_time + ) + self.model_call_details["log_event_type"] = "successful_api_call" + self.model_call_details["end_time"] = end_time + self.model_call_details["cache_hit"] = cache_hit + ## if model in model cost map - log the response cost + ## else set cost to None + verbose_logger.debug(f"Model={self.model};") + if ( + result is not None + and ( + isinstance(result, ModelResponse) + or isinstance(result, EmbeddingResponse) + or isinstance(result, ImageResponse) + or isinstance(result, TranscriptionResponse) + or isinstance(result, TextCompletionResponse) + ) + and self.stream != True + ): # handle streaming separately + 
self.model_call_details["response_cost"] = ( + litellm.response_cost_calculator( + response_object=result, + model=self.model, + cache_hit=self.model_call_details.get("cache_hit", False), + custom_llm_provider=self.model_call_details.get( + "custom_llm_provider", None + ), + base_model=_get_base_model_from_metadata( + model_call_details=self.model_call_details + ), + call_type=self.call_type, + optional_params=self.optional_params, + ) + ) + else: # streaming chunks + image gen. + self.model_call_details["response_cost"] = None + + if ( + litellm.max_budget + and self.stream == False + and result is not None + and "content" in result + ): + time_diff = (end_time - start_time).total_seconds() + float_diff = float(time_diff) + litellm._current_cost += litellm.completion_cost( + model=self.model, + prompt="", + completion=result["content"], + total_time=float_diff, + ) + + return start_time, end_time, result + except Exception as e: + raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}") + + def success_handler( + self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs + ): + verbose_logger.debug( + f"Logging Details LiteLLM-Success Call: Cache_hit={cache_hit}" + ) + start_time, end_time, result = self._success_handler_helper_fn( + start_time=start_time, + end_time=end_time, + result=result, + cache_hit=cache_hit, + ) + # print(f"original response in success handler: {self.model_call_details['original_response']}") + try: + verbose_logger.debug(f"success callbacks: {litellm.success_callback}") + ## BUILD COMPLETE STREAMED RESPONSE + complete_streaming_response = None + if self.stream and isinstance(result, ModelResponse): + if ( + result.choices[0].finish_reason is not None + ): # if it's the last chunk + self.sync_streaming_chunks.append(result) + # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}") + try: + complete_streaming_response = litellm.stream_chunk_builder( + self.sync_streaming_chunks, + messages=self.model_call_details.get("messages", None), + start_time=start_time, + end_time=end_time, + ) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format( + str(e), traceback.format_exc() + ), + log_level="ERROR", + ) + complete_streaming_response = None + else: + self.sync_streaming_chunks.append(result) + + if complete_streaming_response is not None: + verbose_logger.debug( + f"Logging Details LiteLLM-Success Call streaming complete" + ) + self.model_call_details["complete_streaming_response"] = ( + complete_streaming_response + ) + self.model_call_details["response_cost"] = ( + litellm.response_cost_calculator( + response_object=complete_streaming_response, + model=self.model, + cache_hit=self.model_call_details.get("cache_hit", False), + custom_llm_provider=self.model_call_details.get( + "custom_llm_provider", None + ), + base_model=_get_base_model_from_metadata( + model_call_details=self.model_call_details + ), + call_type=self.call_type, + optional_params=self.optional_params, + ) + ) + if self.dynamic_success_callbacks is not None and isinstance( + self.dynamic_success_callbacks, list + ): + callbacks = self.dynamic_success_callbacks + ## keep the internal functions ## + for callback in litellm.success_callback: + if ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + callbacks.append(callback) + else: + callbacks = litellm.success_callback + + result = 
redact_message_input_output_from_logging( + result=result, litellm_logging_obj=self + ) + + for callback in callbacks: + try: + litellm_params = self.model_call_details.get("litellm_params", {}) + if litellm_params.get("no-log", False) == True: + # proxy cost tracking cal backs should run + if not ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + print_verbose("no-log request, skipping logging") + continue + if callback == "lite_debugger": + print_verbose("reaches lite_debugger for logging!") + print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") + print_verbose( + f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}" + ) + liteDebuggerClient.log_event( + end_user=kwargs.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=self.litellm_call_id, + print_verbose=print_verbose, + call_type=self.call_type, + stream=self.stream, + ) + if callback == "promptlayer": + print_verbose("reaches promptlayer for logging!") + promptLayerLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "supabase": + print_verbose("reaches supabase for logging!") + kwargs = self.model_call_details + + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + print_verbose("reaches supabase for streaming logging!") + result = kwargs["complete_streaming_response"] + + model = kwargs["model"] + messages = kwargs["messages"] + optional_params = kwargs.get("optional_params", {}) + litellm_params = kwargs.get("litellm_params", {}) + supabaseClient.log_event( + model=model, + messages=messages, + end_user=optional_params.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=litellm_params.get( + "litellm_call_id", str(uuid.uuid4()) + ), + print_verbose=print_verbose, + ) + if callback == "wandb": + print_verbose("reaches wandb for logging!") + weightsBiasesLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "langsmith": + print_verbose("reaches langsmith for logging!") + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + print_verbose( + "reaches langsmith for streaming logging!" 
+ ) + result = kwargs["complete_streaming_response"] + langsmithLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "logfire": + global logfireLogger + verbose_logger.debug("reaches logfire for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + print_verbose("reaches logfire for streaming logging!") + result = kwargs["complete_streaming_response"] + + logfireLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + level=LogfireLevel.INFO.value, + ) + + if callback == "lunary": + print_verbose("reaches lunary for logging!") + model = self.model + kwargs = self.model_call_details + + input = kwargs.get("messages", kwargs.get("input", None)) + + type = ( + "embed" + if self.call_type == CallTypes.embedding.value + else "llm" + ) + + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + if "complete_streaming_response" not in kwargs: + continue + else: + result = kwargs["complete_streaming_response"] + + lunaryLogger.log_event( + type=type, + kwargs=kwargs, + event="end", + model=model, + input=input, + user_id=kwargs.get("user", None), + # user_props=self.model_call_details.get("user_props", None), + extra=kwargs.get("optional_params", {}), + response_obj=result, + start_time=start_time, + end_time=end_time, + run_id=self.litellm_call_id, + print_verbose=print_verbose, + ) + if callback == "helicone": + print_verbose("reaches helicone for logging!") + model = self.model + messages = self.model_call_details["input"] + heliconeLogger.log_success( + model=model, + messages=messages, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "langfuse": + global langFuseLogger + verbose_logger.debug("reaches langfuse for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose("reaches langfuse for streaming logging!") + result = kwargs["complete_streaming_response"] + if langFuseLogger is None or ( + ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + and ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + ): + langFuseLogger = LangFuseLogger( + langfuse_public_key=self.langfuse_public_key, + langfuse_secret=self.langfuse_secret, + ) + langFuseLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "datadog": + global dataDogLogger + verbose_logger.debug("reaches 
datadog for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"datadog: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose("reaches datadog for streaming logging!") + result = kwargs["complete_streaming_response"] + dataDogLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "prometheus": + verbose_logger.debug("reaches prometheus for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose( + "reaches prometheus for streaming logging!" + ) + result = kwargs["complete_streaming_response"] + prometheusLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "generic": + global genericAPILogger + verbose_logger.debug("reaches langfuse for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose("reaches langfuse for streaming logging!") + result = kwargs["complete_streaming_response"] + if genericAPILogger is None: + genericAPILogger = GenericAPILogger() + genericAPILogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "clickhouse": + global clickHouseLogger + verbose_logger.debug("reaches clickhouse for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose( + "reaches clickhouse for streaming logging!" 
+ ) + result = kwargs["complete_streaming_response"] + if clickHouseLogger is None: + clickHouseLogger = ClickhouseLogger() + clickHouseLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "greenscale": + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if self.stream: + verbose_logger.debug( + f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" + ) + if complete_streaming_response is None: + continue + else: + print_verbose( + "reaches greenscale for streaming logging!" + ) + result = kwargs["complete_streaming_response"] + + greenscaleLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "cache" and litellm.cache is not None: + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + print_verbose("success_callback: reaches cache for logging!") + kwargs = self.model_call_details + if self.stream: + if "complete_streaming_response" not in kwargs: + print_verbose( + f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" + ) + pass + else: + print_verbose( + "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" + ) + result = kwargs["complete_streaming_response"] + # only add to cache once we have a complete streaming response + litellm.cache.add_cache(result, **kwargs) + if callback == "athina": + deep_copy = {} + for k, v in self.model_call_details.items(): + deep_copy[k] = v + athinaLogger.log_event( + kwargs=deep_copy, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "traceloop": + deep_copy = {} + for k, v in self.model_call_details.items(): + if k != "original_response": + deep_copy[k] = v + traceloopLogger.log_event( + kwargs=deep_copy, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + if callback == "s3": + global s3Logger + if s3Logger is None: + s3Logger = S3Logger() + if self.stream: + if "complete_streaming_response" in self.model_call_details: + print_verbose( + "S3Logger Logger: Got Stream Event - Completed Stream Response" + ) + s3Logger.log_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + else: + print_verbose( + "S3Logger Logger: Got Stream Event - No complete stream response as yet" + ) + else: + s3Logger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if ( + callback == "openmeter" + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aimage_generation", False + ) + == False + and self.model_call_details.get("litellm_params", 
{}).get( + "atranscription", False + ) + == False + ): + global openMeterLogger + if openMeterLogger is None: + print_verbose("Instantiates openmeter client") + openMeterLogger = OpenMeterLogger() + if self.stream and complete_streaming_response is None: + openMeterLogger.log_stream_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + if self.stream and complete_streaming_response: + self.model_call_details["complete_response"] = ( + self.model_call_details.get( + "complete_streaming_response", {} + ) + ) + result = self.model_call_details["complete_response"] + openMeterLogger.log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + + if ( + isinstance(callback, CustomLogger) + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aimage_generation", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "atranscription", False + ) + == False + ): # custom logger class + if self.stream and complete_streaming_response is None: + callback.log_stream_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + if self.stream and complete_streaming_response: + self.model_call_details["complete_response"] = ( + self.model_call_details.get( + "complete_streaming_response", {} + ) + ) + result = self.model_call_details["complete_response"] + callback.log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + if ( + callable(callback) == True + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aimage_generation", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "atranscription", False + ) + == False + ): # custom logger functions + print_verbose( + f"success callbacks: Running Custom Callback Function" + ) + customLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + + except Exception as e: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging with integrations {traceback.format_exc()}" + ) + print_verbose( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format( + str(e), traceback.format_exc() + ), + ) + + async def async_success_handler( + self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs + ): + """ + Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. 
+ """ + print_verbose("Logging Details LiteLLM-Async Success Call") + start_time, end_time, result = self._success_handler_helper_fn( + start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit + ) + ## BUILD COMPLETE STREAMED RESPONSE + complete_streaming_response = None + if self.stream: + if result.choices[0].finish_reason is not None: # if it's the last chunk + self.streaming_chunks.append(result) + # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}") + try: + complete_streaming_response = litellm.stream_chunk_builder( + self.streaming_chunks, + messages=self.model_call_details.get("messages", None), + start_time=start_time, + end_time=end_time, + ) + except Exception as e: + print_verbose( + "Error occurred building stream chunk in success logging: {}\n{}".format( + str(e), traceback.format_exc() + ), + log_level="ERROR", + ) + complete_streaming_response = None + else: + self.streaming_chunks.append(result) + if complete_streaming_response is not None: + print_verbose("Async success callbacks: Got a complete streaming response") + self.model_call_details["async_complete_streaming_response"] = ( + complete_streaming_response + ) + try: + if self.model_call_details.get("cache_hit", False) is True: + self.model_call_details["response_cost"] = 0.0 + else: + # check if base_model set on azure + base_model = _get_base_model_from_metadata( + model_call_details=self.model_call_details + ) + # base_model defaults to None if not set on model_info + self.model_call_details["response_cost"] = litellm.completion_cost( + completion_response=complete_streaming_response, + model=base_model, + ) + verbose_logger.debug( + f"Model={self.model}; cost={self.model_call_details['response_cost']}" + ) + except litellm.NotFoundError as e: + verbose_logger.error( + f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None" + ) + self.model_call_details["response_cost"] = None + + if self.dynamic_async_success_callbacks is not None and isinstance( + self.dynamic_async_success_callbacks, list + ): + callbacks = self.dynamic_async_success_callbacks + ## keep the internal functions ## + for callback in litellm._async_success_callback: + callback_name = "" + if isinstance(callback, CustomLogger): + callback_name = callback.__class__.__name__ + if callable(callback): + callback_name = callback.__name__ + if "_PROXY_" in callback_name: + callbacks.append(callback) + else: + callbacks = litellm._async_success_callback + + result = redact_message_input_output_from_logging( + result=result, litellm_logging_obj=self + ) + + for callback in callbacks: + # check if callback can run for this request + litellm_params = self.model_call_details.get("litellm_params", {}) + if litellm_params.get("no-log", False) == True: + # proxy cost tracking cal backs should run + if not ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + print_verbose("no-log request, skipping logging") + continue + try: + if kwargs.get("no-log", False) == True: + print_verbose("no-log request, skipping logging") + continue + if callback == "cache" and litellm.cache is not None: + # set_cache once complete streaming response is built + print_verbose("async success_callback: reaches cache for logging!") + kwargs = self.model_call_details + if self.stream: + if "async_complete_streaming_response" not in kwargs: + print_verbose( + f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. 
Kwargs={kwargs}\n\n" + ) + pass + else: + print_verbose( + "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache" + ) + result = kwargs["async_complete_streaming_response"] + # only add to cache once we have a complete streaming response + if litellm.cache is not None and not isinstance( + litellm.cache.cache, S3Cache + ): + await litellm.cache.async_add_cache(result, **kwargs) + else: + litellm.cache.add_cache(result, **kwargs) + if callback == "openmeter": + global openMeterLogger + if self.stream == True: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + await openMeterLogger.async_log_success_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + ) + else: + await openMeterLogger.async_log_stream_event( # [TODO]: move this to being an async log stream event function + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + await openMeterLogger.async_log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + if isinstance(callback, CustomLogger): # custom logger class + if self.stream == True: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + await callback.async_log_success_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + ) + else: + await callback.async_log_stream_event( # [TODO]: move this to being an async log stream event function + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + else: + await callback.async_log_success_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) + if callable(callback): # custom logger functions + if self.stream: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + await customLogger.async_log_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + else: + await customLogger.async_log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + if callback == "dynamodb": + global dynamoLogger + if dynamoLogger is None: + dynamoLogger = DyanmoDBLogger() + if self.stream: + if ( + "async_complete_streaming_response" + in self.model_call_details + ): + print_verbose( + "DynamoDB Logger: Got Stream Event - Completed Stream Response" + ) + await dynamoLogger._async_log_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "async_complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + else: + print_verbose( + "DynamoDB Logger: Got Stream Event - No complete stream response as yet" + ) + else: + await dynamoLogger._async_log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + except Exception as e: + verbose_logger.error( + f"LiteLLM.LoggingError: [Non-Blocking] 
Exception occurred while success logging {traceback.format_exc()}" + ) + pass + + def _failure_handler_helper_fn( + self, exception, traceback_exception, start_time=None, end_time=None + ): + if start_time is None: + start_time = self.start_time + if end_time is None: + end_time = datetime.datetime.now() + + # on some exceptions, model_call_details is not always initialized, this ensures that we still log those exceptions + if not hasattr(self, "model_call_details"): + self.model_call_details = {} + + self.model_call_details["log_event_type"] = "failed_api_call" + self.model_call_details["exception"] = exception + self.model_call_details["traceback_exception"] = traceback_exception + self.model_call_details["end_time"] = end_time + self.model_call_details.setdefault("original_response", None) + return start_time, end_time + + def failure_handler( + self, exception, traceback_exception, start_time=None, end_time=None + ): + verbose_logger.debug( + f"Logging Details LiteLLM-Failure Call: {litellm.failure_callback}" + ) + try: + start_time, end_time = self._failure_handler_helper_fn( + exception=exception, + traceback_exception=traceback_exception, + start_time=start_time, + end_time=end_time, + ) + callbacks = [] # init this to empty incase it's not created + + if self.dynamic_failure_callbacks is not None and isinstance( + self.dynamic_failure_callbacks, list + ): + callbacks = self.dynamic_failure_callbacks + ## keep the internal functions ## + for callback in litellm.failure_callback: + if ( + isinstance(callback, CustomLogger) + and "_PROXY_" in callback.__class__.__name__ + ): + callbacks.append(callback) + else: + callbacks = litellm.failure_callback + + result = None # result sent to all loggers, init this to None incase it's not created + + result = redact_message_input_output_from_logging( + result=result, litellm_logging_obj=self + ) + for callback in callbacks: + try: + if callback == "lite_debugger": + print_verbose("reaches lite_debugger for logging!") + print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") + result = { + "model": self.model, + "created": time.time(), + "error": traceback_exception, + "usage": { + "prompt_tokens": prompt_token_calculator( + self.model, messages=self.messages + ), + "completion_tokens": 0, + }, + } + liteDebuggerClient.log_event( + model=self.model, + messages=self.messages, + end_user=self.model_call_details.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=self.litellm_call_id, + print_verbose=print_verbose, + call_type=self.call_type, + stream=self.stream, + ) + if callback == "lunary": + print_verbose("reaches lunary for logging error!") + + model = self.model + + input = self.model_call_details["input"] + + _type = ( + "embed" + if self.call_type == CallTypes.embedding.value + else "llm" + ) + + lunaryLogger.log_event( + type=_type, + event="error", + user_id=self.model_call_details.get("user", "default"), + model=model, + input=input, + error=traceback_exception, + run_id=self.litellm_call_id, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + ) + if callback == "sentry": + print_verbose("sending exception to sentry") + if capture_exception: + capture_exception(exception) + else: + print_verbose( + f"capture exception not initialized: {capture_exception}" + ) + elif callback == "supabase": + print_verbose("reaches supabase for logging!") + print_verbose(f"supabaseClient: {supabaseClient}") + result = { + "model": model, + "created": time.time(), + 
"error": traceback_exception, + "usage": { + "prompt_tokens": prompt_token_calculator( + model, messages=self.messages + ), + "completion_tokens": 0, + }, + } + supabaseClient.log_event( + model=self.model, + messages=self.messages, + end_user=self.model_call_details.get("user", "default"), + response_obj=result, + start_time=start_time, + end_time=end_time, + litellm_call_id=self.model_call_details["litellm_call_id"], + print_verbose=print_verbose, + ) + if callable(callback): # custom logger functions + customLogger.log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + if ( + isinstance(callback, CustomLogger) + and self.model_call_details.get("litellm_params", {}).get( + "acompletion", False + ) + == False + and self.model_call_details.get("litellm_params", {}).get( + "aembedding", False + ) + == False + ): # custom logger class + callback.log_failure_event( + start_time=start_time, + end_time=end_time, + response_obj=result, + kwargs=self.model_call_details, + ) + if callback == "langfuse": + global langFuseLogger + verbose_logger.debug("reaches langfuse for logging failure") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if langFuseLogger is None or ( + ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + and ( + self.langfuse_public_key is not None + and self.langfuse_public_key + != langFuseLogger.public_key + ) + ): + langFuseLogger = LangFuseLogger( + langfuse_public_key=self.langfuse_public_key, + langfuse_secret=self.langfuse_secret, + ) + langFuseLogger.log_event( + start_time=start_time, + end_time=end_time, + response_obj=None, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + status_message=str(exception), + level="ERROR", + kwargs=self.model_call_details, + ) + if callback == "traceloop": + traceloopLogger.log_event( + start_time=start_time, + end_time=end_time, + response_obj=None, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + status_message=str(exception), + level="ERROR", + kwargs=self.model_call_details, + ) + if callback == "prometheus": + global prometheusLogger + verbose_logger.debug("reaches prometheus for success logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + kwargs["exception"] = str(exception) + prometheusLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + ) + + if callback == "logfire": + verbose_logger.debug("reaches logfire for failure logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + kwargs["exception"] = exception + + logfireLogger.log_event( + kwargs=kwargs, + response_obj=result, + start_time=start_time, + end_time=end_time, + level=LogfireLevel.ERROR.value, + print_verbose=print_verbose, + ) + except Exception as e: + print_verbose( + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with 
integrations {str(e)}" + ) + print_verbose( + f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" + ) + if capture_exception: # log this error to sentry for debugging + capture_exception(e) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {}\n{}".format( + str(e), traceback.format_exc() + ) + ) + + async def async_failure_handler( + self, exception, traceback_exception, start_time=None, end_time=None + ): + """ + Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. + """ + start_time, end_time = self._failure_handler_helper_fn( + exception=exception, + traceback_exception=traceback_exception, + start_time=start_time, + end_time=end_time, + ) + result = None # result sent to all loggers, init this to None incase it's not created + for callback in litellm._async_failure_callback: + try: + if isinstance(callback, CustomLogger): # custom logger class + await callback.async_log_failure_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + ) # type: ignore + if callable(callback): # custom logger functions + await customLogger.async_log_event( + kwargs=self.model_call_details, + response_obj=result, + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + except Exception as e: + verbose_logger.error( + "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success \ + logging {}\n{}\nCallback={}".format( + str(e), traceback.format_exc(), callback + ) + ) + + +# # class Logging: +# global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger + +# custom_pricing: bool = False +# stream_options = None + +# def __init__( +# self, +# model, +# messages, +# stream, +# call_type, +# start_time, +# litellm_call_id, +# function_id, +# dynamic_success_callbacks=None, +# dynamic_failure_callbacks=None, +# dynamic_async_success_callbacks=None, +# langfuse_public_key=None, +# langfuse_secret=None, +# ): +# if call_type not in [item.value for item in CallTypes]: +# allowed_values = ", ".join([item.value for item in CallTypes]) +# raise ValueError( +# f"Invalid call_type {call_type}. 
Allowed values: {allowed_values}" +# ) +# if messages is not None: +# if isinstance(messages, str): +# messages = [ +# {"role": "user", "content": messages} +# ] # convert text completion input to the chat completion format +# elif ( +# isinstance(messages, list) +# and len(messages) > 0 +# and isinstance(messages[0], str) +# ): +# new_messages = [] +# for m in messages: +# new_messages.append({"role": "user", "content": m}) +# messages = new_messages +# self.model = model +# self.messages = messages +# self.stream = stream +# self.start_time = start_time # log the call start time +# self.call_type = call_type +# self.litellm_call_id = litellm_call_id +# self.function_id = function_id +# self.streaming_chunks = [] # for generating complete stream response +# self.sync_streaming_chunks = [] # for generating complete stream response +# self.model_call_details = {} +# self.dynamic_input_callbacks = [] # [TODO] callbacks set for just that call +# self.dynamic_failure_callbacks = dynamic_failure_callbacks +# self.dynamic_success_callbacks = ( +# dynamic_success_callbacks # callbacks set for just that call +# ) +# self.dynamic_async_success_callbacks = ( +# dynamic_async_success_callbacks # callbacks set for just that call +# ) +# ## DYNAMIC LANGFUSE KEYS ## +# self.langfuse_public_key = langfuse_public_key +# self.langfuse_secret = langfuse_secret +# ## TIME TO FIRST TOKEN LOGGING ## +# self.completion_start_time: Optional[datetime.datetime] = None + +# def update_environment_variables( +# self, model, user, optional_params, litellm_params, **additional_params +# ): +# self.optional_params = optional_params +# self.model = model +# self.user = user +# self.litellm_params = litellm_params +# self.logger_fn = litellm_params.get("logger_fn", None) +# print_verbose(f"self.optional_params: {self.optional_params}") + +# self.model_call_details = { +# "model": self.model, +# "messages": self.messages, +# "optional_params": self.optional_params, +# "litellm_params": self.litellm_params, +# "start_time": self.start_time, +# "stream": self.stream, +# "user": user, +# "call_type": str(self.call_type), +# "litellm_call_id": self.litellm_call_id, +# "completion_start_time": self.completion_start_time, +# **self.optional_params, +# **additional_params, +# } + +# ## check if stream options is set ## - used by CustomStreamWrapper for easy instrumentation +# if "stream_options" in additional_params: +# self.stream_options = additional_params["stream_options"] +# ## check if custom pricing set ## +# if ( +# litellm_params.get("input_cost_per_token") is not None +# or litellm_params.get("input_cost_per_second") is not None +# or litellm_params.get("output_cost_per_token") is not None +# or litellm_params.get("output_cost_per_second") is not None +# ): +# self.custom_pricing = True + +# def _pre_call(self, input, api_key, model=None, additional_args={}): +# """ +# Common helper function across the sync + async pre-call function +# """ +# # print_verbose(f"logging pre call for model: {self.model} with call type: {self.call_type}") +# self.model_call_details["input"] = input +# self.model_call_details["api_key"] = api_key +# self.model_call_details["additional_args"] = additional_args +# self.model_call_details["log_event_type"] = "pre_api_call" +# if ( +# model +# ): # if model name was changes pre-call, overwrite the initial model call name with the new one +# self.model_call_details["model"] = model + +# def pre_call(self, input, api_key, model=None, additional_args={}): +# # Log the exact input to the LLM API +# 
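# The constructor logic above (kept here as a commented-out copy) converts
# text-completion style input into the chat format before it is logged. The same
# normalization as a standalone sketch; the helper name is illustrative.
from typing import List, Union


def normalize_messages(messages: Union[str, List[str], List[dict], None]):
    if messages is None:
        return None
    if isinstance(messages, str):
        return [{"role": "user", "content": messages}]
    if isinstance(messages, list) and len(messages) > 0 and isinstance(messages[0], str):
        return [{"role": "user", "content": m} for m in messages]
    return messages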
litellm.error_logs["PRE_CALL"] = locals() +# try: +# self._pre_call( +# input=input, +# api_key=api_key, +# model=model, +# additional_args=additional_args, +# ) + +# # User Logging -> if you pass in a custom logging function +# headers = additional_args.get("headers", {}) +# if headers is None: +# headers = {} +# data = additional_args.get("complete_input_dict", {}) +# api_base = additional_args.get("api_base", "") +# self.model_call_details["litellm_params"]["api_base"] = str( +# api_base +# ) # used for alerting +# masked_headers = { +# k: ( +# (v[:-44] + "*" * 44) +# if (isinstance(v, str) and len(v) > 44) +# else "*****" +# ) +# for k, v in headers.items() +# } +# formatted_headers = " ".join( +# [f"-H '{k}: {v}'" for k, v in masked_headers.items()] +# ) + +# verbose_logger.debug(f"PRE-API-CALL ADDITIONAL ARGS: {additional_args}") + +# curl_command = "\n\nPOST Request Sent from LiteLLM:\n" +# curl_command += "curl -X POST \\\n" +# curl_command += f"{api_base} \\\n" +# curl_command += ( +# f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else "" +# ) +# curl_command += f"-d '{str(data)}'\n" +# if additional_args.get("request_str", None) is not None: +# # print the sagemaker / bedrock client request +# curl_command = "\nRequest Sent from LiteLLM:\n" +# curl_command += additional_args.get("request_str", None) +# elif api_base == "": +# curl_command = self.model_call_details + +# # only print verbose if verbose logger is not set +# if verbose_logger.level == 0: +# # this means verbose logger was not switched on - user is in litellm.set_verbose=True +# print_verbose(f"\033[92m{curl_command}\033[0m\n") + +# if litellm.json_logs: +# verbose_logger.debug( +# "POST Request Sent from LiteLLM", +# extra={"api_base": {api_base}, **masked_headers}, +# ) +# else: +# verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n") +# # log raw request to provider (like LangFuse) -- if opted in. +# if litellm.log_raw_request_response is True: +# try: +# # [Non-blocking Extra Debug Information in metadata] +# _litellm_params = self.model_call_details.get("litellm_params", {}) +# _metadata = _litellm_params.get("metadata", {}) or {} +# if ( +# litellm.turn_off_message_logging is not None +# and litellm.turn_off_message_logging is True +# ): +# _metadata["raw_request"] = ( +# "redacted by litellm. 
\ +# 'litellm.turn_off_message_logging=True'" +# ) +# else: +# _metadata["raw_request"] = str(curl_command) +# except Exception as e: +# _metadata["raw_request"] = ( +# "Unable to Log \ +# raw request: {}".format( +# str(e) +# ) +# ) +# if self.logger_fn and callable(self.logger_fn): +# try: +# self.logger_fn( +# self.model_call_details +# ) # Expectation: any logger function passed in by the user should accept a dict object +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" +# ) +# # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made +# callbacks = litellm.input_callback + self.dynamic_input_callbacks +# for callback in callbacks: +# try: +# if callback == "supabase": +# print_verbose("reaches supabase for logging!") +# model = self.model_call_details["model"] +# messages = self.model_call_details["input"] +# print_verbose(f"supabaseClient: {supabaseClient}") +# supabaseClient.input_log_event( +# model=model, +# messages=messages, +# end_user=self.model_call_details.get("user", "default"), +# litellm_call_id=self.litellm_params["litellm_call_id"], +# print_verbose=print_verbose, +# ) +# elif callback == "sentry" and add_breadcrumb: +# try: +# details_to_log = copy.deepcopy(self.model_call_details) +# except: +# details_to_log = self.model_call_details +# if litellm.turn_off_message_logging: +# # make a copy of the _model_Call_details and log it +# details_to_log.pop("messages", None) +# details_to_log.pop("input", None) +# details_to_log.pop("prompt", None) + +# add_breadcrumb( +# category="litellm.llm_call", +# message=f"Model Call Details pre-call: {details_to_log}", +# level="info", +# ) +# elif isinstance(callback, CustomLogger): # custom logger class +# callback.log_pre_api_call( +# model=self.model, +# messages=self.messages, +# kwargs=self.model_call_details, +# ) +# elif callable(callback): # custom logger functions +# customLogger.log_input_event( +# model=self.model, +# messages=self.messages, +# kwargs=self.model_call_details, +# print_verbose=print_verbose, +# callback_func=callback, +# ) +# except Exception as e: +# verbose_logger.error( +# "litellm.Logging.pre_call(): Exception occured - {}".format( +# str(e) +# ) +# ) +# verbose_logger.debug( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}" +# ) +# print_verbose( +# f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" +# ) +# if capture_exception: # log this error to sentry for debugging +# capture_exception(e) +# except: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" +# ) +# print_verbose( +# f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" +# ) +# if capture_exception: # log this error to sentry for debugging +# capture_exception(e) + +# def post_call( +# self, original_response, input=None, api_key=None, additional_args={} +# ): +# # Log the exact result from the LLM API, for streaming - log the type of response received +# litellm.error_logs["POST_CALL"] = locals() +# if isinstance(original_response, dict): +# original_response = json.dumps(original_response) +# try: +# self.model_call_details["input"] = input +# self.model_call_details["api_key"] = api_key +# self.model_call_details["original_response"] = original_response +# self.model_call_details["additional_args"] = 
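# The raw-request block above is gated on two module-level switches that are
# referenced directly in the code above; enabling them looks like this.
import litellm

litellm.log_raw_request_response = True  # attach the reconstructed raw request to logger metadata
litellm.turn_off_message_logging = True  # redact message content; raw_request becomes a redaction notice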
additional_args +# self.model_call_details["log_event_type"] = "post_api_call" +# # User Logging -> if you pass in a custom logging function +# print_verbose( +# f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n", +# log_level="DEBUG", +# ) +# if self.logger_fn and callable(self.logger_fn): +# try: +# self.logger_fn( +# self.model_call_details +# ) # Expectation: any logger function passed in by the user should accept a dict object +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" +# ) +# original_response = redact_message_input_output_from_logging( +# litellm_logging_obj=self, result=original_response +# ) +# # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made + +# callbacks = litellm.input_callback + self.dynamic_input_callbacks +# for callback in callbacks: +# try: +# if callback == "lite_debugger": +# print_verbose("reaches litedebugger for post-call logging!") +# print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") +# liteDebuggerClient.post_call_log_event( +# original_response=original_response, +# litellm_call_id=self.litellm_params["litellm_call_id"], +# print_verbose=print_verbose, +# call_type=self.call_type, +# stream=self.stream, +# ) +# elif callback == "sentry" and add_breadcrumb: +# print_verbose("reaches sentry breadcrumbing") +# try: +# details_to_log = copy.deepcopy(self.model_call_details) +# except: +# details_to_log = self.model_call_details +# if litellm.turn_off_message_logging: +# # make a copy of the _model_Call_details and log it +# details_to_log.pop("messages", None) +# details_to_log.pop("input", None) +# details_to_log.pop("prompt", None) + +# add_breadcrumb( +# category="litellm.llm_call", +# message=f"Model Call Details post-call: {details_to_log}", +# level="info", +# ) +# elif isinstance(callback, CustomLogger): # custom logger class +# callback.log_post_api_call( +# kwargs=self.model_call_details, +# response_obj=None, +# start_time=self.start_time, +# end_time=None, +# ) +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {traceback.format_exc()}" +# ) +# print_verbose( +# f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" +# ) +# if capture_exception: # log this error to sentry for debugging +# capture_exception(e) +# except: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" +# ) +# pass + +# def _success_handler_helper_fn( +# self, result=None, start_time=None, end_time=None, cache_hit=None +# ): +# try: +# if start_time is None: +# start_time = self.start_time +# if end_time is None: +# end_time = datetime.datetime.now() +# if self.completion_start_time is None: +# self.completion_start_time = end_time +# self.model_call_details["completion_start_time"] = ( +# self.completion_start_time +# ) +# self.model_call_details["log_event_type"] = "successful_api_call" +# self.model_call_details["end_time"] = end_time +# self.model_call_details["cache_hit"] = cache_hit +# ## if model in model cost map - log the response cost +# ## else set cost to None +# verbose_logger.debug(f"Model={self.model};") +# if ( +# result is not None +# and ( +# isinstance(result, ModelResponse) +# or isinstance(result, EmbeddingResponse) +# or isinstance(result, ImageResponse) +# or 
isinstance(result, TranscriptionResponse) +# or isinstance(result, TextCompletionResponse) +# ) +# and self.stream != True +# ): # handle streaming separately +# self.model_call_details["response_cost"] = ( +# litellm.response_cost_calculator( +# response_object=result, +# model=self.model, +# cache_hit=self.model_call_details.get("cache_hit", False), +# custom_llm_provider=self.model_call_details.get( +# "custom_llm_provider", None +# ), +# base_model=_get_base_model_from_metadata( +# model_call_details=self.model_call_details +# ), +# call_type=self.call_type, +# optional_params=self.optional_params, +# ) +# ) +# else: # streaming chunks + image gen. +# self.model_call_details["response_cost"] = None + +# if ( +# litellm.max_budget +# and self.stream == False +# and result is not None +# and "content" in result +# ): +# time_diff = (end_time - start_time).total_seconds() +# float_diff = float(time_diff) +# litellm._current_cost += litellm.completion_cost( +# model=self.model, +# prompt="", +# completion=result["content"], +# total_time=float_diff, +# ) + +# return start_time, end_time, result +# except Exception as e: +# raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}") + +# def success_handler( +# self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs +# ): +# print_verbose(f"Logging Details LiteLLM-Success Call: {cache_hit}") +# start_time, end_time, result = self._success_handler_helper_fn( +# start_time=start_time, +# end_time=end_time, +# result=result, +# cache_hit=cache_hit, +# ) +# # print(f"original response in success handler: {self.model_call_details['original_response']}") +# try: +# print_verbose(f"success callbacks: {litellm.success_callback}") +# ## BUILD COMPLETE STREAMED RESPONSE +# complete_streaming_response = None +# if self.stream and isinstance(result, ModelResponse): +# if ( +# result.choices[0].finish_reason is not None +# ): # if it's the last chunk +# self.sync_streaming_chunks.append(result) +# # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}") +# try: +# complete_streaming_response = litellm.stream_chunk_builder( +# self.sync_streaming_chunks, +# messages=self.model_call_details.get("messages", None), +# start_time=start_time, +# end_time=end_time, +# ) +# except Exception as e: +# print_verbose( +# "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format( +# str(e), traceback.format_exc() +# ), +# log_level="ERROR", +# ) +# complete_streaming_response = None +# else: +# self.sync_streaming_chunks.append(result) + +# if complete_streaming_response is not None: +# print_verbose( +# f"Logging Details LiteLLM-Success Call streaming complete" +# ) +# self.model_call_details["complete_streaming_response"] = ( +# complete_streaming_response +# ) +# self.model_call_details["response_cost"] = ( +# litellm.response_cost_calculator( +# response_object=complete_streaming_response, +# model=self.model, +# cache_hit=self.model_call_details.get("cache_hit", False), +# custom_llm_provider=self.model_call_details.get( +# "custom_llm_provider", None +# ), +# base_model=_get_base_model_from_metadata( +# model_call_details=self.model_call_details +# ), +# call_type=self.call_type, +# optional_params=self.optional_params, +# ) +# ) +# if self.dynamic_success_callbacks is not None and isinstance( +# self.dynamic_success_callbacks, list +# ): +# callbacks = self.dynamic_success_callbacks +# ## keep the internal functions ## +# for 
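# The success-handler logic above buffers stream chunks, rebuilds a complete
# response once the final chunk arrives, and prices it. A caller-side sketch of
# the same two steps using the public litellm helpers; the handler itself calls
# response_cost_calculator with extra context (base_model, call_type, cache_hit),
# so this is an approximation, and the model name is just an example.
import litellm

messages = [{"role": "user", "content": "hello"}]
chunks = []
for chunk in litellm.completion(model="gpt-3.5-turbo", messages=messages, stream=True):
    chunks.append(chunk)

complete_response = litellm.stream_chunk_builder(chunks, messages=messages)
approx_cost = litellm.completion_cost(completion_response=complete_response)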
callback in litellm.success_callback: +# if ( +# isinstance(callback, CustomLogger) +# and "_PROXY_" in callback.__class__.__name__ +# ): +# callbacks.append(callback) +# else: +# callbacks = litellm.success_callback + +# result = redact_message_input_output_from_logging( +# result=result, litellm_logging_obj=self +# ) + +# for callback in callbacks: +# try: +# litellm_params = self.model_call_details.get("litellm_params", {}) +# if litellm_params.get("no-log", False) == True: +# # proxy cost tracking cal backs should run +# if not ( +# isinstance(callback, CustomLogger) +# and "_PROXY_" in callback.__class__.__name__ +# ): +# print_verbose("no-log request, skipping logging") +# continue +# if callback == "lite_debugger": +# print_verbose("reaches lite_debugger for logging!") +# print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") +# print_verbose( +# f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}" +# ) +# liteDebuggerClient.log_event( +# end_user=kwargs.get("user", "default"), +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# litellm_call_id=self.litellm_call_id, +# print_verbose=print_verbose, +# call_type=self.call_type, +# stream=self.stream, +# ) +# if callback == "promptlayer": +# print_verbose("reaches promptlayer for logging!") +# promptLayerLogger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "supabase": +# print_verbose("reaches supabase for logging!") +# kwargs = self.model_call_details + +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# if "complete_streaming_response" not in kwargs: +# continue +# else: +# print_verbose("reaches supabase for streaming logging!") +# result = kwargs["complete_streaming_response"] + +# model = kwargs["model"] +# messages = kwargs["messages"] +# optional_params = kwargs.get("optional_params", {}) +# litellm_params = kwargs.get("litellm_params", {}) +# supabaseClient.log_event( +# model=model, +# messages=messages, +# end_user=optional_params.get("user", "default"), +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# litellm_call_id=litellm_params.get( +# "litellm_call_id", str(uuid.uuid4()) +# ), +# print_verbose=print_verbose, +# ) +# if callback == "wandb": +# print_verbose("reaches wandb for logging!") +# weightsBiasesLogger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "langsmith": +# print_verbose("reaches langsmith for logging!") +# if self.stream: +# if "complete_streaming_response" not in kwargs: +# continue +# else: +# print_verbose( +# "reaches langsmith for streaming logging!" 
+# ) +# result = kwargs["complete_streaming_response"] +# langsmithLogger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "logfire": +# global logfireLogger +# verbose_logger.debug("reaches logfire for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v + +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# if "complete_streaming_response" not in kwargs: +# continue +# else: +# print_verbose("reaches logfire for streaming logging!") +# result = kwargs["complete_streaming_response"] + +# logfireLogger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# level=LogfireLevel.INFO.value, +# ) + +# if callback == "lunary": +# print_verbose("reaches lunary for logging!") +# model = self.model +# kwargs = self.model_call_details + +# input = kwargs.get("messages", kwargs.get("input", None)) + +# type = ( +# "embed" +# if self.call_type == CallTypes.embedding.value +# else "llm" +# ) + +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# if "complete_streaming_response" not in kwargs: +# continue +# else: +# result = kwargs["complete_streaming_response"] + +# lunaryLogger.log_event( +# type=type, +# kwargs=kwargs, +# event="end", +# model=model, +# input=input, +# user_id=kwargs.get("user", None), +# # user_props=self.model_call_details.get("user_props", None), +# extra=kwargs.get("optional_params", {}), +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# run_id=self.litellm_call_id, +# print_verbose=print_verbose, +# ) +# if callback == "helicone": +# print_verbose("reaches helicone for logging!") +# model = self.model +# messages = self.model_call_details["input"] +# heliconeLogger.log_success( +# model=model, +# messages=messages, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "langfuse": +# global langFuseLogger +# verbose_logger.debug("reaches langfuse for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose("reaches langfuse for streaming logging!") +# result = kwargs["complete_streaming_response"] +# if langFuseLogger is None or ( +# ( +# self.langfuse_public_key is not None +# and self.langfuse_public_key +# != langFuseLogger.public_key +# ) +# and ( +# self.langfuse_public_key is not None +# and self.langfuse_public_key +# != langFuseLogger.public_key +# ) +# ): +# langFuseLogger = LangFuseLogger( +# langfuse_public_key=self.langfuse_public_key, +# langfuse_secret=self.langfuse_secret, +# ) +# langFuseLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), 
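# The langfuse branch above re-instantiates LangFuseLogger whenever a per-call
# public key differs from the one the cached logger was built with (the original
# condition repeats the same public-key check twice). The guard, reduced to a
# single check, as a sketch; the function name is illustrative.
def needs_new_langfuse_logger(current_logger, dynamic_public_key) -> bool:
    return current_logger is None or (
        dynamic_public_key is not None
        and dynamic_public_key != current_logger.public_key
    )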
+# print_verbose=print_verbose, +# ) +# if callback == "datadog": +# global dataDogLogger +# verbose_logger.debug("reaches datadog for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"datadog: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose("reaches datadog for streaming logging!") +# result = kwargs["complete_streaming_response"] +# dataDogLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) +# if callback == "prometheus": +# global prometheusLogger +# verbose_logger.debug("reaches prometheus for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose( +# "reaches prometheus for streaming logging!" +# ) +# result = kwargs["complete_streaming_response"] +# prometheusLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) +# if callback == "generic": +# global genericAPILogger +# verbose_logger.debug("reaches langfuse for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose("reaches langfuse for streaming logging!") +# result = kwargs["complete_streaming_response"] +# if genericAPILogger is None: +# genericAPILogger = GenericAPILogger() +# genericAPILogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) +# if callback == "clickhouse": +# global clickHouseLogger +# verbose_logger.debug("reaches clickhouse for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose( +# "reaches clickhouse for streaming logging!" 
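# Several integrations above (langfuse, datadog, prometheus, generic, clickhouse)
# build a shallow, filtered copy of model_call_details instead of deep-copying it,
# because "original_response" may be a coroutine and copy.deepcopy would raise.
# The repeated loop as a one-line helper; the name is illustrative.
def loggable_kwargs(model_call_details: dict) -> dict:
    return {k: v for k, v in model_call_details.items() if k != "original_response"}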
+# ) +# result = kwargs["complete_streaming_response"] +# if clickHouseLogger is None: +# clickHouseLogger = ClickhouseLogger() +# clickHouseLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) +# if callback == "greenscale": +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if self.stream: +# verbose_logger.debug( +# f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" +# ) +# if complete_streaming_response is None: +# continue +# else: +# print_verbose( +# "reaches greenscale for streaming logging!" +# ) +# result = kwargs["complete_streaming_response"] + +# greenscaleLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "cache" and litellm.cache is not None: +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# print_verbose("success_callback: reaches cache for logging!") +# kwargs = self.model_call_details +# if self.stream: +# if "complete_streaming_response" not in kwargs: +# print_verbose( +# f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" +# ) +# pass +# else: +# print_verbose( +# "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" +# ) +# result = kwargs["complete_streaming_response"] +# # only add to cache once we have a complete streaming response +# litellm.cache.add_cache(result, **kwargs) +# if callback == "athina": +# deep_copy = {} +# for k, v in self.model_call_details.items(): +# deep_copy[k] = v +# athinaLogger.log_event( +# kwargs=deep_copy, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "traceloop": +# deep_copy = {} +# for k, v in self.model_call_details.items(): +# if k != "original_response": +# deep_copy[k] = v +# traceloopLogger.log_event( +# kwargs=deep_copy, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) +# if callback == "s3": +# global s3Logger +# if s3Logger is None: +# s3Logger = S3Logger() +# if self.stream: +# if "complete_streaming_response" in self.model_call_details: +# print_verbose( +# "S3Logger Logger: Got Stream Event - Completed Stream Response" +# ) +# s3Logger.log_event( +# kwargs=self.model_call_details, +# response_obj=self.model_call_details[ +# "complete_streaming_response" +# ], +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# else: +# print_verbose( +# "S3Logger Logger: Got Stream Event - No complete stream response as yet" +# ) +# else: +# s3Logger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if ( +# callback == "openmeter" +# and self.model_call_details.get("litellm_params", {}).get( +# "acompletion", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aembedding", False +# ) +# == False +# and 
self.model_call_details.get("litellm_params", {}).get( +# "aimage_generation", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "atranscription", False +# ) +# == False +# ): +# global openMeterLogger +# if openMeterLogger is None: +# print_verbose("Instantiates openmeter client") +# openMeterLogger = OpenMeterLogger() +# if self.stream and complete_streaming_response is None: +# openMeterLogger.log_stream_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# if self.stream and complete_streaming_response: +# self.model_call_details["complete_response"] = ( +# self.model_call_details.get( +# "complete_streaming_response", {} +# ) +# ) +# result = self.model_call_details["complete_response"] +# openMeterLogger.log_success_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) + +# if ( +# isinstance(callback, CustomLogger) +# and self.model_call_details.get("litellm_params", {}).get( +# "acompletion", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aembedding", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aimage_generation", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "atranscription", False +# ) +# == False +# ): # custom logger class +# if self.stream and complete_streaming_response is None: +# callback.log_stream_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# if self.stream and complete_streaming_response: +# self.model_call_details["complete_response"] = ( +# self.model_call_details.get( +# "complete_streaming_response", {} +# ) +# ) +# result = self.model_call_details["complete_response"] +# callback.log_success_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# if ( +# callable(callback) == True +# and self.model_call_details.get("litellm_params", {}).get( +# "acompletion", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aembedding", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aimage_generation", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "atranscription", False +# ) +# == False +# ): # custom logger functions +# print_verbose( +# f"success callbacks: Running Custom Callback Function" +# ) +# customLogger.log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# callback_func=callback, +# ) + +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging with integrations {traceback.format_exc()}" +# ) +# print_verbose( +# f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" +# ) +# if capture_exception: # log this error to sentry for debugging +# capture_exception(e) +# except: +# print_verbose( +# "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format( +# str(e), traceback.format_exc() +# ), +# log_level="ERROR", +# ) +# pass + +# async def async_success_handler( +# self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs +# ): +# """ +# Implementing async 
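# The long guards above skip the sync CustomLogger / callable handlers whenever
# the request came through an async entry point (acompletion, aembedding,
# aimage_generation, atranscription); those requests are handled by
# async_success_handler instead. The same check condensed into a helper; the
# name is illustrative.
def is_sync_call(litellm_params: dict) -> bool:
    return not any(
        litellm_params.get(flag, False)
        for flag in ("acompletion", "aembedding", "aimage_generation", "atranscription")
    )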
callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. +# """ +# print_verbose("Logging Details LiteLLM-Async Success Call") +# start_time, end_time, result = self._success_handler_helper_fn( +# start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit +# ) +# ## BUILD COMPLETE STREAMED RESPONSE +# complete_streaming_response = None +# if self.stream: +# if result.choices[0].finish_reason is not None: # if it's the last chunk +# self.streaming_chunks.append(result) +# # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}") +# try: +# complete_streaming_response = litellm.stream_chunk_builder( +# self.streaming_chunks, +# messages=self.model_call_details.get("messages", None), +# start_time=start_time, +# end_time=end_time, +# ) +# except Exception as e: +# print_verbose( +# "Error occurred building stream chunk in success logging: {}\n{}".format( +# str(e), traceback.format_exc() +# ), +# log_level="ERROR", +# ) +# complete_streaming_response = None +# else: +# self.streaming_chunks.append(result) +# if complete_streaming_response is not None: +# print_verbose("Async success callbacks: Got a complete streaming response") +# self.model_call_details["async_complete_streaming_response"] = ( +# complete_streaming_response +# ) +# try: +# if self.model_call_details.get("cache_hit", False) is True: +# self.model_call_details["response_cost"] = 0.0 +# else: +# # check if base_model set on azure +# base_model = _get_base_model_from_metadata( +# model_call_details=self.model_call_details +# ) +# # base_model defaults to None if not set on model_info +# self.model_call_details["response_cost"] = litellm.completion_cost( +# completion_response=complete_streaming_response, +# model=base_model, +# ) +# verbose_logger.debug( +# f"Model={self.model}; cost={self.model_call_details['response_cost']}" +# ) +# except litellm.NotFoundError as e: +# verbose_logger.error( +# f"Model={self.model} not found in completion cost map. 
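# The async success path above prices a completed stream as: zero on cache hits,
# otherwise litellm.completion_cost() on the rebuilt response (optionally against
# an Azure base_model pulled from metadata), and None when the model is missing
# from the cost map. A condensed sketch; the function name is illustrative.
import litellm


def price_streamed_response(complete_response, cache_hit: bool, base_model=None):
    if cache_hit:
        return 0.0
    try:
        return litellm.completion_cost(
            completion_response=complete_response, model=base_model
        )
    except litellm.NotFoundError:
        return None  # model not found in the completion cost map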
Setting 'response_cost' to None" +# ) +# self.model_call_details["response_cost"] = None + +# if self.dynamic_async_success_callbacks is not None and isinstance( +# self.dynamic_async_success_callbacks, list +# ): +# callbacks = self.dynamic_async_success_callbacks +# ## keep the internal functions ## +# for callback in litellm._async_success_callback: +# callback_name = "" +# if isinstance(callback, CustomLogger): +# callback_name = callback.__class__.__name__ +# if callable(callback): +# callback_name = callback.__name__ +# if "_PROXY_" in callback_name: +# callbacks.append(callback) +# else: +# callbacks = litellm._async_success_callback + +# result = redact_message_input_output_from_logging( +# result=result, litellm_logging_obj=self +# ) + +# for callback in callbacks: +# # check if callback can run for this request +# litellm_params = self.model_call_details.get("litellm_params", {}) +# if litellm_params.get("no-log", False) == True: +# # proxy cost tracking cal backs should run +# if not ( +# isinstance(callback, CustomLogger) +# and "_PROXY_" in callback.__class__.__name__ +# ): +# print_verbose("no-log request, skipping logging") +# continue +# try: +# if kwargs.get("no-log", False) == True: +# print_verbose("no-log request, skipping logging") +# continue +# if callback == "cache" and litellm.cache is not None: +# # set_cache once complete streaming response is built +# print_verbose("async success_callback: reaches cache for logging!") +# kwargs = self.model_call_details +# if self.stream: +# if "async_complete_streaming_response" not in kwargs: +# print_verbose( +# f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. Kwargs={kwargs}\n\n" +# ) +# pass +# else: +# print_verbose( +# "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. 
Adding to cache" +# ) +# result = kwargs["async_complete_streaming_response"] +# # only add to cache once we have a complete streaming response +# if litellm.cache is not None and not isinstance( +# litellm.cache.cache, S3Cache +# ): +# await litellm.cache.async_add_cache(result, **kwargs) +# else: +# litellm.cache.add_cache(result, **kwargs) +# if callback == "openmeter": +# global openMeterLogger +# if self.stream == True: +# if ( +# "async_complete_streaming_response" +# in self.model_call_details +# ): +# await openMeterLogger.async_log_success_event( +# kwargs=self.model_call_details, +# response_obj=self.model_call_details[ +# "async_complete_streaming_response" +# ], +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# await openMeterLogger.async_log_stream_event( # [TODO]: move this to being an async log stream event function +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# await openMeterLogger.async_log_success_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# if isinstance(callback, CustomLogger): # custom logger class +# if self.stream == True: +# if ( +# "async_complete_streaming_response" +# in self.model_call_details +# ): +# await callback.async_log_success_event( +# kwargs=self.model_call_details, +# response_obj=self.model_call_details[ +# "async_complete_streaming_response" +# ], +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# await callback.async_log_stream_event( # [TODO]: move this to being an async log stream event function +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# else: +# await callback.async_log_success_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) +# if callable(callback): # custom logger functions +# if self.stream: +# if ( +# "async_complete_streaming_response" +# in self.model_call_details +# ): +# await customLogger.async_log_event( +# kwargs=self.model_call_details, +# response_obj=self.model_call_details[ +# "async_complete_streaming_response" +# ], +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# callback_func=callback, +# ) +# else: +# await customLogger.async_log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# callback_func=callback, +# ) +# if callback == "dynamodb": +# global dynamoLogger +# if dynamoLogger is None: +# dynamoLogger = DyanmoDBLogger() +# if self.stream: +# if ( +# "async_complete_streaming_response" +# in self.model_call_details +# ): +# print_verbose( +# "DynamoDB Logger: Got Stream Event - Completed Stream Response" +# ) +# await dynamoLogger._async_log_event( +# kwargs=self.model_call_details, +# response_obj=self.model_call_details[ +# "async_complete_streaming_response" +# ], +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# else: +# print_verbose( +# "DynamoDB Logger: Got Stream Event - No complete stream response as yet" +# ) +# else: +# await dynamoLogger._async_log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# except Exception as e: +# verbose_logger.error( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success 
logging {traceback.format_exc()}" +# ) +# pass + +# def _failure_handler_helper_fn( +# self, exception, traceback_exception, start_time=None, end_time=None +# ): +# if start_time is None: +# start_time = self.start_time +# if end_time is None: +# end_time = datetime.datetime.now() + +# # on some exceptions, model_call_details is not always initialized, this ensures that we still log those exceptions +# if not hasattr(self, "model_call_details"): +# self.model_call_details = {} + +# self.model_call_details["log_event_type"] = "failed_api_call" +# self.model_call_details["exception"] = exception +# self.model_call_details["traceback_exception"] = traceback_exception +# self.model_call_details["end_time"] = end_time +# self.model_call_details.setdefault("original_response", None) +# return start_time, end_time + +# def failure_handler( +# self, exception, traceback_exception, start_time=None, end_time=None +# ): +# print_verbose( +# f"Logging Details LiteLLM-Failure Call: {litellm.failure_callback}" +# ) +# try: +# start_time, end_time = self._failure_handler_helper_fn( +# exception=exception, +# traceback_exception=traceback_exception, +# start_time=start_time, +# end_time=end_time, +# ) +# callbacks = [] # init this to empty incase it's not created + +# if self.dynamic_failure_callbacks is not None and isinstance( +# self.dynamic_failure_callbacks, list +# ): +# callbacks = self.dynamic_failure_callbacks +# ## keep the internal functions ## +# for callback in litellm.failure_callback: +# if ( +# isinstance(callback, CustomLogger) +# and "_PROXY_" in callback.__class__.__name__ +# ): +# callbacks.append(callback) +# else: +# callbacks = litellm.failure_callback + +# result = None # result sent to all loggers, init this to None incase it's not created + +# result = redact_message_input_output_from_logging( +# result=result, litellm_logging_obj=self +# ) +# for callback in callbacks: +# try: +# if callback == "lite_debugger": +# print_verbose("reaches lite_debugger for logging!") +# print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") +# result = { +# "model": self.model, +# "created": time.time(), +# "error": traceback_exception, +# "usage": { +# "prompt_tokens": prompt_token_calculator( +# self.model, messages=self.messages +# ), +# "completion_tokens": 0, +# }, +# } +# liteDebuggerClient.log_event( +# model=self.model, +# messages=self.messages, +# end_user=self.model_call_details.get("user", "default"), +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# litellm_call_id=self.litellm_call_id, +# print_verbose=print_verbose, +# call_type=self.call_type, +# stream=self.stream, +# ) +# if callback == "lunary": +# print_verbose("reaches lunary for logging error!") + +# model = self.model + +# input = self.model_call_details["input"] + +# _type = ( +# "embed" +# if self.call_type == CallTypes.embedding.value +# else "llm" +# ) + +# lunaryLogger.log_event( +# type=_type, +# event="error", +# user_id=self.model_call_details.get("user", "default"), +# model=model, +# input=input, +# error=traceback_exception, +# run_id=self.litellm_call_id, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# ) +# if callback == "sentry": +# print_verbose("sending exception to sentry") +# if capture_exception: +# capture_exception(exception) +# else: +# print_verbose( +# f"capture exception not initialized: {capture_exception}" +# ) +# if callable(callback): # custom logger functions +# customLogger.log_event( +# kwargs=self.model_call_details, +# 
response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# callback_func=callback, +# ) +# if ( +# isinstance(callback, CustomLogger) +# and self.model_call_details.get("litellm_params", {}).get( +# "acompletion", False +# ) +# == False +# and self.model_call_details.get("litellm_params", {}).get( +# "aembedding", False +# ) +# == False +# ): # custom logger class +# callback.log_failure_event( +# start_time=start_time, +# end_time=end_time, +# response_obj=result, +# kwargs=self.model_call_details, +# ) +# if callback == "langfuse": +# global langFuseLogger +# verbose_logger.debug("reaches langfuse for logging failure") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# # this only logs streaming once, complete_streaming_response exists i.e when stream ends +# if langFuseLogger is None or ( +# ( +# self.langfuse_public_key is not None +# and self.langfuse_public_key +# != langFuseLogger.public_key +# ) +# and ( +# self.langfuse_public_key is not None +# and self.langfuse_public_key +# != langFuseLogger.public_key +# ) +# ): +# langFuseLogger = LangFuseLogger( +# langfuse_public_key=self.langfuse_public_key, +# langfuse_secret=self.langfuse_secret, +# ) +# langFuseLogger.log_event( +# start_time=start_time, +# end_time=end_time, +# response_obj=None, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# status_message=str(exception), +# level="ERROR", +# kwargs=self.model_call_details, +# ) +# if callback == "traceloop": +# traceloopLogger.log_event( +# start_time=start_time, +# end_time=end_time, +# response_obj=None, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# status_message=str(exception), +# level="ERROR", +# kwargs=self.model_call_details, +# ) +# if callback == "prometheus": +# global prometheusLogger +# verbose_logger.debug("reaches prometheus for success logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# kwargs["exception"] = str(exception) +# prometheusLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# user_id=kwargs.get("user", None), +# print_verbose=print_verbose, +# ) + +# if callback == "logfire": +# global logfireLogger +# verbose_logger.debug("reaches logfire for failure logging!") +# kwargs = {} +# for k, v in self.model_call_details.items(): +# if ( +# k != "original_response" +# ): # copy.deepcopy raises errors as this could be a coroutine +# kwargs[k] = v +# kwargs["exception"] = exception + +# logfireLogger.log_event( +# kwargs=kwargs, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# level=LogfireLevel.ERROR.value, +# print_verbose=print_verbose, +# ) +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}" +# ) +# print_verbose( +# f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" +# ) +# if capture_exception: # log this error to sentry for debugging +# capture_exception(e) +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}" +# ) +# pass + +# async def async_failure_handler( +# self, exception, 
traceback_exception, start_time=None, end_time=None +# ): +# """ +# Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. +# """ +# start_time, end_time = self._failure_handler_helper_fn( +# exception=exception, +# traceback_exception=traceback_exception, +# start_time=start_time, +# end_time=end_time, +# ) +# result = None # result sent to all loggers, init this to None incase it's not created +# for callback in litellm._async_failure_callback: +# try: +# if isinstance(callback, CustomLogger): # custom logger class +# await callback.async_log_failure_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# ) # type: ignore +# if callable(callback): # custom logger functions +# await customLogger.async_log_event( +# kwargs=self.model_call_details, +# response_obj=result, +# start_time=start_time, +# end_time=end_time, +# print_verbose=print_verbose, +# callback_func=callback, +# ) +# except Exception as e: +# print_verbose( +# f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" +# ) diff --git a/litellm/litellm_core_utils/redact_messages.py b/litellm/litellm_core_utils/redact_messages.py index 9c0df2011..8f270d8be 100644 --- a/litellm/litellm_core_utils/redact_messages.py +++ b/litellm/litellm_core_utils/redact_messages.py @@ -12,7 +12,9 @@ from typing import TYPE_CHECKING, Any import litellm if TYPE_CHECKING: - from litellm.utils import Logging as _LiteLLMLoggingObject + from litellm.litellm_core_utils.litellm_logging import ( + Logging as _LiteLLMLoggingObject, + ) LiteLLMLoggingObject = _LiteLLMLoggingObject else: diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py index 8e469a8f4..1edd99110 100644 --- a/litellm/llms/anthropic.py +++ b/litellm/llms/anthropic.py @@ -5,7 +5,9 @@ import requests, copy # type: ignore import time from functools import partial from typing import Callable, Optional, List, Union -from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper +import litellm.litellm_core_utils +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler @@ -201,7 +203,7 @@ class AnthropicChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], @@ -316,7 +318,7 @@ class AnthropicChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/llms/base.py b/litellm/llms/base.py index 8c2f5101e..0222d2366 100644 --- a/litellm/llms/base.py +++ b/litellm/llms/base.py @@ -2,7 +2,7 @@ import litellm import httpx, requests from typing import Optional, Union -from litellm.utils import Logging +from litellm.litellm_core_utils.litellm_logging import Logging class BaseLLM: diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index 4314032e7..8d88cdd3d 100644 --- 
a/litellm/llms/bedrock.py +++ b/litellm/llms/bedrock.py @@ -5,12 +5,10 @@ import time, uuid from typing import Callable, Optional, Any, Union, List import litellm from litellm.utils import ( - ModelResponse, get_secret, - Usage, - ImageResponse, - map_finish_reason, ) +from litellm.litellm_core_utils.model_response_helpers import map_finish_reason +from litellm.types.utils import ImageResponse, ModelResponse, Usage from .prompt_templates.factory import ( prompt_factory, custom_prompt, @@ -633,7 +631,11 @@ def init_bedrock_client( config = boto3.session.Config() ### CHECK STS ### - if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None: + if ( + aws_web_identity_token is not None + and aws_role_name is not None + and aws_session_name is not None + ): oidc_token = get_secret(aws_web_identity_token) if oidc_token is None: @@ -642,9 +644,7 @@ def init_bedrock_client( status_code=401, ) - sts_client = boto3.client( - "sts" - ) + sts_client = boto3.client("sts") # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py index 84b61d4cb..ffbc6c680 100644 --- a/litellm/llms/bedrock_httpx.py +++ b/litellm/llms/bedrock_httpx.py @@ -22,13 +22,12 @@ from typing import ( from litellm.utils import ( ModelResponse, Usage, - map_finish_reason, CustomStreamWrapper, - Message, - Choices, get_secret, - Logging, ) +from litellm.litellm_core_utils.core_helpers import map_finish_reason +from litellm.litellm_core_utils.litellm_logging import Logging +from litellm.types.utils import Message, Choices import litellm, uuid from .prompt_templates.factory import ( prompt_factory, @@ -57,6 +56,7 @@ from litellm.caching import DualCache iam_cache = DualCache() + class AmazonCohereChatConfig: """ Reference - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html @@ -327,13 +327,19 @@ class BedrockLLM(BaseLLM): ) = params_to_check ### CHECK STS ### - if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None: - iam_creds_cache_key = json.dumps({ - "aws_web_identity_token": aws_web_identity_token, - "aws_role_name": aws_role_name, - "aws_session_name": aws_session_name, - "aws_region_name": aws_region_name, - }) + if ( + aws_web_identity_token is not None + and aws_role_name is not None + and aws_session_name is not None + ): + iam_creds_cache_key = json.dumps( + { + "aws_web_identity_token": aws_web_identity_token, + "aws_role_name": aws_role_name, + "aws_session_name": aws_session_name, + "aws_region_name": aws_region_name, + } + ) iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key) if iam_creds_dict is None: @@ -348,7 +354,7 @@ class BedrockLLM(BaseLLM): sts_client = boto3.client( "sts", region_name=aws_region_name, - endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com" + endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com", ) # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html @@ -362,12 +368,18 @@ class BedrockLLM(BaseLLM): iam_creds_dict = { "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"], - "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"], + "aws_secret_access_key": sts_response["Credentials"][ + "SecretAccessKey" + ], "aws_session_token": 
sts_response["Credentials"]["SessionToken"], "region_name": aws_region_name, } - iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60) + iam_cache.set_cache( + key=iam_creds_cache_key, + value=json.dumps(iam_creds_dict), + ttl=3600 - 60, + ) session = boto3.Session(**iam_creds_dict) @@ -1433,13 +1445,19 @@ class BedrockConverseLLM(BaseLLM): ) = params_to_check ### CHECK STS ### - if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None: - iam_creds_cache_key = json.dumps({ - "aws_web_identity_token": aws_web_identity_token, - "aws_role_name": aws_role_name, - "aws_session_name": aws_session_name, - "aws_region_name": aws_region_name, - }) + if ( + aws_web_identity_token is not None + and aws_role_name is not None + and aws_session_name is not None + ): + iam_creds_cache_key = json.dumps( + { + "aws_web_identity_token": aws_web_identity_token, + "aws_role_name": aws_role_name, + "aws_session_name": aws_session_name, + "aws_region_name": aws_region_name, + } + ) iam_creds_dict = iam_cache.get_cache(iam_creds_cache_key) if iam_creds_dict is None: @@ -1454,7 +1472,7 @@ class BedrockConverseLLM(BaseLLM): sts_client = boto3.client( "sts", region_name=aws_region_name, - endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com" + endpoint_url=f"https://sts.{aws_region_name}.amazonaws.com", ) # https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html @@ -1468,12 +1486,18 @@ class BedrockConverseLLM(BaseLLM): iam_creds_dict = { "aws_access_key_id": sts_response["Credentials"]["AccessKeyId"], - "aws_secret_access_key": sts_response["Credentials"]["SecretAccessKey"], + "aws_secret_access_key": sts_response["Credentials"][ + "SecretAccessKey" + ], "aws_session_token": sts_response["Credentials"]["SessionToken"], "region_name": aws_region_name, } - iam_cache.set_cache(key=iam_creds_cache_key, value=json.dumps(iam_creds_dict), ttl=3600 - 60) + iam_cache.set_cache( + key=iam_creds_cache_key, + value=json.dumps(iam_creds_dict), + ttl=3600 - 60, + ) session = boto3.Session(**iam_creds_dict) diff --git a/litellm/llms/databricks.py b/litellm/llms/databricks.py index 4fe475259..1ab09246b 100644 --- a/litellm/llms/databricks.py +++ b/litellm/llms/databricks.py @@ -10,10 +10,10 @@ from typing import Callable, Optional, List, Union, Tuple, Literal from litellm.utils import ( ModelResponse, Usage, - map_finish_reason, CustomStreamWrapper, EmbeddingResponse, ) +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler @@ -289,7 +289,7 @@ class DatabricksChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/llms/predibase.py b/litellm/llms/predibase.py index 66c28acee..8ad294457 100644 --- a/litellm/llms/predibase.py +++ b/litellm/llms/predibase.py @@ -12,11 +12,11 @@ from typing import Callable, Optional, List, Literal, Union from litellm.utils import ( ModelResponse, Usage, - map_finish_reason, CustomStreamWrapper, Message, Choices, ) +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm from .prompt_templates.factory import prompt_factory, 
custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler @@ -198,7 +198,7 @@ class PredibaseChatCompletion(BaseLLM): response: Union[requests.Response, httpx.Response], model_response: ModelResponse, stream: bool, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/llms/triton.py b/litellm/llms/triton.py index 711186b3f..d647c9c43 100644 --- a/litellm/llms/triton.py +++ b/litellm/llms/triton.py @@ -4,7 +4,6 @@ from enum import Enum import requests, copy # type: ignore import time from typing import Callable, Optional, List -from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper import litellm from .prompt_templates.factory import prompt_factory, custom_prompt from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py index 67a8a4519..28cdde518 100644 --- a/litellm/llms/vertex_ai.py +++ b/litellm/llms/vertex_ai.py @@ -5,7 +5,8 @@ import requests # type: ignore import time from typing import Callable, Optional, Union, List, Literal, Any from pydantic import BaseModel -from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +from litellm.litellm_core_utils.model_response_helpers import map_finish_reason import litellm, uuid import httpx, inspect # type: ignore from litellm.types.llms.vertex_ai import * diff --git a/litellm/llms/vertex_ai_anthropic.py b/litellm/llms/vertex_ai_anthropic.py index 065294280..1907ad5f0 100644 --- a/litellm/llms/vertex_ai_anthropic.py +++ b/litellm/llms/vertex_ai_anthropic.py @@ -6,7 +6,8 @@ from enum import Enum import requests, copy # type: ignore import time, uuid from typing import Callable, Optional, List -from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +from litellm.litellm_core_utils.model_response_helpers import map_finish_reason import litellm from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from .prompt_templates.factory import ( diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py index b1c38f0bc..c9e48f3e1 100644 --- a/litellm/llms/vertex_httpx.py +++ b/litellm/llms/vertex_httpx.py @@ -8,7 +8,10 @@ from enum import Enum import requests # type: ignore import time from typing import Callable, Optional, Union, List, Any, Tuple -from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason +import litellm.litellm_core_utils +import litellm.litellm_core_utils.litellm_logging +from litellm.utils import ModelResponse, Usage, CustomStreamWrapper +from litellm.litellm_core_utils.core_helpers import map_finish_reason import litellm, uuid import httpx, inspect # type: ignore from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler @@ -320,7 +323,7 @@ class VertexLLM(BaseLLM): model: str, response: httpx.Response, model_response: ModelResponse, - logging_obj: litellm.utils.Logging, + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging, optional_params: dict, api_key: str, data: Union[dict, str], diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index ea2d5d3f8..20fa90cbe 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -12,6 +12,8 @@ import litellm import 
backoff import traceback from pydantic import BaseModel +import litellm.litellm_core_utils +import litellm.litellm_core_utils.litellm_logging from litellm.proxy._types import ( UserAPIKeyAuth, DynamoDBArgs, @@ -331,7 +333,9 @@ class ProxyLogging: return data except Exception as e: if "litellm_logging_obj" in data: - logging_obj: litellm.utils.Logging = data["litellm_logging_obj"] + logging_obj: litellm.litellm_core_utils.litellm_logging.Logging = data[ + "litellm_logging_obj" + ] ## ASYNC FAILURE HANDLER ## error_message = "" diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 1fbb375d3..312ca210e 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -3,6 +3,15 @@ from typing_extensions import TypedDict from enum import Enum from typing_extensions import override, Required, Dict from .llms.openai import ChatCompletionUsageBlock, ChatCompletionToolCallChunk +from ..litellm_core_utils.model_response_helpers import map_finish_reason +from openai._models import BaseModel as OpenAIObject +from pydantic import ConfigDict +import uuid +import json + + +def _generate_id(): # private helper function + return "chatcmpl-" + str(uuid.uuid4()) class LiteLLMCommonStrings(Enum): @@ -48,3 +57,904 @@ class GenericStreamingChunk(TypedDict): finish_reason: Required[str] usage: Optional[ChatCompletionUsageBlock] index: int + + +from enum import Enum + + +class CallTypes(Enum): + embedding = "embedding" + aembedding = "aembedding" + completion = "completion" + acompletion = "acompletion" + atext_completion = "atext_completion" + text_completion = "text_completion" + image_generation = "image_generation" + aimage_generation = "aimage_generation" + moderation = "moderation" + amoderation = "amoderation" + atranscription = "atranscription" + transcription = "transcription" + aspeech = "aspeech" + speech = "speech" + + +class TopLogprob(OpenAIObject): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + +class ChatCompletionTokenLogprob(OpenAIObject): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + top_logprobs: List[TopLogprob] + """List of the most likely tokens and their log probability, at this token + position. + + In rare cases, there may be fewer than the number of requested `top_logprobs` + returned. 
+ """ + + +class ChoiceLogprobs(OpenAIObject): + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" + + +class FunctionCall(OpenAIObject): + arguments: str + name: Optional[str] = None + + +class Function(OpenAIObject): + arguments: str + name: Optional[str] = None + + def __init__( + self, + arguments: Union[Dict, str], + name: Optional[str] = None, + **params, + ): + if isinstance(arguments, Dict): + arguments = json.dumps(arguments) + else: + arguments = arguments + + name = name + + # Build a dictionary with the structure your BaseModel expects + data = {"arguments": arguments, "name": name, **params} + + super(Function, self).__init__(**data) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class ChatCompletionDeltaToolCall(OpenAIObject): + id: Optional[str] = None + function: Function + type: Optional[str] = None + index: int + + +class HiddenParams(OpenAIObject): + original_response: Optional[str] = None + model_id: Optional[str] = None # used in Router for individual deployments + api_base: Optional[str] = None # returns api base used for making completion call + + model_config = ConfigDict(extra="allow", protected_namespaces=()) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class ChatCompletionMessageToolCall(OpenAIObject): + def __init__( + self, + function: Union[Dict, Function], + id: Optional[str] = None, + type: Optional[str] = None, + **params, + ): + super(ChatCompletionMessageToolCall, self).__init__(**params) + if isinstance(function, Dict): + self.function = Function(**function) + else: + self.function = function + + if id is not None: + self.id = id + else: + self.id = f"{uuid.uuid4()}" + + if type is not None: + self.type = type + else: + self.type = "function" + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class Message(OpenAIObject): + def __init__( + self, + content: Optional[str] = "default", + role="assistant", + logprobs=None, + function_call=None, + tool_calls=None, + **params, + ): + super(Message, self).__init__(**params) + self.content = content + self.role = role + if function_call is 
not None: + self.function_call = FunctionCall(**function_call) + + if tool_calls is not None: + self.tool_calls = [] + for tool_call in tool_calls: + self.tool_calls.append(ChatCompletionMessageToolCall(**tool_call)) + + if logprobs is not None: + self._logprobs = ChoiceLogprobs(**logprobs) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class Delta(OpenAIObject): + def __init__( + self, + content=None, + role=None, + function_call=None, + tool_calls=None, + **params, + ): + super(Delta, self).__init__(**params) + self.content = content + self.role = role + + if function_call is not None and isinstance(function_call, dict): + self.function_call = FunctionCall(**function_call) + else: + self.function_call = function_call + if tool_calls is not None and isinstance(tool_calls, list): + self.tool_calls = [] + for tool_call in tool_calls: + if isinstance(tool_call, dict): + if tool_call.get("index", None) is None: + tool_call["index"] = 0 + self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) + elif isinstance(tool_call, ChatCompletionDeltaToolCall): + self.tool_calls.append(tool_call) + else: + self.tool_calls = tool_calls + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class Choices(OpenAIObject): + def __init__( + self, + finish_reason=None, + index=0, + message: Optional[Union[Message, dict]] = None, + logprobs=None, + enhancements=None, + **params, + ): + super(Choices, self).__init__(**params) + if finish_reason is not None: + self.finish_reason = map_finish_reason( + finish_reason + ) # set finish_reason for all responses + else: + self.finish_reason = "stop" + self.index = index + if message is None: + self.message = Message() + else: + if isinstance(message, Message): + self.message = message + elif isinstance(message, dict): + self.message = Message(**message) + if logprobs is not None: + self.logprobs = logprobs + if enhancements is not None: + self.enhancements = enhancements + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class Usage(OpenAIObject): + def __init__( + self, prompt_tokens=None, completion_tokens=None, total_tokens=None, **params + ): + super(Usage, 
self).__init__(**params) + if prompt_tokens: + self.prompt_tokens = prompt_tokens + if completion_tokens: + self.completion_tokens = completion_tokens + if total_tokens: + self.total_tokens = total_tokens + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class StreamingChoices(OpenAIObject): + def __init__( + self, + finish_reason=None, + index=0, + delta: Optional[Delta] = None, + logprobs=None, + enhancements=None, + **params, + ): + super(StreamingChoices, self).__init__(**params) + if finish_reason: + self.finish_reason = finish_reason + else: + self.finish_reason = None + self.index = index + if delta is not None: + if isinstance(delta, Delta): + self.delta = delta + elif isinstance(delta, dict): + self.delta = Delta(**delta) + else: + self.delta = Delta() + if enhancements is not None: + self.enhancements = enhancements + + if logprobs is not None and isinstance(logprobs, dict): + self.logprobs = ChoiceLogprobs(**logprobs) + else: + self.logprobs = logprobs # type: ignore + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class ModelResponse(OpenAIObject): + id: str + """A unique identifier for the completion.""" + + choices: List[Union[Choices, StreamingChoices]] + """The list of completion choices the model generated for the input prompt.""" + + created: int + """The Unix timestamp (in seconds) of when the completion was created.""" + + model: Optional[str] = None + """The model used for completion.""" + + object: str + """The object type, which is always "text_completion" """ + + system_fingerprint: Optional[str] = None + """This fingerprint represents the backend configuration that the model runs with. + + Can be used in conjunction with the `seed` request parameter to understand when + backend changes have been made that might impact determinism. 
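# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: how the relocated ModelResponse
# defined above behaves once it lives in litellm.types.utils (the import path
# this diff introduces). Plain dicts passed for `choices` / `usage` are coerced
# into Choices / Message / Usage objects, a "chatcmpl-<uuid>" id is generated
# when none is given, and provider-specific finish reasons are normalized.
# Model name, message content, and timestamp below are made-up example values.
from litellm.types.utils import ModelResponse

resp = ModelResponse(
    model="gpt-3.5-turbo",
    created=1718000000,  # example timestamp, passed explicitly
    choices=[
        {
            "finish_reason": "end_turn",  # anthropic-style -> normalized to "stop"
            "index": 0,
            "message": {"role": "assistant", "content": "Hello!"},
        }
    ],
    usage={"prompt_tokens": 5, "completion_tokens": 2, "total_tokens": 7},
)
assert resp.object == "chat.completion"
assert resp.choices[0].finish_reason == "stop"
# these OpenAIObject subclasses also allow dict-style access:
assert resp["choices"][0]["message"].get("content") == "Hello!"
# ---------------------------------------------------------------------------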
+ """ + + _hidden_params: dict = {} + + def __init__( + self, + id=None, + choices=None, + created=None, + model=None, + object=None, + system_fingerprint=None, + usage=None, + stream=None, + stream_options=None, + response_ms=None, + hidden_params=None, + **params, + ): + if stream is not None and stream is True: + object = "chat.completion.chunk" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + if isinstance(choice, StreamingChoices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = StreamingChoices(**choice) + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [StreamingChoices()] + else: + object = "chat.completion" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + if isinstance(choice, Choices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = Choices(**choice) + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [Choices()] + if id is None: + id = _generate_id() + else: + id = id + if created is None: + created = int(time.time()) + else: + created = created + model = model + if usage is not None: + if isinstance(usage, dict): + usage = Usage(**usage) + else: + usage = usage + elif stream is None or stream is False: + usage = Usage() + if hidden_params: + self._hidden_params = hidden_params + + init_values = { + "id": id, + "choices": choices, + "created": created, + "model": model, + "object": object, + "system_fingerprint": system_fingerprint, + } + + if usage is not None: + init_values["usage"] = usage + + super().__init__( + **init_values, + **params, + ) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class Embedding(OpenAIObject): + embedding: Union[list, str] = [] + index: int + object: str + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class EmbeddingResponse(OpenAIObject): + model: Optional[str] = None + """The model used for embedding.""" + + data: Optional[List] = None + """The actual embedding value""" + + object: str + """The object type, which is always "embedding" """ + + usage: Optional[Usage] = None + """Usage statistics for the embedding request.""" + + _hidden_params: dict = {} + + def __init__( + self, + model=None, + usage=None, + stream=False, + response_ms=None, + data=None, + **params, + ): + object = "list" + if response_ms: + _response_ms = response_ms + else: + _response_ms = None + if data: + data = data + else: + data = None + + if usage: + usage = usage + else: + usage = Usage() + + model = model + 
super().__init__(model=model, object=object, data=data, usage=usage) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class Logprobs(OpenAIObject): + text_offset: List[int] + token_logprobs: List[float] + tokens: List[str] + top_logprobs: List[Dict[str, float]] + + +class TextChoices(OpenAIObject): + def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params): + super(TextChoices, self).__init__(**params) + if finish_reason: + self.finish_reason = map_finish_reason(finish_reason) + else: + self.finish_reason = None + self.index = index + if text is not None: + self.text = text + else: + self.text = None + if logprobs is None: + self.logprobs = None + else: + if isinstance(logprobs, dict): + self.logprobs = Logprobs(**logprobs) + else: + self.logprobs = logprobs + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class TextCompletionResponse(OpenAIObject): + """ + { + "id": response["id"], + "object": "text_completion", + "created": response["created"], + "model": response["model"], + "choices": [ + { + "text": response["choices"][0]["message"]["content"], + "index": response["choices"][0]["index"], + "logprobs": transformed_logprobs, + "finish_reason": response["choices"][0]["finish_reason"] + } + ], + "usage": response["usage"] + } + """ + + id: str + object: str + created: int + model: Optional[str] + choices: List[TextChoices] + usage: Optional[Usage] + _response_ms: Optional[int] = None + _hidden_params: HiddenParams + + def __init__( + self, + id=None, + choices=None, + created=None, + model=None, + usage=None, + stream=False, + response_ms=None, + object=None, + **params, + ): + if stream: + object = "text_completion.chunk" + choices = [TextChoices()] + else: + object = "text_completion" + if choices is not None and isinstance(choices, list): + new_choices = [] + for choice in choices: + if isinstance(choice, TextChoices): + _new_choice = choice + elif isinstance(choice, dict): + _new_choice = TextChoices(**choice) + new_choices.append(_new_choice) + choices = new_choices + else: + choices = [TextChoices()] + if object is not None: + object = object + if id is None: + id = _generate_id() + else: + id = id + if created is None: + created = int(time.time()) + else: + created = created + + model = model + if usage: + usage = usage + else: + usage = Usage() + + super(TextCompletionResponse, 
self).__init__( + id=id, + object=object, + created=created, + model=model, + choices=choices, + usage=usage, + **params, + ) + + if response_ms: + self._response_ms = response_ms + else: + self._response_ms = None + self._hidden_params = HiddenParams() + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + +class ImageObject(OpenAIObject): + """ + Represents the url or the content of an image generated by the OpenAI API. + + Attributes: + b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json. + url: The URL of the generated image, if response_format is url (default). + revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt. + + https://platform.openai.com/docs/api-reference/images/object + """ + + b64_json: Optional[str] = None + url: Optional[str] = None + revised_prompt: Optional[str] = None + + def __init__(self, b64_json=None, url=None, revised_prompt=None): + super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class ImageResponse(OpenAIObject): + created: Optional[int] = None + + data: Optional[List[ImageObject]] = None + + usage: Optional[dict] = None + + _hidden_params: dict = {} + + def __init__(self, created=None, data=None, response_ms=None): + if response_ms: + _response_ms = response_ms + else: + _response_ms = None + if data: + data = data + else: + data = None + + if created: + created = created + else: + created = None + + super().__init__(data=data, created=created) + self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} + + def __contains__(self, key): + # Define custom behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() + + +class TranscriptionResponse(OpenAIObject): + text: Optional[str] = None + + _hidden_params: dict = {} + + def __init__(self, text=None): + super().__init__(text=text) + + def __contains__(self, key): + # Define custom 
behavior for the 'in' operator + return hasattr(self, key) + + def get(self, key, default=None): + # Custom .get() method to access attributes with a default value if the attribute doesn't exist + return getattr(self, key, default) + + def __getitem__(self, key): + # Allow dictionary-style access to attributes + return getattr(self, key) + + def __setitem__(self, key, value): + # Allow dictionary-style assignment of attributes + setattr(self, key, value) + + def json(self, **kwargs): + try: + return self.model_dump() # noqa + except: + # if using pydantic v1 + return self.dict() diff --git a/litellm/utils.py b/litellm/utils.py index 7f37bcf7c..a126a10cd 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -18,7 +18,7 @@ from functools import wraps, lru_cache import datetime, time import tiktoken import uuid -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel import aiohttp import textwrap import logging @@ -32,9 +32,29 @@ from dataclasses import ( ) import os import litellm._service_logger # for storing API inputs, outputs, and metadata +import litellm.litellm_core_utils +import litellm.litellm_core_utils.litellm_logging from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler from litellm.caching import DualCache -from litellm.types.utils import CostPerToken, ProviderField, ModelInfo +from litellm.types.utils import ( + CostPerToken, + ProviderField, + ModelInfo, + CallTypes, + ModelResponse, + EmbeddingResponse, + ImageResponse, + TranscriptionResponse, + TextCompletionResponse, + ChatCompletionDeltaToolCall, + Message, + Delta, + Choices, + Usage, + StreamingChoices, + Embedding, + TextChoices, +) from litellm.litellm_core_utils.redact_messages import ( redact_message_input_output_from_logging, ) @@ -96,7 +116,6 @@ from .integrations.greenscale import GreenscaleLogger from .integrations.litedebugger import LiteDebugger from .proxy._types import KeyManagementSystem from openai import OpenAIError as OriginalError -from openai._models import BaseModel as OpenAIObject from .caching import S3Cache, RedisSemanticCache, RedisCache from .exceptions import ( AuthenticationError, @@ -179,6 +198,8 @@ local_cache: Optional[Dict[str, str]] = {} last_fetched_at = None last_fetched_at_keys = None ######## Model Response ######################### + + # All liteLLM Model responses will be in this format, Follows the OpenAI Format # https://docs.litellm.ai/docs/completion/output # { @@ -209,933 +230,6 @@ class UnsupportedParamsError(Exception): ) # Call the base class constructor with the parameters it needs -def _generate_id(): # private helper function - return "chatcmpl-" + str(uuid.uuid4()) - - -def map_finish_reason( - finish_reason: str, -): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null' - # anthropic mapping - if finish_reason == "stop_sequence": - return "stop" - # cohere mapping - https://docs.cohere.com/reference/generate - elif finish_reason == "COMPLETE": - return "stop" - elif finish_reason == "MAX_TOKENS": # cohere + vertex ai - return "length" - elif finish_reason == "ERROR_TOXIC": - return "content_filter" - elif ( - finish_reason == "ERROR" - ): # openai currently doesn't support an 'error' finish reason - return "stop" - # huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream - elif finish_reason == "eos_token" or finish_reason == "stop_sequence": - return "stop" - elif ( - finish_reason == "FINISH_REASON_UNSPECIFIED" 
or finish_reason == "STOP" - ): # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',] - return "stop" - elif finish_reason == "SAFETY": # vertex ai - return "content_filter" - elif finish_reason == "STOP": # vertex ai - return "stop" - elif finish_reason == "end_turn" or finish_reason == "stop_sequence": # anthropic - return "stop" - elif finish_reason == "max_tokens": # anthropic - return "length" - elif finish_reason == "tool_use": # anthropic - return "tool_calls" - elif finish_reason == "content_filtered": - return "content_filter" - return finish_reason - - -class TopLogprob(OpenAIObject): - token: str - """The token.""" - - bytes: Optional[List[int]] = None - """A list of integers representing the UTF-8 bytes representation of the token. - - Useful in instances where characters are represented by multiple tokens and - their byte representations must be combined to generate the correct text - representation. Can be `null` if there is no bytes representation for the token. - """ - - logprob: float - """The log probability of this token, if it is within the top 20 most likely - tokens. - - Otherwise, the value `-9999.0` is used to signify that the token is very - unlikely. - """ - - -class ChatCompletionTokenLogprob(OpenAIObject): - token: str - """The token.""" - - bytes: Optional[List[int]] = None - """A list of integers representing the UTF-8 bytes representation of the token. - - Useful in instances where characters are represented by multiple tokens and - their byte representations must be combined to generate the correct text - representation. Can be `null` if there is no bytes representation for the token. - """ - - logprob: float - """The log probability of this token, if it is within the top 20 most likely - tokens. - - Otherwise, the value `-9999.0` is used to signify that the token is very - unlikely. - """ - - top_logprobs: List[TopLogprob] - """List of the most likely tokens and their log probability, at this token - position. - - In rare cases, there may be fewer than the number of requested `top_logprobs` - returned. 
- """ - - -class ChoiceLogprobs(OpenAIObject): - content: Optional[List[ChatCompletionTokenLogprob]] = None - """A list of message content tokens with log probability information.""" - - -class FunctionCall(OpenAIObject): - arguments: str - name: Optional[str] = None - - -class Function(OpenAIObject): - arguments: str - name: Optional[str] = None - - def __init__( - self, - arguments: Union[Dict, str], - name: Optional[str] = None, - **params, - ): - if isinstance(arguments, Dict): - arguments = json.dumps(arguments) - else: - arguments = arguments - - name = name - - # Build a dictionary with the structure your BaseModel expects - data = {"arguments": arguments, "name": name, **params} - - super(Function, self).__init__(**data) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class ChatCompletionDeltaToolCall(OpenAIObject): - id: Optional[str] = None - function: Function - type: Optional[str] = None - index: int - - -class HiddenParams(OpenAIObject): - original_response: Optional[str] = None - model_id: Optional[str] = None # used in Router for individual deployments - api_base: Optional[str] = None # returns api base used for making completion call - - model_config = ConfigDict(extra="allow", protected_namespaces=()) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class ChatCompletionMessageToolCall(OpenAIObject): - def __init__( - self, - function: Union[Dict, Function], - id: Optional[str] = None, - type: Optional[str] = None, - **params, - ): - super(ChatCompletionMessageToolCall, self).__init__(**params) - if isinstance(function, Dict): - self.function = Function(**function) - else: - self.function = function - - if id is not None: - self.id = id - else: - self.id = f"{uuid.uuid4()}" - - if type is not None: - self.type = type - else: - self.type = "function" - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class Message(OpenAIObject): - def __init__( - self, - content: Optional[str] = "default", - role="assistant", - logprobs=None, - function_call=None, - tool_calls=None, - **params, - ): - super(Message, self).__init__(**params) - self.content = content - self.role = role - if function_call is 
not None: - self.function_call = FunctionCall(**function_call) - - if tool_calls is not None: - self.tool_calls = [] - for tool_call in tool_calls: - self.tool_calls.append(ChatCompletionMessageToolCall(**tool_call)) - - if logprobs is not None: - self._logprobs = ChoiceLogprobs(**logprobs) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class Delta(OpenAIObject): - def __init__( - self, - content=None, - role=None, - function_call=None, - tool_calls=None, - **params, - ): - super(Delta, self).__init__(**params) - self.content = content - self.role = role - - if function_call is not None and isinstance(function_call, dict): - self.function_call = FunctionCall(**function_call) - else: - self.function_call = function_call - if tool_calls is not None and isinstance(tool_calls, list): - self.tool_calls = [] - for tool_call in tool_calls: - if isinstance(tool_call, dict): - if tool_call.get("index", None) is None: - tool_call["index"] = 0 - self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) - elif isinstance(tool_call, ChatCompletionDeltaToolCall): - self.tool_calls.append(tool_call) - else: - self.tool_calls = tool_calls - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class Choices(OpenAIObject): - def __init__( - self, - finish_reason=None, - index=0, - message: Optional[Union[Message, dict]] = None, - logprobs=None, - enhancements=None, - **params, - ): - super(Choices, self).__init__(**params) - if finish_reason is not None: - self.finish_reason = map_finish_reason( - finish_reason - ) # set finish_reason for all responses - else: - self.finish_reason = "stop" - self.index = index - if message is None: - self.message = Message() - else: - if isinstance(message, Message): - self.message = message - elif isinstance(message, dict): - self.message = Message(**message) - if logprobs is not None: - self.logprobs = logprobs - if enhancements is not None: - self.enhancements = enhancements - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class Usage(OpenAIObject): - def __init__( - self, prompt_tokens=None, completion_tokens=None, total_tokens=None, **params - ): - super(Usage, 
self).__init__(**params) - if prompt_tokens: - self.prompt_tokens = prompt_tokens - if completion_tokens: - self.completion_tokens = completion_tokens - if total_tokens: - self.total_tokens = total_tokens - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class StreamingChoices(OpenAIObject): - def __init__( - self, - finish_reason=None, - index=0, - delta: Optional[Delta] = None, - logprobs=None, - enhancements=None, - **params, - ): - super(StreamingChoices, self).__init__(**params) - if finish_reason: - self.finish_reason = finish_reason - else: - self.finish_reason = None - self.index = index - if delta is not None: - if isinstance(delta, Delta): - self.delta = delta - elif isinstance(delta, dict): - self.delta = Delta(**delta) - else: - self.delta = Delta() - if enhancements is not None: - self.enhancements = enhancements - - if logprobs is not None and isinstance(logprobs, dict): - self.logprobs = ChoiceLogprobs(**logprobs) - else: - self.logprobs = logprobs # type: ignore - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class ModelResponse(OpenAIObject): - id: str - """A unique identifier for the completion.""" - - choices: List[Union[Choices, StreamingChoices]] - """The list of completion choices the model generated for the input prompt.""" - - created: int - """The Unix timestamp (in seconds) of when the completion was created.""" - - model: Optional[str] = None - """The model used for completion.""" - - object: str - """The object type, which is always "text_completion" """ - - system_fingerprint: Optional[str] = None - """This fingerprint represents the backend configuration that the model runs with. - - Can be used in conjunction with the `seed` request parameter to understand when - backend changes have been made that might impact determinism. 
- """ - - _hidden_params: dict = {} - - def __init__( - self, - id=None, - choices=None, - created=None, - model=None, - object=None, - system_fingerprint=None, - usage=None, - stream=None, - stream_options=None, - response_ms=None, - hidden_params=None, - **params, - ): - if stream is not None and stream == True: - object = "chat.completion.chunk" - if choices is not None and isinstance(choices, list): - new_choices = [] - for choice in choices: - if isinstance(choice, StreamingChoices): - _new_choice = choice - elif isinstance(choice, dict): - _new_choice = StreamingChoices(**choice) - new_choices.append(_new_choice) - choices = new_choices - else: - choices = [StreamingChoices()] - else: - if model in litellm.open_ai_embedding_models: - object = "embedding" - else: - object = "chat.completion" - if choices is not None and isinstance(choices, list): - new_choices = [] - for choice in choices: - if isinstance(choice, Choices): - _new_choice = choice - elif isinstance(choice, dict): - _new_choice = Choices(**choice) - new_choices.append(_new_choice) - choices = new_choices - else: - choices = [Choices()] - if id is None: - id = _generate_id() - else: - id = id - if created is None: - created = int(time.time()) - else: - created = created - model = model - if usage is not None: - if isinstance(usage, dict): - usage = Usage(**usage) - else: - usage = usage - elif stream is None or stream == False: - usage = Usage() - if hidden_params: - self._hidden_params = hidden_params - - init_values = { - "id": id, - "choices": choices, - "created": created, - "model": model, - "object": object, - "system_fingerprint": system_fingerprint, - } - - if usage is not None: - init_values["usage"] = usage - - super().__init__( - **init_values, - **params, - ) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class Embedding(OpenAIObject): - embedding: Union[list, str] = [] - index: int - object: str - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class EmbeddingResponse(OpenAIObject): - model: Optional[str] = None - """The model used for embedding.""" - - data: Optional[List] = None - """The actual embedding value""" - - object: str - """The object type, which is always "embedding" """ - - usage: Optional[Usage] = None - """Usage statistics for the embedding request.""" - - _hidden_params: dict = {} - - def __init__( - self, - model=None, - usage=None, - stream=False, - response_ms=None, - data=None, - **params, - ): - object = "list" - if response_ms: - _response_ms = response_ms - else: - _response_ms = None - if data: - data = data - else: - data = None - - if usage: - usage = 
usage - else: - usage = Usage() - - model = model - super().__init__(model=model, object=object, data=data, usage=usage) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class Logprobs(OpenAIObject): - text_offset: List[int] - token_logprobs: List[float] - tokens: List[str] - top_logprobs: List[Dict[str, float]] - - -class TextChoices(OpenAIObject): - def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params): - super(TextChoices, self).__init__(**params) - if finish_reason: - self.finish_reason = map_finish_reason(finish_reason) - else: - self.finish_reason = None - self.index = index - if text is not None: - self.text = text - else: - self.text = None - if logprobs is None: - self.logprobs = None - else: - if isinstance(logprobs, dict): - self.logprobs = Logprobs(**logprobs) - else: - self.logprobs = logprobs - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class TextCompletionResponse(OpenAIObject): - """ - { - "id": response["id"], - "object": "text_completion", - "created": response["created"], - "model": response["model"], - "choices": [ - { - "text": response["choices"][0]["message"]["content"], - "index": response["choices"][0]["index"], - "logprobs": transformed_logprobs, - "finish_reason": response["choices"][0]["finish_reason"] - } - ], - "usage": response["usage"] - } - """ - - id: str - object: str - created: int - model: Optional[str] - choices: List[TextChoices] - usage: Optional[Usage] - _response_ms: Optional[int] = None - _hidden_params: HiddenParams - - def __init__( - self, - id=None, - choices=None, - created=None, - model=None, - usage=None, - stream=False, - response_ms=None, - object=None, - **params, - ): - if stream: - object = "text_completion.chunk" - choices = [TextChoices()] - else: - object = "text_completion" - if choices is not None and isinstance(choices, list): - new_choices = [] - for choice in choices: - if isinstance(choice, TextChoices): - _new_choice = choice - elif isinstance(choice, dict): - _new_choice = TextChoices(**choice) - new_choices.append(_new_choice) - choices = new_choices - else: - choices = [TextChoices()] - if object is not None: - object = object - if id is None: - id = _generate_id() - else: - id = id - if created is None: - created = int(time.time()) - else: - created = created - - model = model - if usage: - usage = usage - else: - usage = 
Usage() - - super(TextCompletionResponse, self).__init__( - id=id, - object=object, - created=created, - model=model, - choices=choices, - usage=usage, - **params, - ) - - if response_ms: - self._response_ms = response_ms - else: - self._response_ms = None - self._hidden_params = HiddenParams() - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - -class ImageObject(OpenAIObject): - """ - Represents the url or the content of an image generated by the OpenAI API. - - Attributes: - b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json. - url: The URL of the generated image, if response_format is url (default). - revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt. - - https://platform.openai.com/docs/api-reference/images/object - """ - - b64_json: Optional[str] = None - url: Optional[str] = None - revised_prompt: Optional[str] = None - - def __init__(self, b64_json=None, url=None, revised_prompt=None): - super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class ImageResponse(OpenAIObject): - created: Optional[int] = None - - data: Optional[List[ImageObject]] = None - - usage: Optional[dict] = None - - _hidden_params: dict = {} - - def __init__(self, created=None, data=None, response_ms=None): - if response_ms: - _response_ms = response_ms - else: - _response_ms = None - if data: - data = data - else: - data = None - - if created: - created = created - else: - created = None - - super().__init__(data=data, created=created) - self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0} - - def __contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - -class TranscriptionResponse(OpenAIObject): - text: Optional[str] = None - - _hidden_params: dict = {} - - def __init__(self, text=None): - super().__init__(text=text) - - def 
__contains__(self, key): - # Define custom behavior for the 'in' operator - return hasattr(self, key) - - def get(self, key, default=None): - # Custom .get() method to access attributes with a default value if the attribute doesn't exist - return getattr(self, key, default) - - def __getitem__(self, key): - # Allow dictionary-style access to attributes - return getattr(self, key) - - def __setitem__(self, key, value): - # Allow dictionary-style assignment of attributes - setattr(self, key, value) - - def json(self, **kwargs): - try: - return self.model_dump() # noqa - except: - # if using pydantic v1 - return self.dict() - - ############################################################ def print_verbose( print_statement, @@ -1156,1602 +250,6 @@ def print_verbose( ####### LOGGING ################### -from enum import Enum - - -class CallTypes(Enum): - embedding = "embedding" - aembedding = "aembedding" - completion = "completion" - acompletion = "acompletion" - atext_completion = "atext_completion" - text_completion = "text_completion" - image_generation = "image_generation" - aimage_generation = "aimage_generation" - moderation = "moderation" - amoderation = "amoderation" - atranscription = "atranscription" - transcription = "transcription" - aspeech = "aspeech" - speech = "speech" - - -# Logging function -> log the exact model details + what's being sent | Non-BlockingP -class Logging: - global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger - - custom_pricing: bool = False - stream_options = None - - def __init__( - self, - model, - messages, - stream, - call_type, - start_time, - litellm_call_id, - function_id, - dynamic_success_callbacks=None, - dynamic_failure_callbacks=None, - dynamic_async_success_callbacks=None, - langfuse_public_key=None, - langfuse_secret=None, - ): - if call_type not in [item.value for item in CallTypes]: - allowed_values = ", ".join([item.value for item in CallTypes]) - raise ValueError( - f"Invalid call_type {call_type}. 
Allowed values: {allowed_values}" - ) - if messages is not None: - if isinstance(messages, str): - messages = [ - {"role": "user", "content": messages} - ] # convert text completion input to the chat completion format - elif ( - isinstance(messages, list) - and len(messages) > 0 - and isinstance(messages[0], str) - ): - new_messages = [] - for m in messages: - new_messages.append({"role": "user", "content": m}) - messages = new_messages - self.model = model - self.messages = messages - self.stream = stream - self.start_time = start_time # log the call start time - self.call_type = call_type - self.litellm_call_id = litellm_call_id - self.function_id = function_id - self.streaming_chunks = [] # for generating complete stream response - self.sync_streaming_chunks = [] # for generating complete stream response - self.model_call_details = {} - self.dynamic_input_callbacks = [] # [TODO] callbacks set for just that call - self.dynamic_failure_callbacks = dynamic_failure_callbacks - self.dynamic_success_callbacks = ( - dynamic_success_callbacks # callbacks set for just that call - ) - self.dynamic_async_success_callbacks = ( - dynamic_async_success_callbacks # callbacks set for just that call - ) - ## DYNAMIC LANGFUSE KEYS ## - self.langfuse_public_key = langfuse_public_key - self.langfuse_secret = langfuse_secret - ## TIME TO FIRST TOKEN LOGGING ## - self.completion_start_time: Optional[datetime.datetime] = None - - def update_environment_variables( - self, model, user, optional_params, litellm_params, **additional_params - ): - self.optional_params = optional_params - self.model = model - self.user = user - self.litellm_params = litellm_params - self.logger_fn = litellm_params.get("logger_fn", None) - print_verbose(f"self.optional_params: {self.optional_params}") - - self.model_call_details = { - "model": self.model, - "messages": self.messages, - "optional_params": self.optional_params, - "litellm_params": self.litellm_params, - "start_time": self.start_time, - "stream": self.stream, - "user": user, - "call_type": str(self.call_type), - "litellm_call_id": self.litellm_call_id, - "completion_start_time": self.completion_start_time, - **self.optional_params, - **additional_params, - } - - ## check if stream options is set ## - used by CustomStreamWrapper for easy instrumentation - if "stream_options" in additional_params: - self.stream_options = additional_params["stream_options"] - ## check if custom pricing set ## - if ( - litellm_params.get("input_cost_per_token") is not None - or litellm_params.get("input_cost_per_second") is not None - or litellm_params.get("output_cost_per_token") is not None - or litellm_params.get("output_cost_per_second") is not None - ): - self.custom_pricing = True - - def _pre_call(self, input, api_key, model=None, additional_args={}): - """ - Common helper function across the sync + async pre-call function - """ - # print_verbose(f"logging pre call for model: {self.model} with call type: {self.call_type}") - self.model_call_details["input"] = input - self.model_call_details["api_key"] = api_key - self.model_call_details["additional_args"] = additional_args - self.model_call_details["log_event_type"] = "pre_api_call" - if ( - model - ): # if model name was changes pre-call, overwrite the initial model call name with the new one - self.model_call_details["model"] = model - - def pre_call(self, input, api_key, model=None, additional_args={}): - # Log the exact input to the LLM API - litellm.error_logs["PRE_CALL"] = locals() - try: - self._pre_call( - input=input, - 
api_key=api_key, - model=model, - additional_args=additional_args, - ) - - # User Logging -> if you pass in a custom logging function - headers = additional_args.get("headers", {}) - if headers is None: - headers = {} - data = additional_args.get("complete_input_dict", {}) - api_base = additional_args.get("api_base", "") - self.model_call_details["litellm_params"]["api_base"] = str( - api_base - ) # used for alerting - masked_headers = { - k: ( - (v[:-44] + "*" * 44) - if (isinstance(v, str) and len(v) > 44) - else "*****" - ) - for k, v in headers.items() - } - formatted_headers = " ".join( - [f"-H '{k}: {v}'" for k, v in masked_headers.items()] - ) - - verbose_logger.debug(f"PRE-API-CALL ADDITIONAL ARGS: {additional_args}") - - curl_command = "\n\nPOST Request Sent from LiteLLM:\n" - curl_command += "curl -X POST \\\n" - curl_command += f"{api_base} \\\n" - curl_command += ( - f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else "" - ) - curl_command += f"-d '{str(data)}'\n" - if additional_args.get("request_str", None) is not None: - # print the sagemaker / bedrock client request - curl_command = "\nRequest Sent from LiteLLM:\n" - curl_command += additional_args.get("request_str", None) - elif api_base == "": - curl_command = self.model_call_details - - # only print verbose if verbose logger is not set - if verbose_logger.level == 0: - # this means verbose logger was not switched on - user is in litellm.set_verbose=True - print_verbose(f"\033[92m{curl_command}\033[0m\n") - - if litellm.json_logs: - verbose_logger.debug( - "POST Request Sent from LiteLLM", - extra={"api_base": {api_base}, **masked_headers}, - ) - else: - verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n") - # log raw request to provider (like LangFuse) -- if opted in. - if litellm.log_raw_request_response is True: - try: - # [Non-blocking Extra Debug Information in metadata] - _litellm_params = self.model_call_details.get("litellm_params", {}) - _metadata = _litellm_params.get("metadata", {}) or {} - if ( - litellm.turn_off_message_logging is not None - and litellm.turn_off_message_logging is True - ): - _metadata["raw_request"] = ( - "redacted by litellm. 
\ - 'litellm.turn_off_message_logging=True'" - ) - else: - _metadata["raw_request"] = str(curl_command) - except Exception as e: - _metadata["raw_request"] = ( - "Unable to Log \ - raw request: {}".format( - str(e) - ) - ) - if self.logger_fn and callable(self.logger_fn): - try: - self.logger_fn( - self.model_call_details - ) # Expectation: any logger function passed in by the user should accept a dict object - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made - callbacks = litellm.input_callback + self.dynamic_input_callbacks - for callback in callbacks: - try: - if callback == "supabase": - print_verbose("reaches supabase for logging!") - model = self.model_call_details["model"] - messages = self.model_call_details["input"] - print_verbose(f"supabaseClient: {supabaseClient}") - supabaseClient.input_log_event( - model=model, - messages=messages, - end_user=self.model_call_details.get("user", "default"), - litellm_call_id=self.litellm_params["litellm_call_id"], - print_verbose=print_verbose, - ) - elif callback == "sentry" and add_breadcrumb: - try: - details_to_log = copy.deepcopy(self.model_call_details) - except: - details_to_log = self.model_call_details - if litellm.turn_off_message_logging: - # make a copy of the _model_Call_details and log it - details_to_log.pop("messages", None) - details_to_log.pop("input", None) - details_to_log.pop("prompt", None) - - add_breadcrumb( - category="litellm.llm_call", - message=f"Model Call Details pre-call: {details_to_log}", - level="info", - ) - elif isinstance(callback, CustomLogger): # custom logger class - callback.log_pre_api_call( - model=self.model, - messages=self.messages, - kwargs=self.model_call_details, - ) - elif callable(callback): # custom logger functions - customLogger.log_input_event( - model=self.model, - messages=self.messages, - kwargs=self.model_call_details, - print_verbose=print_verbose, - callback_func=callback, - ) - except Exception as e: - verbose_logger.error( - "litellm.Logging.pre_call(): Exception occured - {}".format( - str(e) - ) - ) - verbose_logger.debug( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - - def post_call( - self, original_response, input=None, api_key=None, additional_args={} - ): - # Log the exact result from the LLM API, for streaming - log the type of response received - litellm.error_logs["POST_CALL"] = locals() - if isinstance(original_response, dict): - original_response = json.dumps(original_response) - try: - self.model_call_details["input"] = input - self.model_call_details["api_key"] = api_key - self.model_call_details["original_response"] = original_response - self.model_call_details["additional_args"] = additional_args - self.model_call_details["log_event_type"] = "post_api_call" - # User Logging -> if you 
pass in a custom logging function - print_verbose( - f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n", - log_level="DEBUG", - ) - if self.logger_fn and callable(self.logger_fn): - try: - self.logger_fn( - self.model_call_details - ) # Expectation: any logger function passed in by the user should accept a dict object - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - original_response = redact_message_input_output_from_logging( - litellm_logging_obj=self, result=original_response - ) - # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made - - callbacks = litellm.input_callback + self.dynamic_input_callbacks - for callback in callbacks: - try: - if callback == "lite_debugger": - print_verbose("reaches litedebugger for post-call logging!") - print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") - liteDebuggerClient.post_call_log_event( - original_response=original_response, - litellm_call_id=self.litellm_params["litellm_call_id"], - print_verbose=print_verbose, - call_type=self.call_type, - stream=self.stream, - ) - elif callback == "sentry" and add_breadcrumb: - print_verbose("reaches sentry breadcrumbing") - try: - details_to_log = copy.deepcopy(self.model_call_details) - except: - details_to_log = self.model_call_details - if litellm.turn_off_message_logging: - # make a copy of the _model_Call_details and log it - details_to_log.pop("messages", None) - details_to_log.pop("input", None) - details_to_log.pop("prompt", None) - - add_breadcrumb( - category="litellm.llm_call", - message=f"Model Call Details post-call: {details_to_log}", - level="info", - ) - elif isinstance(callback, CustomLogger): # custom logger class - callback.log_post_api_call( - kwargs=self.model_call_details, - response_obj=None, - start_time=self.start_time, - end_time=None, - ) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}" - ) - pass - - def _success_handler_helper_fn( - self, result=None, start_time=None, end_time=None, cache_hit=None - ): - try: - if start_time is None: - start_time = self.start_time - if end_time is None: - end_time = datetime.datetime.now() - if self.completion_start_time is None: - self.completion_start_time = end_time - self.model_call_details["completion_start_time"] = ( - self.completion_start_time - ) - self.model_call_details["log_event_type"] = "successful_api_call" - self.model_call_details["end_time"] = end_time - self.model_call_details["cache_hit"] = cache_hit - ## if model in model cost map - log the response cost - ## else set cost to None - verbose_logger.debug(f"Model={self.model};") - if ( - result is not None - and ( - isinstance(result, ModelResponse) - or isinstance(result, EmbeddingResponse) - or isinstance(result, ImageResponse) - or isinstance(result, TranscriptionResponse) - or isinstance(result, TextCompletionResponse) - ) - and self.stream != True - ): # handle streaming separately - self.model_call_details["response_cost"] = ( - 
litellm.response_cost_calculator( - response_object=result, - model=self.model, - cache_hit=self.model_call_details.get("cache_hit", False), - custom_llm_provider=self.model_call_details.get( - "custom_llm_provider", None - ), - base_model=_get_base_model_from_metadata( - model_call_details=self.model_call_details - ), - call_type=self.call_type, - optional_params=self.optional_params, - ) - ) - else: # streaming chunks + image gen. - self.model_call_details["response_cost"] = None - - if ( - litellm.max_budget - and self.stream == False - and result is not None - and "content" in result - ): - time_diff = (end_time - start_time).total_seconds() - float_diff = float(time_diff) - litellm._current_cost += litellm.completion_cost( - model=self.model, - prompt="", - completion=result["content"], - total_time=float_diff, - ) - - return start_time, end_time, result - except Exception as e: - raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}") - - def success_handler( - self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs - ): - print_verbose(f"Logging Details LiteLLM-Success Call: {cache_hit}") - start_time, end_time, result = self._success_handler_helper_fn( - start_time=start_time, - end_time=end_time, - result=result, - cache_hit=cache_hit, - ) - # print(f"original response in success handler: {self.model_call_details['original_response']}") - try: - print_verbose(f"success callbacks: {litellm.success_callback}") - ## BUILD COMPLETE STREAMED RESPONSE - complete_streaming_response = None - if self.stream and isinstance(result, ModelResponse): - if ( - result.choices[0].finish_reason is not None - ): # if it's the last chunk - self.sync_streaming_chunks.append(result) - # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}") - try: - complete_streaming_response = litellm.stream_chunk_builder( - self.sync_streaming_chunks, - messages=self.model_call_details.get("messages", None), - start_time=start_time, - end_time=end_time, - ) - except Exception as e: - print_verbose( - "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format( - str(e), traceback.format_exc() - ), - log_level="ERROR", - ) - complete_streaming_response = None - else: - self.sync_streaming_chunks.append(result) - - if complete_streaming_response is not None: - print_verbose( - f"Logging Details LiteLLM-Success Call streaming complete" - ) - self.model_call_details["complete_streaming_response"] = ( - complete_streaming_response - ) - self.model_call_details["response_cost"] = ( - litellm.response_cost_calculator( - response_object=complete_streaming_response, - model=self.model, - cache_hit=self.model_call_details.get("cache_hit", False), - custom_llm_provider=self.model_call_details.get( - "custom_llm_provider", None - ), - base_model=_get_base_model_from_metadata( - model_call_details=self.model_call_details - ), - call_type=self.call_type, - optional_params=self.optional_params, - ) - ) - if self.dynamic_success_callbacks is not None and isinstance( - self.dynamic_success_callbacks, list - ): - callbacks = self.dynamic_success_callbacks - ## keep the internal functions ## - for callback in litellm.success_callback: - if ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - callbacks.append(callback) - else: - callbacks = litellm.success_callback - - result = redact_message_input_output_from_logging( - result=result, litellm_logging_obj=self - ) - 
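# A minimal sketch of the two success-callback styles the dispatch loop below
# handles, assuming the usual `litellm.integrations.custom_logger.CustomLogger`
# import path; class, function, and argument names here are illustrative only.
import litellm
from litellm.integrations.custom_logger import CustomLogger  # assumed import path


class UsageLogger(CustomLogger):
    # Class-style callback: the loop calls log_success_event() with the request
    # kwargs (model_call_details) and the final, or rebuilt streaming, response.
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print(kwargs.get("model"), kwargs.get("response_cost"))


def on_success(kwargs, completion_response, start_time, end_time):
    # Function-style callback: routed through customLogger.log_event(...,
    # callback_func=on_success) by the same loop.
    print("finished:", kwargs.get("litellm_call_id"))


litellm.success_callback = [UsageLogger(), on_success]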
- for callback in callbacks: - try: - litellm_params = self.model_call_details.get("litellm_params", {}) - if litellm_params.get("no-log", False) == True: - # proxy cost tracking cal backs should run - if not ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - print_verbose("no-log request, skipping logging") - continue - if callback == "lite_debugger": - print_verbose("reaches lite_debugger for logging!") - print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") - print_verbose( - f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}" - ) - liteDebuggerClient.log_event( - end_user=kwargs.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=self.litellm_call_id, - print_verbose=print_verbose, - call_type=self.call_type, - stream=self.stream, - ) - if callback == "promptlayer": - print_verbose("reaches promptlayer for logging!") - promptLayerLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "supabase": - print_verbose("reaches supabase for logging!") - kwargs = self.model_call_details - - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - print_verbose("reaches supabase for streaming logging!") - result = kwargs["complete_streaming_response"] - - model = kwargs["model"] - messages = kwargs["messages"] - optional_params = kwargs.get("optional_params", {}) - litellm_params = kwargs.get("litellm_params", {}) - supabaseClient.log_event( - model=model, - messages=messages, - end_user=optional_params.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=litellm_params.get( - "litellm_call_id", str(uuid.uuid4()) - ), - print_verbose=print_verbose, - ) - if callback == "wandb": - print_verbose("reaches wandb for logging!") - weightsBiasesLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "langsmith": - print_verbose("reaches langsmith for logging!") - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - print_verbose( - "reaches langsmith for streaming logging!" 
- ) - result = kwargs["complete_streaming_response"] - langsmithLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "logfire": - global logfireLogger - verbose_logger.debug("reaches logfire for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - print_verbose("reaches logfire for streaming logging!") - result = kwargs["complete_streaming_response"] - - logfireLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - level=LogfireLevel.INFO.value, - ) - - if callback == "lunary": - print_verbose("reaches lunary for logging!") - model = self.model - kwargs = self.model_call_details - - input = kwargs.get("messages", kwargs.get("input", None)) - - type = ( - "embed" - if self.call_type == CallTypes.embedding.value - else "llm" - ) - - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - if "complete_streaming_response" not in kwargs: - continue - else: - result = kwargs["complete_streaming_response"] - - lunaryLogger.log_event( - type=type, - kwargs=kwargs, - event="end", - model=model, - input=input, - user_id=kwargs.get("user", None), - # user_props=self.model_call_details.get("user_props", None), - extra=kwargs.get("optional_params", {}), - response_obj=result, - start_time=start_time, - end_time=end_time, - run_id=self.litellm_call_id, - print_verbose=print_verbose, - ) - if callback == "helicone": - print_verbose("reaches helicone for logging!") - model = self.model - messages = self.model_call_details["input"] - heliconeLogger.log_success( - model=model, - messages=messages, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "langfuse": - global langFuseLogger - verbose_logger.debug("reaches langfuse for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose("reaches langfuse for streaming logging!") - result = kwargs["complete_streaming_response"] - if langFuseLogger is None or ( - ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - and ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - ): - langFuseLogger = LangFuseLogger( - langfuse_public_key=self.langfuse_public_key, - langfuse_secret=self.langfuse_secret, - ) - langFuseLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "datadog": - global dataDogLogger - verbose_logger.debug("reaches 
datadog for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"datadog: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose("reaches datadog for streaming logging!") - result = kwargs["complete_streaming_response"] - dataDogLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "prometheus": - global prometheusLogger - verbose_logger.debug("reaches prometheus for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose( - "reaches prometheus for streaming logging!" - ) - result = kwargs["complete_streaming_response"] - prometheusLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "generic": - global genericAPILogger - verbose_logger.debug("reaches langfuse for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose("reaches langfuse for streaming logging!") - result = kwargs["complete_streaming_response"] - if genericAPILogger is None: - genericAPILogger = GenericAPILogger() - genericAPILogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "clickhouse": - global clickHouseLogger - verbose_logger.debug("reaches clickhouse for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose( - "reaches clickhouse for streaming logging!" 
- ) - result = kwargs["complete_streaming_response"] - if clickHouseLogger is None: - clickHouseLogger = ClickhouseLogger() - clickHouseLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "greenscale": - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - verbose_logger.debug( - f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}" - ) - if complete_streaming_response is None: - continue - else: - print_verbose( - "reaches greenscale for streaming logging!" - ) - result = kwargs["complete_streaming_response"] - - greenscaleLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "cache" and litellm.cache is not None: - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - print_verbose("success_callback: reaches cache for logging!") - kwargs = self.model_call_details - if self.stream: - if "complete_streaming_response" not in kwargs: - print_verbose( - f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" - ) - pass - else: - print_verbose( - "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" - ) - result = kwargs["complete_streaming_response"] - # only add to cache once we have a complete streaming response - litellm.cache.add_cache(result, **kwargs) - if callback == "athina": - deep_copy = {} - for k, v in self.model_call_details.items(): - deep_copy[k] = v - athinaLogger.log_event( - kwargs=deep_copy, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "traceloop": - deep_copy = {} - for k, v in self.model_call_details.items(): - if k != "original_response": - deep_copy[k] = v - traceloopLogger.log_event( - kwargs=deep_copy, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - if callback == "s3": - global s3Logger - if s3Logger is None: - s3Logger = S3Logger() - if self.stream: - if "complete_streaming_response" in self.model_call_details: - print_verbose( - "S3Logger Logger: Got Stream Event - Completed Stream Response" - ) - s3Logger.log_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - else: - print_verbose( - "S3Logger Logger: Got Stream Event - No complete stream response as yet" - ) - else: - s3Logger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if ( - callback == "openmeter" - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aimage_generation", False - ) - == False - and self.model_call_details.get("litellm_params", 
{}).get( - "atranscription", False - ) - == False - ): - global openMeterLogger - if openMeterLogger is None: - print_verbose("Instantiates openmeter client") - openMeterLogger = OpenMeterLogger() - if self.stream and complete_streaming_response is None: - openMeterLogger.log_stream_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - if self.stream and complete_streaming_response: - self.model_call_details["complete_response"] = ( - self.model_call_details.get( - "complete_streaming_response", {} - ) - ) - result = self.model_call_details["complete_response"] - openMeterLogger.log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - - if ( - isinstance(callback, CustomLogger) - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aimage_generation", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "atranscription", False - ) - == False - ): # custom logger class - if self.stream and complete_streaming_response is None: - callback.log_stream_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - if self.stream and complete_streaming_response: - self.model_call_details["complete_response"] = ( - self.model_call_details.get( - "complete_streaming_response", {} - ) - ) - result = self.model_call_details["complete_response"] - callback.log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - if ( - callable(callback) == True - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aimage_generation", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "atranscription", False - ) - == False - ): # custom logger functions - print_verbose( - f"success callbacks: Running Custom Callback Function" - ) - customLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging with integrations {traceback.format_exc()}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except: - print_verbose( - "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format( - str(e), traceback.format_exc() - ), - log_level="ERROR", - ) - pass - - async def async_success_handler( - self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs - ): - """ - Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. 
- """ - print_verbose("Logging Details LiteLLM-Async Success Call") - start_time, end_time, result = self._success_handler_helper_fn( - start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit - ) - ## BUILD COMPLETE STREAMED RESPONSE - complete_streaming_response = None - if self.stream: - if result.choices[0].finish_reason is not None: # if it's the last chunk - self.streaming_chunks.append(result) - # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}") - try: - complete_streaming_response = litellm.stream_chunk_builder( - self.streaming_chunks, - messages=self.model_call_details.get("messages", None), - start_time=start_time, - end_time=end_time, - ) - except Exception as e: - print_verbose( - "Error occurred building stream chunk in success logging: {}\n{}".format( - str(e), traceback.format_exc() - ), - log_level="ERROR", - ) - complete_streaming_response = None - else: - self.streaming_chunks.append(result) - if complete_streaming_response is not None: - print_verbose("Async success callbacks: Got a complete streaming response") - self.model_call_details["async_complete_streaming_response"] = ( - complete_streaming_response - ) - try: - if self.model_call_details.get("cache_hit", False) is True: - self.model_call_details["response_cost"] = 0.0 - else: - # check if base_model set on azure - base_model = _get_base_model_from_metadata( - model_call_details=self.model_call_details - ) - # base_model defaults to None if not set on model_info - self.model_call_details["response_cost"] = litellm.completion_cost( - completion_response=complete_streaming_response, - model=base_model, - ) - verbose_logger.debug( - f"Model={self.model}; cost={self.model_call_details['response_cost']}" - ) - except litellm.NotFoundError as e: - verbose_logger.error( - f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None" - ) - self.model_call_details["response_cost"] = None - - if self.dynamic_async_success_callbacks is not None and isinstance( - self.dynamic_async_success_callbacks, list - ): - callbacks = self.dynamic_async_success_callbacks - ## keep the internal functions ## - for callback in litellm._async_success_callback: - callback_name = "" - if isinstance(callback, CustomLogger): - callback_name = callback.__class__.__name__ - if callable(callback): - callback_name = callback.__name__ - if "_PROXY_" in callback_name: - callbacks.append(callback) - else: - callbacks = litellm._async_success_callback - - result = redact_message_input_output_from_logging( - result=result, litellm_logging_obj=self - ) - - for callback in callbacks: - # check if callback can run for this request - litellm_params = self.model_call_details.get("litellm_params", {}) - if litellm_params.get("no-log", False) == True: - # proxy cost tracking cal backs should run - if not ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - print_verbose("no-log request, skipping logging") - continue - try: - if kwargs.get("no-log", False) == True: - print_verbose("no-log request, skipping logging") - continue - if callback == "cache" and litellm.cache is not None: - # set_cache once complete streaming response is built - print_verbose("async success_callback: reaches cache for logging!") - kwargs = self.model_call_details - if self.stream: - if "async_complete_streaming_response" not in kwargs: - print_verbose( - f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. 
Kwargs={kwargs}\n\n" - ) - pass - else: - print_verbose( - "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache" - ) - result = kwargs["async_complete_streaming_response"] - # only add to cache once we have a complete streaming response - if litellm.cache is not None and not isinstance( - litellm.cache.cache, S3Cache - ): - await litellm.cache.async_add_cache(result, **kwargs) - else: - litellm.cache.add_cache(result, **kwargs) - if callback == "openmeter": - global openMeterLogger - if self.stream == True: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - await openMeterLogger.async_log_success_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - ) - else: - await openMeterLogger.async_log_stream_event( # [TODO]: move this to being an async log stream event function - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - await openMeterLogger.async_log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - if isinstance(callback, CustomLogger): # custom logger class - if self.stream == True: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - await callback.async_log_success_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - ) - else: - await callback.async_log_stream_event( # [TODO]: move this to being an async log stream event function - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - else: - await callback.async_log_success_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) - if callable(callback): # custom logger functions - if self.stream: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - await customLogger.async_log_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - else: - await customLogger.async_log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - if callback == "dynamodb": - global dynamoLogger - if dynamoLogger is None: - dynamoLogger = DyanmoDBLogger() - if self.stream: - if ( - "async_complete_streaming_response" - in self.model_call_details - ): - print_verbose( - "DynamoDB Logger: Got Stream Event - Completed Stream Response" - ) - await dynamoLogger._async_log_event( - kwargs=self.model_call_details, - response_obj=self.model_call_details[ - "async_complete_streaming_response" - ], - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - else: - print_verbose( - "DynamoDB Logger: Got Stream Event - No complete stream response as yet" - ) - else: - await dynamoLogger._async_log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - except Exception as e: - verbose_logger.error( - f"LiteLLM.LoggingError: [Non-Blocking] 
Exception occurred while success logging {traceback.format_exc()}" - ) - pass - - def _failure_handler_helper_fn( - self, exception, traceback_exception, start_time=None, end_time=None - ): - if start_time is None: - start_time = self.start_time - if end_time is None: - end_time = datetime.datetime.now() - - # on some exceptions, model_call_details is not always initialized, this ensures that we still log those exceptions - if not hasattr(self, "model_call_details"): - self.model_call_details = {} - - self.model_call_details["log_event_type"] = "failed_api_call" - self.model_call_details["exception"] = exception - self.model_call_details["traceback_exception"] = traceback_exception - self.model_call_details["end_time"] = end_time - self.model_call_details.setdefault("original_response", None) - return start_time, end_time - - def failure_handler( - self, exception, traceback_exception, start_time=None, end_time=None - ): - print_verbose( - f"Logging Details LiteLLM-Failure Call: {litellm.failure_callback}" - ) - try: - start_time, end_time = self._failure_handler_helper_fn( - exception=exception, - traceback_exception=traceback_exception, - start_time=start_time, - end_time=end_time, - ) - callbacks = [] # init this to empty incase it's not created - - if self.dynamic_failure_callbacks is not None and isinstance( - self.dynamic_failure_callbacks, list - ): - callbacks = self.dynamic_failure_callbacks - ## keep the internal functions ## - for callback in litellm.failure_callback: - if ( - isinstance(callback, CustomLogger) - and "_PROXY_" in callback.__class__.__name__ - ): - callbacks.append(callback) - else: - callbacks = litellm.failure_callback - - result = None # result sent to all loggers, init this to None incase it's not created - - result = redact_message_input_output_from_logging( - result=result, litellm_logging_obj=self - ) - for callback in callbacks: - try: - if callback == "lite_debugger": - print_verbose("reaches lite_debugger for logging!") - print_verbose(f"liteDebuggerClient: {liteDebuggerClient}") - result = { - "model": self.model, - "created": time.time(), - "error": traceback_exception, - "usage": { - "prompt_tokens": prompt_token_calculator( - self.model, messages=self.messages - ), - "completion_tokens": 0, - }, - } - liteDebuggerClient.log_event( - model=self.model, - messages=self.messages, - end_user=self.model_call_details.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=self.litellm_call_id, - print_verbose=print_verbose, - call_type=self.call_type, - stream=self.stream, - ) - if callback == "lunary": - print_verbose("reaches lunary for logging error!") - - model = self.model - - input = self.model_call_details["input"] - - _type = ( - "embed" - if self.call_type == CallTypes.embedding.value - else "llm" - ) - - lunaryLogger.log_event( - type=_type, - event="error", - user_id=self.model_call_details.get("user", "default"), - model=model, - input=input, - error=traceback_exception, - run_id=self.litellm_call_id, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - if callback == "sentry": - print_verbose("sending exception to sentry") - if capture_exception: - capture_exception(exception) - else: - print_verbose( - f"capture exception not initialized: {capture_exception}" - ) - if callable(callback): # custom logger functions - customLogger.log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - 
print_verbose=print_verbose, - callback_func=callback, - ) - if ( - isinstance(callback, CustomLogger) - and self.model_call_details.get("litellm_params", {}).get( - "acompletion", False - ) - == False - and self.model_call_details.get("litellm_params", {}).get( - "aembedding", False - ) - == False - ): # custom logger class - callback.log_failure_event( - start_time=start_time, - end_time=end_time, - response_obj=result, - kwargs=self.model_call_details, - ) - if callback == "langfuse": - global langFuseLogger - verbose_logger.debug("reaches langfuse for logging failure") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if langFuseLogger is None or ( - ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - and ( - self.langfuse_public_key is not None - and self.langfuse_public_key - != langFuseLogger.public_key - ) - ): - langFuseLogger = LangFuseLogger( - langfuse_public_key=self.langfuse_public_key, - langfuse_secret=self.langfuse_secret, - ) - langFuseLogger.log_event( - start_time=start_time, - end_time=end_time, - response_obj=None, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - status_message=str(exception), - level="ERROR", - kwargs=self.model_call_details, - ) - if callback == "traceloop": - traceloopLogger.log_event( - start_time=start_time, - end_time=end_time, - response_obj=None, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - status_message=str(exception), - level="ERROR", - kwargs=self.model_call_details, - ) - if callback == "prometheus": - global prometheusLogger - verbose_logger.debug("reaches prometheus for success logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - kwargs["exception"] = str(exception) - prometheusLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) - - if callback == "logfire": - global logfireLogger - verbose_logger.debug("reaches logfire for failure logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - kwargs["exception"] = exception - - logfireLogger.log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - level=LogfireLevel.ERROR.value, - print_verbose=print_verbose, - ) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}" - ) - print_verbose( - f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" - ) - if capture_exception: # log this error to sentry for debugging - capture_exception(e) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}" - ) - pass - - async def async_failure_handler( - self, exception, traceback_exception, start_time=None, end_time=None - ): - """ - Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions. 
- """ - start_time, end_time = self._failure_handler_helper_fn( - exception=exception, - traceback_exception=traceback_exception, - start_time=start_time, - end_time=end_time, - ) - result = None # result sent to all loggers, init this to None incase it's not created - for callback in litellm._async_failure_callback: - try: - if isinstance(callback, CustomLogger): # custom logger class - await callback.async_log_failure_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - ) # type: ignore - if callable(callback): # custom logger functions - await customLogger.async_log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) - except Exception as e: - print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" - ) def exception_logging( @@ -2848,6 +346,11 @@ def _init_custom_logger_compatible_class( def function_setup( original_function: str, rules_obj, start_time, *args, **kwargs ): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc. + ### NOTICES ### + if litellm.set_verbose is True: + verbose_logger.warning( + "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs." + ) try: global callback_list, add_breadcrumb, user_logger_fn, Logging function_id = kwargs["id"] if "id" in kwargs else None @@ -3033,7 +536,7 @@ def function_setup( ): messages = kwargs.get("input", "speech") stream = True if "stream" in kwargs and kwargs["stream"] == True else False - logging_obj = Logging( + logging_obj = litellm.litellm_core_utils.litellm_logging.Logging( model=model, messages=messages, stream=stream, @@ -3451,11 +954,6 @@ def client(original_function): logging_obj.failure_handler( e, traceback_exception, start_time, end_time ) # DO NOT MAKE THREADED - router retry fallback relies on this! 
- my_thread = threading.Thread( - target=handle_failure, - args=(e, traceback_exception, start_time, end_time, args, kwargs), - ) # don't interrupt execution of main thread - my_thread.start() if hasattr(e, "message"): if ( liteDebuggerClient and liteDebuggerClient.dashboard_url != None @@ -4323,229 +1821,6 @@ def token_counter( return num_tokens -def _cost_per_token_custom_pricing_helper( - prompt_tokens=0, - completion_tokens=0, - response_time_ms=None, - ### CUSTOM PRICING ### - custom_cost_per_token: Optional[CostPerToken] = None, - custom_cost_per_second: Optional[float] = None, -) -> Optional[Tuple[float, float]]: - """Internal helper function for calculating cost, if custom pricing given""" - if custom_cost_per_token is None and custom_cost_per_second is None: - return None - - if custom_cost_per_token is not None: - input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens - output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens - return input_cost, output_cost - elif custom_cost_per_second is not None: - output_cost = custom_cost_per_second * response_time_ms / 1000 # type: ignore - return 0, output_cost - - return None - - -def cost_per_token( - model: str = "", - prompt_tokens=0, - completion_tokens=0, - response_time_ms=None, - custom_llm_provider=None, - region_name=None, - ### CUSTOM PRICING ### - custom_cost_per_token: Optional[CostPerToken] = None, - custom_cost_per_second: Optional[float] = None, -) -> Tuple[float, float]: - """ - Calculates the cost per token for a given model, prompt tokens, and completion tokens. - - Parameters: - model (str): The name of the model to use. Default is "" - prompt_tokens (int): The number of tokens in the prompt. - completion_tokens (int): The number of tokens in the completion. - response_time (float): The amount of time, in milliseconds, it took the call to complete. - custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list) - custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call. - custom_cost_per_second: Optional[float]: the cost per second for the llm api call. - - Returns: - tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively. - """ - if model is None: - raise Exception("Invalid arg. Model cannot be none.") - ## CUSTOM PRICING ## - response_cost = _cost_per_token_custom_pricing_helper( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - response_time_ms=response_time_ms, - custom_cost_per_second=custom_cost_per_second, - custom_cost_per_token=custom_cost_per_token, - ) - if response_cost is not None: - return response_cost[0], response_cost[1] - - # given - prompt_tokens_cost_usd_dollar: float = 0 - completion_tokens_cost_usd_dollar: float = 0 - model_cost_ref = litellm.model_cost - model_with_provider = model - if custom_llm_provider is not None: - model_with_provider = custom_llm_provider + "/" + model - if region_name is not None: - model_with_provider_and_region = ( - f"{custom_llm_provider}/{region_name}/{model}" - ) - if ( - model_with_provider_and_region in model_cost_ref - ): # use region based pricing, if it's available - model_with_provider = model_with_provider_and_region - - model_without_prefix = model - model_parts = model.split("/") - if len(model_parts) > 1: - model_without_prefix = model_parts[1] - else: - model_without_prefix = model - """ - Code block that formats model to lookup in litellm.model_cost - Option1. 
model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1 - Option2. model = "openai/gpt-4" - model = provider/model - Option3. model = "anthropic.claude-3" - model = model - """ - if ( - model_with_provider in model_cost_ref - ): # Option 2. use model with provider, model = "openai/gpt-4" - model = model_with_provider - elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4" - model = model - elif ( - model_without_prefix in model_cost_ref - ): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3" - model = model_without_prefix - - # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models - print_verbose(f"Looking up model={model} in model_cost_map") - if model in model_cost_ref: - print_verbose(f"Success: model={model} in model_cost_map") - print_verbose( - f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}" - ) - if ( - model_cost_ref[model].get("input_cost_per_token", None) is not None - and model_cost_ref[model].get("output_cost_per_token", None) is not None - ): - ## COST PER TOKEN ## - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_token"] * completion_tokens - ) - elif ( - model_cost_ref[model].get("output_cost_per_second", None) is not None - and response_time_ms is not None - ): - print_verbose( - f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}" - ) - ## COST PER SECOND ## - prompt_tokens_cost_usd_dollar = 0 - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_second"] - * response_time_ms - / 1000 - ) - elif ( - model_cost_ref[model].get("input_cost_per_second", None) is not None - and response_time_ms is not None - ): - print_verbose( - f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}" - ) - ## COST PER SECOND ## - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000 - ) - completion_tokens_cost_usd_dollar = 0.0 - print_verbose( - f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}" - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif "ft:gpt-3.5-turbo" in model: - print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") - # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm - prompt_tokens_cost_usd_dollar = ( - model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"] - * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif "ft:davinci-002" in model: - print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") - # fuzzy match ft:davinci-002:abcd-id-cool-litellm - prompt_tokens_cost_usd_dollar = ( - model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref["ft:davinci-002"]["output_cost_per_token"] - * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, 
completion_tokens_cost_usd_dollar - elif "ft:babbage-002" in model: - print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") - # fuzzy match ft:babbage-002:abcd-id-cool-litellm - prompt_tokens_cost_usd_dollar = ( - model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref["ft:babbage-002"]["output_cost_per_token"] - * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif model in litellm.azure_llms: - verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM") - model = litellm.azure_llms[model] - verbose_logger.debug( - f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}" - ) - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_token"] * prompt_tokens - ) - verbose_logger.debug( - f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}" - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_token"] * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - elif model in litellm.azure_embedding_models: - verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model") - model = litellm.azure_embedding_models[model] - prompt_tokens_cost_usd_dollar = ( - model_cost_ref[model]["input_cost_per_token"] * prompt_tokens - ) - completion_tokens_cost_usd_dollar = ( - model_cost_ref[model]["output_cost_per_token"] * completion_tokens - ) - return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar - else: - # if model is not in model_prices_and_context_window.json. Raise an exception-let users know - error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. 
Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n" - raise litellm.exceptions.NotFoundError( # type: ignore - message=error_str, - model=model, - response=httpx.Response( - status_code=404, - content=error_str, - request=httpx.Request(method="cost_per_token", url="https://github.com/BerriAI/litellm"), # type: ignore - ), - llm_provider="", - ) - - def supports_httpx_timeout(custom_llm_provider: str) -> bool: """ Helper function to know if a provider implementation supports httpx timeout @@ -7624,153 +4899,6 @@ def set_callbacks(callback_list, function_id=None): raise e -# NOTE: DEPRECATING this in favor of using failure_handler() in Logging: -def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs): - global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger - try: - # print_verbose(f"handle_failure args: {args}") - # print_verbose(f"handle_failure kwargs: {kwargs}") - - success_handler = additional_details.pop("success_handler", None) - failure_handler = additional_details.pop("failure_handler", None) - - additional_details["Event_Name"] = additional_details.pop( - "failed_event_name", "litellm.failed_query" - ) - print_verbose(f"self.failure_callback: {litellm.failure_callback}") - for callback in litellm.failure_callback: - try: - if callback == "slack": - slack_msg = "" - if len(kwargs) > 0: - for key in kwargs: - slack_msg += f"{key}: {kwargs[key]}\n" - if len(args) > 0: - for i, arg in enumerate(args): - slack_msg += f"LiteLLM_Args_{str(i)}: {arg}" - for detail in additional_details: - slack_msg += f"{detail}: {additional_details[detail]}\n" - slack_msg += f"Traceback: {traceback_exception}" - truncated_slack_msg = textwrap.shorten( - slack_msg, width=512, placeholder="..." 
- ) - slack_app.client.chat_postMessage( - channel=alerts_channel, text=truncated_slack_msg - ) - elif callback == "sentry": - capture_exception(exception) - elif callback == "posthog": - print_verbose( - f"inside posthog, additional_details: {len(additional_details.keys())}" - ) - ph_obj = {} - if len(kwargs) > 0: - ph_obj = kwargs - if len(args) > 0: - for i, arg in enumerate(args): - ph_obj["litellm_args_" + str(i)] = arg - for detail in additional_details: - ph_obj[detail] = additional_details[detail] - event_name = additional_details["Event_Name"] - print_verbose(f"ph_obj: {ph_obj}") - print_verbose(f"PostHog Event Name: {event_name}") - if "user_id" in additional_details: - posthog.capture( - additional_details["user_id"], event_name, ph_obj - ) - else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python - unique_id = str(uuid.uuid4()) - posthog.capture(unique_id, event_name) - print_verbose(f"successfully logged to PostHog!") - elif callback == "berrispend": - print_verbose("reaches berrispend for logging!") - model = args[0] if len(args) > 0 else kwargs["model"] - messages = args[1] if len(args) > 1 else kwargs["messages"] - result = { - "model": model, - "created": time.time(), - "error": traceback_exception, - "usage": { - "prompt_tokens": prompt_token_calculator( - model, messages=messages - ), - "completion_tokens": 0, - }, - } - berrispendLogger.log_event( - model=model, - messages=messages, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - elif callback == "aispend": - print_verbose("reaches aispend for logging!") - model = args[0] if len(args) > 0 else kwargs["model"] - messages = args[1] if len(args) > 1 else kwargs["messages"] - result = { - "model": model, - "created": time.time(), - "usage": { - "prompt_tokens": prompt_token_calculator( - model, messages=messages - ), - "completion_tokens": 0, - }, - } - aispendLogger.log_event( - model=model, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - ) - elif callback == "supabase": - print_verbose("reaches supabase for logging!") - print_verbose(f"supabaseClient: {supabaseClient}") - model = args[0] if len(args) > 0 else kwargs["model"] - messages = args[1] if len(args) > 1 else kwargs["messages"] - result = { - "model": model, - "created": time.time(), - "error": traceback_exception, - "usage": { - "prompt_tokens": prompt_token_calculator( - model, messages=messages - ), - "completion_tokens": 0, - }, - } - supabaseClient.log_event( - model=model, - messages=messages, - end_user=kwargs.get("user", "default"), - response_obj=result, - start_time=start_time, - end_time=end_time, - litellm_call_id=kwargs["litellm_call_id"], - print_verbose=print_verbose, - ) - except: - print_verbose( - f"Error Occurred while logging failure: {traceback.format_exc()}" - ) - pass - - if failure_handler and callable(failure_handler): - call_details = { - "exception": exception, - "additional_details": additional_details, - } - failure_handler(call_details) - pass - except Exception as e: - # LOGGING - exception_logging(logger_fn=user_logger_fn, exception=e) - pass - - async def convert_to_streaming_response_async(response_object: Optional[dict] = None): """ Asynchronously converts a response object to a streaming response.
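# A minimal usage sketch for the cost_per_token API whose utils.py copy is
# removed above; importing it from the top-level litellm package is assumed to
# remain valid after the refactor. Standard models are priced from
# litellm.model_cost, while custom_cost_per_token / custom_cost_per_second
# short-circuit that lookup, as the removed helper shows. Model names and rates
# below are illustrative only.
from litellm import cost_per_token

# Priced from the model_cost map via the Option 1/2/3 key lookup shown above.
prompt_cost, completion_cost = cost_per_token(
    model="gpt-4", prompt_tokens=1000, completion_tokens=200
)

# Custom per-token pricing bypasses the map entirely.
prompt_cost, completion_cost = cost_per_token(
    model="my-finetune",  # hypothetical deployment name
    prompt_tokens=1000,
    completion_tokens=200,
    custom_cost_per_token={
        "input_cost_per_token": 1e-6,
        "output_cost_per_token": 2e-6,
    },
)

# Per-second pricing: prompt cost is 0, completion cost = rate * response_time_ms / 1000.
prompt_cost, completion_cost = cost_per_token(
    model="my-audio-model",  # hypothetical
    response_time_ms=4500,
    custom_cost_per_second=0.0004,
)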
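# A standalone sketch of the model-key normalization that cost_per_token (above)
# performs before consulting litellm.model_cost: prefer the provider/region-
# qualified key, then the provider-qualified key, then the raw model name, then
# the name with its provider prefix stripped. This mirrors the removed
# Option 1/2/3 logic for illustration only; resolve_cost_key is not a litellm
# function.
from typing import Optional


def resolve_cost_key(
    model: str,
    model_cost: dict,
    custom_llm_provider: Optional[str] = None,
    region_name: Optional[str] = None,
) -> Optional[str]:
    candidates = []
    if custom_llm_provider is not None:
        if region_name is not None:
            # region-based pricing is the most accurate key when it exists
            candidates.append(f"{custom_llm_provider}/{region_name}/{model}")
        candidates.append(f"{custom_llm_provider}/{model}")
    candidates.append(model)
    if "/" in model:
        candidates.append(model.split("/", 1)[1])  # "bedrock/anthropic.claude-3" -> "anthropic.claude-3"
    for key in candidates:
        if key in model_cost:
            return key
    return None  # unknown model; cost_per_token raises NotFoundError in this case


# e.g. resolve_cost_key("anthropic.claude-instant-v1", litellm.model_cost,
#                       custom_llm_provider="bedrock", region_name="ap-northeast-1")
# returns "bedrock/ap-northeast-1/anthropic.claude-instant-v1" when that key is priced.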