diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2d85031b5..74f165bdd 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -24,10 +24,10 @@ repos:
language: system
types: [python]
files: ^litellm/
- # - id: check-file-length
- # name: Check file length
- # entry: python check_file_length.py
- # args: ["10000"] # set your desired maximum number of lines
- # language: python
- # files: litellm/.*\.py
- # exclude: ^litellm/tests/
\ No newline at end of file
+ - id: check-file-length
+ name: Check file length
+ entry: python check_file_length.py
+ args: ["10000"] # set your desired maximum number of lines
+ language: python
+ files: litellm/.*\.py
+ exclude: ^litellm/tests/
\ No newline at end of file
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 6ecf70d0d..353d7ac5b 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -715,6 +715,7 @@ openai_image_generation_models = ["dall-e-2", "dall-e-3"]
from .timeout import timeout
from .cost_calculator import completion_cost
+from litellm.litellm_core_utils.litellm_logging import Logging
from .utils import (
client,
exception_type,
@@ -723,12 +724,10 @@ from .utils import (
token_counter,
create_pretrained_tokenizer,
create_tokenizer,
- cost_per_token,
supports_function_calling,
supports_parallel_function_calling,
supports_vision,
get_litellm_params,
- Logging,
acreate,
get_model_list,
get_max_tokens,
@@ -748,9 +747,10 @@ from .utils import (
get_first_chars_messages,
ModelResponse,
ImageResponse,
- ImageObject,
get_provider_fields,
)
+
+from .types.utils import ImageObject
from .llms.huggingface_restapi import HuggingfaceConfig
from .llms.anthropic import AnthropicConfig
from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig
@@ -827,4 +827,4 @@ from .router import Router
from .assistants.main import *
from .batches.main import *
from .scheduler import *
-from .cost_calculator import response_cost_calculator
+from .cost_calculator import response_cost_calculator, cost_per_token
diff --git a/litellm/_logging.py b/litellm/_logging.py
index ab7a08f97..52a445b49 100644
--- a/litellm/_logging.py
+++ b/litellm/_logging.py
@@ -3,10 +3,17 @@ from logging import Formatter
import traceback
set_verbose = False
+
+if set_verbose is True:
+ logging.warning(
+ "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs."
+ )
json_logs = bool(os.getenv("JSON_LOGS", False))
# Create a handler for the logger (you may need to adapt this based on your needs)
+log_level = os.getenv("LITELLM_LOG", "ERROR")
+numeric_level: int = getattr(logging, log_level.upper())
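+# e.g. LITELLM_LOG="DEBUG" maps to logging.DEBUG; defaults to ERROR when the env var is unset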
handler = logging.StreamHandler()
-handler.setLevel(logging.DEBUG)
+handler.setLevel(numeric_level)
class JsonFormatter(Formatter):
diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index d1e2dab52..c84df53e8 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -1,6 +1,6 @@
# What is this?
## File for 'response_cost' calculation in Logging
-from typing import Optional, Union, Literal, List
+from typing import Optional, Union, Literal, List, Tuple
import litellm._logging
from litellm.utils import (
ModelResponse,
@@ -9,7 +9,6 @@ from litellm.utils import (
TranscriptionResponse,
TextCompletionResponse,
CallTypes,
- cost_per_token,
print_verbose,
CostPerToken,
token_counter,
@@ -18,6 +17,224 @@ import litellm
from litellm import verbose_logger
+def _cost_per_token_custom_pricing_helper(
+ prompt_tokens=0,
+ completion_tokens=0,
+ response_time_ms=None,
+ ### CUSTOM PRICING ###
+ custom_cost_per_token: Optional[CostPerToken] = None,
+ custom_cost_per_second: Optional[float] = None,
+) -> Optional[Tuple[float, float]]:
+ """Internal helper function for calculating cost, if custom pricing given"""
+ if custom_cost_per_token is None and custom_cost_per_second is None:
+ return None
+
+ if custom_cost_per_token is not None:
+ input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens
+ output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens
+ return input_cost, output_cost
+ elif custom_cost_per_second is not None:
+ output_cost = custom_cost_per_second * response_time_ms / 1000 # type: ignore
+ return 0, output_cost
+
+ return None
+
+
+def cost_per_token(
+ model: str = "",
+ prompt_tokens=0,
+ completion_tokens=0,
+ response_time_ms=None,
+ custom_llm_provider=None,
+ region_name=None,
+ ### CUSTOM PRICING ###
+ custom_cost_per_token: Optional[CostPerToken] = None,
+ custom_cost_per_second: Optional[float] = None,
+) -> Tuple[float, float]:
+ """
+ Calculates the cost per token for a given model, prompt tokens, and completion tokens.
+
+ Parameters:
+ model (str): The name of the model to use. Default is ""
+ prompt_tokens (int): The number of tokens in the prompt.
+ completion_tokens (int): The number of tokens in the completion.
+ response_time (float): The amount of time, in milliseconds, it took the call to complete.
+ custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
+ custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
+ custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
+
+ Returns:
+ tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
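+
+    Example (illustrative only):
+        prompt_cost, completion_cost = cost_per_token(
+            model="gpt-3.5-turbo", prompt_tokens=100, completion_tokens=50
+        )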
+ """
+ if model is None:
+ raise Exception("Invalid arg. Model cannot be none.")
+ ## CUSTOM PRICING ##
+ response_cost = _cost_per_token_custom_pricing_helper(
+ prompt_tokens=prompt_tokens,
+ completion_tokens=completion_tokens,
+ response_time_ms=response_time_ms,
+ custom_cost_per_second=custom_cost_per_second,
+ custom_cost_per_token=custom_cost_per_token,
+ )
+ if response_cost is not None:
+ return response_cost[0], response_cost[1]
+
+ # given
+ prompt_tokens_cost_usd_dollar: float = 0
+ completion_tokens_cost_usd_dollar: float = 0
+ model_cost_ref = litellm.model_cost
+ model_with_provider = model
+ if custom_llm_provider is not None:
+ model_with_provider = custom_llm_provider + "/" + model
+ if region_name is not None:
+ model_with_provider_and_region = (
+ f"{custom_llm_provider}/{region_name}/{model}"
+ )
+ if (
+ model_with_provider_and_region in model_cost_ref
+ ): # use region based pricing, if it's available
+ model_with_provider = model_with_provider_and_region
+
+ model_without_prefix = model
+ model_parts = model.split("/")
+ if len(model_parts) > 1:
+ model_without_prefix = model_parts[1]
+ else:
+ model_without_prefix = model
+ """
+ Code block that formats model to lookup in litellm.model_cost
+ Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
+ Option2. model = "openai/gpt-4" - model = provider/model
+ Option3. model = "anthropic.claude-3" - model = model
+ """
+ if (
+ model_with_provider in model_cost_ref
+ ): # Option 2. use model with provider, model = "openai/gpt-4"
+ model = model_with_provider
+ elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4"
+ model = model
+ elif (
+ model_without_prefix in model_cost_ref
+ ): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
+ model = model_without_prefix
+
+ # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
+ print_verbose(f"Looking up model={model} in model_cost_map")
+ if model in model_cost_ref:
+ print_verbose(f"Success: model={model} in model_cost_map")
+ print_verbose(
+ f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}"
+ )
+ if (
+ model_cost_ref[model].get("input_cost_per_token", None) is not None
+ and model_cost_ref[model].get("output_cost_per_token", None) is not None
+ ):
+ ## COST PER TOKEN ##
+ prompt_tokens_cost_usd_dollar = (
+ model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+ )
+ completion_tokens_cost_usd_dollar = (
+ model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+ )
+ elif (
+ model_cost_ref[model].get("output_cost_per_second", None) is not None
+ and response_time_ms is not None
+ ):
+ print_verbose(
+ f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}"
+ )
+ ## COST PER SECOND ##
+ prompt_tokens_cost_usd_dollar = 0
+ completion_tokens_cost_usd_dollar = (
+ model_cost_ref[model]["output_cost_per_second"]
+ * response_time_ms
+ / 1000
+ )
+ elif (
+ model_cost_ref[model].get("input_cost_per_second", None) is not None
+ and response_time_ms is not None
+ ):
+ print_verbose(
+ f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}"
+ )
+ ## COST PER SECOND ##
+ prompt_tokens_cost_usd_dollar = (
+ model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000
+ )
+ completion_tokens_cost_usd_dollar = 0.0
+ print_verbose(
+ f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
+ )
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+ elif "ft:gpt-3.5-turbo" in model:
+ print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
+ # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
+ prompt_tokens_cost_usd_dollar = (
+ model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
+ )
+ completion_tokens_cost_usd_dollar = (
+ model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"]
+ * completion_tokens
+ )
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+ elif "ft:davinci-002" in model:
+ print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
+ # fuzzy match ft:davinci-002:abcd-id-cool-litellm
+ prompt_tokens_cost_usd_dollar = (
+ model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens
+ )
+ completion_tokens_cost_usd_dollar = (
+ model_cost_ref["ft:davinci-002"]["output_cost_per_token"]
+ * completion_tokens
+ )
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+ elif "ft:babbage-002" in model:
+ print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM")
+ # fuzzy match ft:babbage-002:abcd-id-cool-litellm
+ prompt_tokens_cost_usd_dollar = (
+ model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens
+ )
+ completion_tokens_cost_usd_dollar = (
+ model_cost_ref["ft:babbage-002"]["output_cost_per_token"]
+ * completion_tokens
+ )
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+ elif model in litellm.azure_llms:
+ verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM")
+ model = litellm.azure_llms[model]
+ verbose_logger.debug(
+ f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
+ )
+ prompt_tokens_cost_usd_dollar = (
+ model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+ )
+ verbose_logger.debug(
+ f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
+ )
+ completion_tokens_cost_usd_dollar = (
+ model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+ )
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+ elif model in litellm.azure_embedding_models:
+ verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
+ model = litellm.azure_embedding_models[model]
+ prompt_tokens_cost_usd_dollar = (
+ model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+ )
+ completion_tokens_cost_usd_dollar = (
+ model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+ )
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+ else:
+ # if model is not in model_prices_and_context_window.json. Raise an exception-let users know
+ error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
+ raise litellm.exceptions.NotFoundError( # type: ignore
+ message=error_str,
+ model=model,
+ llm_provider="",
+ )
+
+
# Extract the number of billion parameters from the model name
# only used for together_computer LLMs
def get_model_params_and_category(model_name) -> str:
diff --git a/litellm/litellm_core_utils/core_helpers.py b/litellm/litellm_core_utils/core_helpers.py
new file mode 100644
index 000000000..7b911895d
--- /dev/null
+++ b/litellm/litellm_core_utils/core_helpers.py
@@ -0,0 +1,41 @@
+# What is this?
+## Helper utilities for the model response objects
+
+
+def map_finish_reason(
+ finish_reason: str,
+): # openai supports 5 finish reasons - 'stop', 'length', 'function_call', 'content_filter', 'null'
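+    # e.g. map_finish_reason("MAX_TOKENS") -> "length"; map_finish_reason("end_turn") -> "stop"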
+ # anthropic mapping
+ if finish_reason == "stop_sequence":
+ return "stop"
+ # cohere mapping - https://docs.cohere.com/reference/generate
+ elif finish_reason == "COMPLETE":
+ return "stop"
+ elif finish_reason == "MAX_TOKENS": # cohere + vertex ai
+ return "length"
+ elif finish_reason == "ERROR_TOXIC":
+ return "content_filter"
+ elif (
+ finish_reason == "ERROR"
+ ): # openai currently doesn't support an 'error' finish reason
+ return "stop"
+ # huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream
+ elif finish_reason == "eos_token" or finish_reason == "stop_sequence":
+ return "stop"
+ elif (
+ finish_reason == "FINISH_REASON_UNSPECIFIED" or finish_reason == "STOP"
+ ): # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',]
+ return "stop"
+ elif finish_reason == "SAFETY": # vertex ai
+ return "content_filter"
+ elif finish_reason == "STOP": # vertex ai
+ return "stop"
+ elif finish_reason == "end_turn" or finish_reason == "stop_sequence": # anthropic
+ return "stop"
+ elif finish_reason == "max_tokens": # anthropic
+ return "length"
+ elif finish_reason == "tool_use": # anthropic
+ return "tool_calls"
+ elif finish_reason == "content_filtered":
+ return "content_filter"
+ return finish_reason
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
new file mode 100644
index 000000000..f99303abb
--- /dev/null
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -0,0 +1,1780 @@
+# What is this?
+## Common Utility file for Logging handler
+# Logging function -> log the exact model details + what's being sent | Non-Blocking
+from litellm.types.utils import CallTypes
+from typing import Optional
+import datetime
+from litellm import (
+ verbose_logger,
+ json_logs,
+ log_raw_request_response,
+ turn_off_message_logging,
+)
+import traceback
+import litellm
+import copy
+import sys
+import uuid
+import os
+from litellm.integrations.custom_logger import CustomLogger
+import json
+import time
+from litellm.litellm_core_utils.redact_messages import (
+ redact_message_input_output_from_logging,
+)
+from litellm.utils import (
+ _get_base_model_from_metadata,
+ supabaseClient,
+ liteDebuggerClient,
+ promptLayerLogger,
+ weightsBiasesLogger,
+ langsmithLogger,
+ logfireLogger,
+ capture_exception,
+ add_breadcrumb,
+ lunaryLogger,
+ prometheusLogger,
+ print_verbose,
+ customLogger,
+ prompt_token_calculator,
+)
+from litellm.types.utils import (
+ ModelResponse,
+ EmbeddingResponse,
+ ImageResponse,
+ TranscriptionResponse,
+ TextCompletionResponse,
+)
+import subprocess
+from ..integrations.traceloop import TraceloopLogger
+from ..integrations.athina import AthinaLogger
+from ..integrations.helicone import HeliconeLogger
+from ..integrations.aispend import AISpendLogger
+from ..integrations.berrispend import BerriSpendLogger
+from ..integrations.supabase import Supabase
+from ..integrations.lunary import LunaryLogger
+from ..integrations.prompt_layer import PromptLayerLogger
+from ..integrations.langsmith import LangsmithLogger
+from ..integrations.logfire_logger import LogfireLogger, LogfireLevel
+from ..integrations.weights_biases import WeightsBiasesLogger
+from ..integrations.custom_logger import CustomLogger
+from ..integrations.langfuse import LangFuseLogger
+from ..integrations.openmeter import OpenMeterLogger
+from ..integrations.lago import LagoLogger
+from ..integrations.datadog import DataDogLogger
+from ..integrations.prometheus import PrometheusLogger
+from ..integrations.prometheus_services import PrometheusServicesLogger
+from ..integrations.dynamodb import DyanmoDBLogger
+from ..integrations.s3 import S3Logger
+from ..integrations.clickhouse import ClickhouseLogger
+from ..integrations.greenscale import GreenscaleLogger
+from ..integrations.litedebugger import LiteDebugger
+
+
+class Logging:
+ global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger, logfireLogger, prometheusLogger, slack_app
+ custom_pricing: bool = False
+ stream_options = None
+
+ def __init__(
+ self,
+ model,
+ messages,
+ stream,
+ call_type,
+ start_time,
+ litellm_call_id,
+ function_id,
+ dynamic_success_callbacks=None,
+ dynamic_failure_callbacks=None,
+ dynamic_async_success_callbacks=None,
+ langfuse_public_key=None,
+ langfuse_secret=None,
+ ):
+ if call_type not in [item.value for item in CallTypes]:
+ allowed_values = ", ".join([item.value for item in CallTypes])
+ raise ValueError(
+ f"Invalid call_type {call_type}. Allowed values: {allowed_values}"
+ )
+ if messages is not None:
+ if isinstance(messages, str):
+ messages = [
+ {"role": "user", "content": messages}
+ ] # convert text completion input to the chat completion format
+ elif (
+ isinstance(messages, list)
+ and len(messages) > 0
+ and isinstance(messages[0], str)
+ ):
+ new_messages = []
+ for m in messages:
+ new_messages.append({"role": "user", "content": m})
+ messages = new_messages
+ self.model = model
+ self.messages = messages
+ self.stream = stream
+ self.start_time = start_time # log the call start time
+ self.call_type = call_type
+ self.litellm_call_id = litellm_call_id
+ self.function_id = function_id
+ self.streaming_chunks = [] # for generating complete stream response
+ self.sync_streaming_chunks = [] # for generating complete stream response
+ self.model_call_details = {}
+ self.dynamic_input_callbacks = [] # [TODO] callbacks set for just that call
+ self.dynamic_failure_callbacks = dynamic_failure_callbacks
+ self.dynamic_success_callbacks = (
+ dynamic_success_callbacks # callbacks set for just that call
+ )
+ self.dynamic_async_success_callbacks = (
+ dynamic_async_success_callbacks # callbacks set for just that call
+ )
+ ## DYNAMIC LANGFUSE KEYS ##
+ self.langfuse_public_key = langfuse_public_key
+ self.langfuse_secret = langfuse_secret
+ ## TIME TO FIRST TOKEN LOGGING ##
+ self.completion_start_time: Optional[datetime.datetime] = None
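+        # set by _success_handler_helper_fn the first time a successful result is logged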
+
+ def update_environment_variables(
+ self, model, user, optional_params, litellm_params, **additional_params
+ ):
+ self.optional_params = optional_params
+ self.model = model
+ self.user = user
+ self.litellm_params = litellm_params
+ self.logger_fn = litellm_params.get("logger_fn", None)
+ verbose_logger.debug(f"self.optional_params: {self.optional_params}")
+
+ self.model_call_details = {
+ "model": self.model,
+ "messages": self.messages,
+ "optional_params": self.optional_params,
+ "litellm_params": self.litellm_params,
+ "start_time": self.start_time,
+ "stream": self.stream,
+ "user": user,
+ "call_type": str(self.call_type),
+ "litellm_call_id": self.litellm_call_id,
+ "completion_start_time": self.completion_start_time,
+ **self.optional_params,
+ **additional_params,
+ }
+
+ ## check if stream options is set ## - used by CustomStreamWrapper for easy instrumentation
+ if "stream_options" in additional_params:
+ self.stream_options = additional_params["stream_options"]
+ ## check if custom pricing set ##
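+        ## (e.g. litellm_params={"input_cost_per_token": 1e-06} flags this call as custom-priced)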
+ if (
+ litellm_params.get("input_cost_per_token") is not None
+ or litellm_params.get("input_cost_per_second") is not None
+ or litellm_params.get("output_cost_per_token") is not None
+ or litellm_params.get("output_cost_per_second") is not None
+ ):
+ self.custom_pricing = True
+
+ def _pre_call(self, input, api_key, model=None, additional_args={}):
+ """
+ Common helper function across the sync + async pre-call function
+ """
+ self.model_call_details["input"] = input
+ self.model_call_details["api_key"] = api_key
+ self.model_call_details["additional_args"] = additional_args
+ self.model_call_details["log_event_type"] = "pre_api_call"
+ if (
+ model
+        ): # if model name was changed pre-call, overwrite the initial model call name with the new one
+ self.model_call_details["model"] = model
+
+ def pre_call(self, input, api_key, model=None, additional_args={}):
+ # Log the exact input to the LLM API
+ litellm.error_logs["PRE_CALL"] = locals()
+ try:
+ self._pre_call(
+ input=input,
+ api_key=api_key,
+ model=model,
+ additional_args=additional_args,
+ )
+
+ # User Logging -> if you pass in a custom logging function
+ headers = additional_args.get("headers", {})
+ if headers is None:
+ headers = {}
+ data = additional_args.get("complete_input_dict", {})
+ api_base = additional_args.get("api_base", "")
+ self.model_call_details["litellm_params"]["api_base"] = str(
+ api_base
+ ) # used for alerting
+ masked_headers = {
+ k: (
+ (v[:-44] + "*" * 44)
+ if (isinstance(v, str) and len(v) > 44)
+ else "*****"
+ )
+ for k, v in headers.items()
+ }
+ formatted_headers = " ".join(
+ [f"-H '{k}: {v}'" for k, v in masked_headers.items()]
+ )
+
+ verbose_logger.debug(f"PRE-API-CALL ADDITIONAL ARGS: {additional_args}")
+
+ curl_command = "\n\nPOST Request Sent from LiteLLM:\n"
+ curl_command += "curl -X POST \\\n"
+ curl_command += f"{api_base} \\\n"
+ curl_command += (
+ f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else ""
+ )
+ curl_command += f"-d '{str(data)}'\n"
+ if additional_args.get("request_str", None) is not None:
+ # print the sagemaker / bedrock client request
+ curl_command = "\nRequest Sent from LiteLLM:\n"
+ curl_command += additional_args.get("request_str", None)
+ elif api_base == "":
+ curl_command = self.model_call_details
+
+ if json_logs:
+ verbose_logger.debug(
+ "POST Request Sent from LiteLLM",
+ extra={"api_base": {api_base}, **masked_headers},
+ )
+ else:
+ verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n")
+ # log raw request to provider (like LangFuse) -- if opted in.
+ if log_raw_request_response is True:
+ try:
+ # [Non-blocking Extra Debug Information in metadata]
+ _litellm_params = self.model_call_details.get("litellm_params", {})
+ _metadata = _litellm_params.get("metadata", {}) or {}
+ if (
+ turn_off_message_logging is not None
+ and turn_off_message_logging is True
+ ):
+ _metadata["raw_request"] = (
+ "redacted by litellm. \
+ 'litellm.turn_off_message_logging=True'"
+ )
+ else:
+ _metadata["raw_request"] = str(curl_command)
+ except Exception as e:
+ _metadata["raw_request"] = (
+ "Unable to Log \
+ raw request: {}".format(
+ str(e)
+ )
+ )
+ if self.logger_fn and callable(self.logger_fn):
+ try:
+ self.logger_fn(
+ self.model_call_details
+ ) # Expectation: any logger function passed in by the user should accept a dict object
+ except Exception as e:
+ verbose_logger.error(
+ "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format(
+ str(e), traceback.format_exc()
+ )
+ )
+ # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
+ callbacks = litellm.input_callback + self.dynamic_input_callbacks
+ for callback in callbacks:
+ try:
+ if callback == "supabase":
+ verbose_logger.debug("reaches supabase for logging!")
+ model = self.model_call_details["model"]
+ messages = self.model_call_details["input"]
+ verbose_logger.debug(f"supabaseClient: {supabaseClient}")
+ supabaseClient.input_log_event(
+ model=model,
+ messages=messages,
+ end_user=self.model_call_details.get("user", "default"),
+ litellm_call_id=self.litellm_params["litellm_call_id"],
+ print_verbose=print_verbose,
+ )
+ elif callback == "sentry" and add_breadcrumb:
+ try:
+ details_to_log = copy.deepcopy(self.model_call_details)
+ except:
+ details_to_log = self.model_call_details
+ if litellm.turn_off_message_logging:
+                            # make a copy of the model_call_details and log it
+ details_to_log.pop("messages", None)
+ details_to_log.pop("input", None)
+ details_to_log.pop("prompt", None)
+
+ add_breadcrumb(
+ category="litellm.llm_call",
+ message=f"Model Call Details pre-call: {details_to_log}",
+ level="info",
+ )
+ elif isinstance(callback, CustomLogger): # custom logger class
+ callback.log_pre_api_call(
+ model=self.model,
+ messages=self.messages,
+ kwargs=self.model_call_details,
+ )
+ elif callable(callback): # custom logger functions
+ customLogger.log_input_event(
+ model=self.model,
+ messages=self.messages,
+ kwargs=self.model_call_details,
+ print_verbose=print_verbose,
+ callback_func=callback,
+ )
+ except Exception as e:
+ verbose_logger.error(
+                        "litellm.Logging.pre_call(): Exception occurred - {}\n{}".format(
+ str(e), traceback.format_exc()
+ )
+ )
+ verbose_logger.debug(
+ f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
+ )
+ if capture_exception: # log this error to sentry for debugging
+ capture_exception(e)
+        except Exception as e:
+ verbose_logger.error(
+ "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format(
+ str(e), traceback.format_exc()
+ )
+ )
+ verbose_logger.error(
+ f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
+ )
+ if capture_exception: # log this error to sentry for debugging
+ capture_exception(e)
+
+ def post_call(
+ self, original_response, input=None, api_key=None, additional_args={}
+ ):
+ # Log the exact result from the LLM API, for streaming - log the type of response received
+ litellm.error_logs["POST_CALL"] = locals()
+ if isinstance(original_response, dict):
+ original_response = json.dumps(original_response)
+ try:
+ self.model_call_details["input"] = input
+ self.model_call_details["api_key"] = api_key
+ self.model_call_details["original_response"] = original_response
+ self.model_call_details["additional_args"] = additional_args
+ self.model_call_details["log_event_type"] = "post_api_call"
+
+ verbose_logger.debug(
+ "RAW RESPONSE:\n{}\n\n".format(
+ self.model_call_details.get(
+ "original_response", self.model_call_details
+ )
+ ),
+ )
+ if self.logger_fn and callable(self.logger_fn):
+ try:
+ self.logger_fn(
+ self.model_call_details
+ ) # Expectation: any logger function passed in by the user should accept a dict object
+ except Exception as e:
+ verbose_logger.debug(
+ "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format(
+ str(e), traceback.format_exc()
+ )
+ )
+ original_response = redact_message_input_output_from_logging(
+ litellm_logging_obj=self, result=original_response
+ )
+ # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
+
+ callbacks = litellm.input_callback + self.dynamic_input_callbacks
+ for callback in callbacks:
+ try:
+ if callback == "sentry" and add_breadcrumb:
+ verbose_logger.debug("reaches sentry breadcrumbing")
+ try:
+ details_to_log = copy.deepcopy(self.model_call_details)
+ except:
+ details_to_log = self.model_call_details
+ if litellm.turn_off_message_logging:
+                            # make a copy of the model_call_details and log it
+ details_to_log.pop("messages", None)
+ details_to_log.pop("input", None)
+ details_to_log.pop("prompt", None)
+
+ add_breadcrumb(
+ category="litellm.llm_call",
+ message=f"Model Call Details post-call: {details_to_log}",
+ level="info",
+ )
+ elif isinstance(callback, CustomLogger): # custom logger class
+ callback.log_post_api_call(
+ kwargs=self.model_call_details,
+ response_obj=None,
+ start_time=self.start_time,
+ end_time=None,
+ )
+ except Exception as e:
+ verbose_logger.error(
+ "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {}\n{}".format(
+ str(e), traceback.format_exc()
+ )
+ )
+ verbose_logger.debug(
+ f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
+ )
+ if capture_exception: # log this error to sentry for debugging
+ capture_exception(e)
+ except Exception as e:
+ verbose_logger.error(
+ "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {}\n{}".format(
+ str(e), traceback.format_exc()
+ )
+ )
+
+ def _success_handler_helper_fn(
+ self, result=None, start_time=None, end_time=None, cache_hit=None
+ ):
+ try:
+ if start_time is None:
+ start_time = self.start_time
+ if end_time is None:
+ end_time = datetime.datetime.now()
+ if self.completion_start_time is None:
+ self.completion_start_time = end_time
+ self.model_call_details["completion_start_time"] = (
+ self.completion_start_time
+ )
+ self.model_call_details["log_event_type"] = "successful_api_call"
+ self.model_call_details["end_time"] = end_time
+ self.model_call_details["cache_hit"] = cache_hit
+ ## if model in model cost map - log the response cost
+ ## else set cost to None
+ verbose_logger.debug(f"Model={self.model};")
+ if (
+ result is not None
+ and (
+ isinstance(result, ModelResponse)
+ or isinstance(result, EmbeddingResponse)
+ or isinstance(result, ImageResponse)
+ or isinstance(result, TranscriptionResponse)
+ or isinstance(result, TextCompletionResponse)
+ )
+ and self.stream != True
+ ): # handle streaming separately
+ self.model_call_details["response_cost"] = (
+ litellm.response_cost_calculator(
+ response_object=result,
+ model=self.model,
+ cache_hit=self.model_call_details.get("cache_hit", False),
+ custom_llm_provider=self.model_call_details.get(
+ "custom_llm_provider", None
+ ),
+ base_model=_get_base_model_from_metadata(
+ model_call_details=self.model_call_details
+ ),
+ call_type=self.call_type,
+ optional_params=self.optional_params,
+ )
+ )
+ else: # streaming chunks + image gen.
+ self.model_call_details["response_cost"] = None
+
+ if (
+ litellm.max_budget
+ and self.stream == False
+ and result is not None
+ and "content" in result
+ ):
+ time_diff = (end_time - start_time).total_seconds()
+ float_diff = float(time_diff)
+ litellm._current_cost += litellm.completion_cost(
+ model=self.model,
+ prompt="",
+ completion=result["content"],
+ total_time=float_diff,
+ )
+
+ return start_time, end_time, result
+ except Exception as e:
+ raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}")
+
+ def success_handler(
+ self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs
+ ):
+ verbose_logger.debug(
+ f"Logging Details LiteLLM-Success Call: Cache_hit={cache_hit}"
+ )
+ start_time, end_time, result = self._success_handler_helper_fn(
+ start_time=start_time,
+ end_time=end_time,
+ result=result,
+ cache_hit=cache_hit,
+ )
+ # print(f"original response in success handler: {self.model_call_details['original_response']}")
+ try:
+ verbose_logger.debug(f"success callbacks: {litellm.success_callback}")
+ ## BUILD COMPLETE STREAMED RESPONSE
+ complete_streaming_response = None
+ if self.stream and isinstance(result, ModelResponse):
+ if (
+ result.choices[0].finish_reason is not None
+ ): # if it's the last chunk
+ self.sync_streaming_chunks.append(result)
+ # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}")
+ try:
+ complete_streaming_response = litellm.stream_chunk_builder(
+ self.sync_streaming_chunks,
+ messages=self.model_call_details.get("messages", None),
+ start_time=start_time,
+ end_time=end_time,
+ )
+ except Exception as e:
+ verbose_logger.error(
+ "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format(
+ str(e), traceback.format_exc()
+ ),
+ )
+ complete_streaming_response = None
+ else:
+ self.sync_streaming_chunks.append(result)
+
+ if complete_streaming_response is not None:
+ verbose_logger.debug(
+ f"Logging Details LiteLLM-Success Call streaming complete"
+ )
+ self.model_call_details["complete_streaming_response"] = (
+ complete_streaming_response
+ )
+ self.model_call_details["response_cost"] = (
+ litellm.response_cost_calculator(
+ response_object=complete_streaming_response,
+ model=self.model,
+ cache_hit=self.model_call_details.get("cache_hit", False),
+ custom_llm_provider=self.model_call_details.get(
+ "custom_llm_provider", None
+ ),
+ base_model=_get_base_model_from_metadata(
+ model_call_details=self.model_call_details
+ ),
+ call_type=self.call_type,
+ optional_params=self.optional_params,
+ )
+ )
+ if self.dynamic_success_callbacks is not None and isinstance(
+ self.dynamic_success_callbacks, list
+ ):
+ callbacks = self.dynamic_success_callbacks
+ ## keep the internal functions ##
+ for callback in litellm.success_callback:
+ if (
+ isinstance(callback, CustomLogger)
+ and "_PROXY_" in callback.__class__.__name__
+ ):
+ callbacks.append(callback)
+ else:
+ callbacks = litellm.success_callback
+
+ result = redact_message_input_output_from_logging(
+ result=result, litellm_logging_obj=self
+ )
+
+ for callback in callbacks:
+ try:
+ litellm_params = self.model_call_details.get("litellm_params", {})
+ if litellm_params.get("no-log", False) == True:
+                        # proxy cost tracking callbacks should run
+ if not (
+ isinstance(callback, CustomLogger)
+ and "_PROXY_" in callback.__class__.__name__
+ ):
+ print_verbose("no-log request, skipping logging")
+ continue
+ if callback == "lite_debugger":
+ print_verbose("reaches lite_debugger for logging!")
+ print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
+ print_verbose(
+ f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}"
+ )
+ liteDebuggerClient.log_event(
+ end_user=kwargs.get("user", "default"),
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ litellm_call_id=self.litellm_call_id,
+ print_verbose=print_verbose,
+ call_type=self.call_type,
+ stream=self.stream,
+ )
+ if callback == "promptlayer":
+ print_verbose("reaches promptlayer for logging!")
+ promptLayerLogger.log_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ )
+ if callback == "supabase":
+ print_verbose("reaches supabase for logging!")
+ kwargs = self.model_call_details
+
+ # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+ if self.stream:
+ if "complete_streaming_response" not in kwargs:
+ continue
+ else:
+ print_verbose("reaches supabase for streaming logging!")
+ result = kwargs["complete_streaming_response"]
+
+ model = kwargs["model"]
+ messages = kwargs["messages"]
+ optional_params = kwargs.get("optional_params", {})
+ litellm_params = kwargs.get("litellm_params", {})
+ supabaseClient.log_event(
+ model=model,
+ messages=messages,
+ end_user=optional_params.get("user", "default"),
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ litellm_call_id=litellm_params.get(
+ "litellm_call_id", str(uuid.uuid4())
+ ),
+ print_verbose=print_verbose,
+ )
+ if callback == "wandb":
+ print_verbose("reaches wandb for logging!")
+ weightsBiasesLogger.log_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ )
+ if callback == "langsmith":
+ print_verbose("reaches langsmith for logging!")
+ if self.stream:
+ if "complete_streaming_response" not in kwargs:
+ continue
+ else:
+ print_verbose(
+ "reaches langsmith for streaming logging!"
+ )
+ result = kwargs["complete_streaming_response"]
+ langsmithLogger.log_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ )
+ if callback == "logfire":
+ global logfireLogger
+ verbose_logger.debug("reaches logfire for success logging!")
+ kwargs = {}
+ for k, v in self.model_call_details.items():
+ if (
+ k != "original_response"
+ ): # copy.deepcopy raises errors as this could be a coroutine
+ kwargs[k] = v
+
+ # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+ if self.stream:
+ if "complete_streaming_response" not in kwargs:
+ continue
+ else:
+ print_verbose("reaches logfire for streaming logging!")
+ result = kwargs["complete_streaming_response"]
+
+ logfireLogger.log_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ level=LogfireLevel.INFO.value,
+ )
+
+ if callback == "lunary":
+ print_verbose("reaches lunary for logging!")
+ model = self.model
+ kwargs = self.model_call_details
+
+ input = kwargs.get("messages", kwargs.get("input", None))
+
+ type = (
+ "embed"
+ if self.call_type == CallTypes.embedding.value
+ else "llm"
+ )
+
+ # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+ if self.stream:
+ if "complete_streaming_response" not in kwargs:
+ continue
+ else:
+ result = kwargs["complete_streaming_response"]
+
+ lunaryLogger.log_event(
+ type=type,
+ kwargs=kwargs,
+ event="end",
+ model=model,
+ input=input,
+ user_id=kwargs.get("user", None),
+ # user_props=self.model_call_details.get("user_props", None),
+ extra=kwargs.get("optional_params", {}),
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ run_id=self.litellm_call_id,
+ print_verbose=print_verbose,
+ )
+ if callback == "helicone":
+ print_verbose("reaches helicone for logging!")
+ model = self.model
+ messages = self.model_call_details["input"]
+ heliconeLogger.log_success(
+ model=model,
+ messages=messages,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ )
+ if callback == "langfuse":
+ global langFuseLogger
+ verbose_logger.debug("reaches langfuse for success logging!")
+ kwargs = {}
+ for k, v in self.model_call_details.items():
+ if (
+ k != "original_response"
+ ): # copy.deepcopy raises errors as this could be a coroutine
+ kwargs[k] = v
+ # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+ if self.stream:
+ verbose_logger.debug(
+ f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
+ )
+ if complete_streaming_response is None:
+ continue
+ else:
+ print_verbose("reaches langfuse for streaming logging!")
+ result = kwargs["complete_streaming_response"]
+ if langFuseLogger is None or (
+ (
+ self.langfuse_public_key is not None
+ and self.langfuse_public_key
+ != langFuseLogger.public_key
+ )
+ and (
+                                self.langfuse_secret is not None
+                                and self.langfuse_secret
+                                != langFuseLogger.secret_key
+ )
+ ):
+ langFuseLogger = LangFuseLogger(
+ langfuse_public_key=self.langfuse_public_key,
+ langfuse_secret=self.langfuse_secret,
+ )
+ langFuseLogger.log_event(
+ kwargs=kwargs,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ user_id=kwargs.get("user", None),
+ print_verbose=print_verbose,
+ )
+ if callback == "datadog":
+ global dataDogLogger
+ verbose_logger.debug("reaches datadog for success logging!")
+ kwargs = {}
+ for k, v in self.model_call_details.items():
+ if (
+ k != "original_response"
+ ): # copy.deepcopy raises errors as this could be a coroutine
+ kwargs[k] = v
+ # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+ if self.stream:
+ verbose_logger.debug(
+ f"datadog: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
+ )
+ if complete_streaming_response is None:
+ continue
+ else:
+ print_verbose("reaches datadog for streaming logging!")
+ result = kwargs["complete_streaming_response"]
+ dataDogLogger.log_event(
+ kwargs=kwargs,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ user_id=kwargs.get("user", None),
+ print_verbose=print_verbose,
+ )
+ if callback == "prometheus":
+ verbose_logger.debug("reaches prometheus for success logging!")
+ kwargs = {}
+ for k, v in self.model_call_details.items():
+ if (
+ k != "original_response"
+ ): # copy.deepcopy raises errors as this could be a coroutine
+ kwargs[k] = v
+ # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+ if self.stream:
+ verbose_logger.debug(
+ f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
+ )
+ if complete_streaming_response is None:
+ continue
+ else:
+ print_verbose(
+ "reaches prometheus for streaming logging!"
+ )
+ result = kwargs["complete_streaming_response"]
+ prometheusLogger.log_event(
+ kwargs=kwargs,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ user_id=kwargs.get("user", None),
+ print_verbose=print_verbose,
+ )
+ if callback == "generic":
+ global genericAPILogger
+                        verbose_logger.debug("reaches generic api logger for success logging!")
+ kwargs = {}
+ for k, v in self.model_call_details.items():
+ if (
+ k != "original_response"
+ ): # copy.deepcopy raises errors as this could be a coroutine
+ kwargs[k] = v
+ # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+ if self.stream:
+ verbose_logger.debug(
+ f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
+ )
+ if complete_streaming_response is None:
+ continue
+ else:
+                                print_verbose("reaches generic api logger for streaming logging!")
+ result = kwargs["complete_streaming_response"]
+ if genericAPILogger is None:
+ genericAPILogger = GenericAPILogger()
+ genericAPILogger.log_event(
+ kwargs=kwargs,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ user_id=kwargs.get("user", None),
+ print_verbose=print_verbose,
+ )
+ if callback == "clickhouse":
+ global clickHouseLogger
+ verbose_logger.debug("reaches clickhouse for success logging!")
+ kwargs = {}
+ for k, v in self.model_call_details.items():
+ if (
+ k != "original_response"
+ ): # copy.deepcopy raises errors as this could be a coroutine
+ kwargs[k] = v
+ # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+ if self.stream:
+ verbose_logger.debug(
+ f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
+ )
+ if complete_streaming_response is None:
+ continue
+ else:
+ print_verbose(
+ "reaches clickhouse for streaming logging!"
+ )
+ result = kwargs["complete_streaming_response"]
+ if clickHouseLogger is None:
+ clickHouseLogger = ClickhouseLogger()
+ clickHouseLogger.log_event(
+ kwargs=kwargs,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ user_id=kwargs.get("user", None),
+ print_verbose=print_verbose,
+ )
+ if callback == "greenscale":
+ kwargs = {}
+ for k, v in self.model_call_details.items():
+ if (
+ k != "original_response"
+ ): # copy.deepcopy raises errors as this could be a coroutine
+ kwargs[k] = v
+ # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+ if self.stream:
+ verbose_logger.debug(
+ f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
+ )
+ if complete_streaming_response is None:
+ continue
+ else:
+ print_verbose(
+ "reaches greenscale for streaming logging!"
+ )
+ result = kwargs["complete_streaming_response"]
+
+ greenscaleLogger.log_event(
+ kwargs=kwargs,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ )
+ if callback == "cache" and litellm.cache is not None:
+ # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+ print_verbose("success_callback: reaches cache for logging!")
+ kwargs = self.model_call_details
+ if self.stream:
+ if "complete_streaming_response" not in kwargs:
+ print_verbose(
+ f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
+ )
+ pass
+ else:
+ print_verbose(
+ "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
+ )
+ result = kwargs["complete_streaming_response"]
+ # only add to cache once we have a complete streaming response
+ litellm.cache.add_cache(result, **kwargs)
+ if callback == "athina":
+ deep_copy = {}
+ for k, v in self.model_call_details.items():
+ deep_copy[k] = v
+ athinaLogger.log_event(
+ kwargs=deep_copy,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ )
+ if callback == "traceloop":
+ deep_copy = {}
+ for k, v in self.model_call_details.items():
+ if k != "original_response":
+ deep_copy[k] = v
+ traceloopLogger.log_event(
+ kwargs=deep_copy,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ user_id=kwargs.get("user", None),
+ print_verbose=print_verbose,
+ )
+ if callback == "s3":
+ global s3Logger
+ if s3Logger is None:
+ s3Logger = S3Logger()
+ if self.stream:
+ if "complete_streaming_response" in self.model_call_details:
+ print_verbose(
+ "S3Logger Logger: Got Stream Event - Completed Stream Response"
+ )
+ s3Logger.log_event(
+ kwargs=self.model_call_details,
+ response_obj=self.model_call_details[
+ "complete_streaming_response"
+ ],
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ )
+ else:
+ print_verbose(
+ "S3Logger Logger: Got Stream Event - No complete stream response as yet"
+ )
+ else:
+ s3Logger.log_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ )
+ if (
+ callback == "openmeter"
+ and self.model_call_details.get("litellm_params", {}).get(
+ "acompletion", False
+ )
+ == False
+ and self.model_call_details.get("litellm_params", {}).get(
+ "aembedding", False
+ )
+ == False
+ and self.model_call_details.get("litellm_params", {}).get(
+ "aimage_generation", False
+ )
+ == False
+ and self.model_call_details.get("litellm_params", {}).get(
+ "atranscription", False
+ )
+ == False
+ ):
+ global openMeterLogger
+ if openMeterLogger is None:
+ print_verbose("Instantiates openmeter client")
+ openMeterLogger = OpenMeterLogger()
+ if self.stream and complete_streaming_response is None:
+ openMeterLogger.log_stream_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ )
+ else:
+ if self.stream and complete_streaming_response:
+ self.model_call_details["complete_response"] = (
+ self.model_call_details.get(
+ "complete_streaming_response", {}
+ )
+ )
+ result = self.model_call_details["complete_response"]
+ openMeterLogger.log_success_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ )
+
+ if (
+ isinstance(callback, CustomLogger)
+ and self.model_call_details.get("litellm_params", {}).get(
+ "acompletion", False
+ )
+ == False
+ and self.model_call_details.get("litellm_params", {}).get(
+ "aembedding", False
+ )
+ == False
+ and self.model_call_details.get("litellm_params", {}).get(
+ "aimage_generation", False
+ )
+ == False
+ and self.model_call_details.get("litellm_params", {}).get(
+ "atranscription", False
+ )
+ == False
+ ): # custom logger class
+ if self.stream and complete_streaming_response is None:
+ callback.log_stream_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ )
+ else:
+ if self.stream and complete_streaming_response:
+ self.model_call_details["complete_response"] = (
+ self.model_call_details.get(
+ "complete_streaming_response", {}
+ )
+ )
+ result = self.model_call_details["complete_response"]
+ callback.log_success_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ )
+ if (
+ callable(callback) == True
+ and self.model_call_details.get("litellm_params", {}).get(
+ "acompletion", False
+ )
+ == False
+ and self.model_call_details.get("litellm_params", {}).get(
+ "aembedding", False
+ )
+ == False
+ and self.model_call_details.get("litellm_params", {}).get(
+ "aimage_generation", False
+ )
+ == False
+ and self.model_call_details.get("litellm_params", {}).get(
+ "atranscription", False
+ )
+ == False
+ ): # custom logger functions
+ print_verbose(
+ f"success callbacks: Running Custom Callback Function"
+ )
+ customLogger.log_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ callback_func=callback,
+ )
+
+ except Exception as e:
+ print_verbose(
+ f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging with integrations {traceback.format_exc()}"
+ )
+ print_verbose(
+ f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
+ )
+ if capture_exception: # log this error to sentry for debugging
+ capture_exception(e)
+        except Exception as e:
+ verbose_logger.error(
+ "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format(
+ str(e), traceback.format_exc()
+ ),
+ )
+
+ async def async_success_handler(
+ self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs
+ ):
+ """
+ Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions.
+ """
+ print_verbose("Logging Details LiteLLM-Async Success Call")
+ start_time, end_time, result = self._success_handler_helper_fn(
+ start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit
+ )
+ ## BUILD COMPLETE STREAMED RESPONSE
+ complete_streaming_response = None
+ if self.stream:
+ if result.choices[0].finish_reason is not None: # if it's the last chunk
+ self.streaming_chunks.append(result)
+ # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}")
+ try:
+ complete_streaming_response = litellm.stream_chunk_builder(
+ self.streaming_chunks,
+ messages=self.model_call_details.get("messages", None),
+ start_time=start_time,
+ end_time=end_time,
+ )
+ except Exception as e:
+ print_verbose(
+ "Error occurred building stream chunk in success logging: {}\n{}".format(
+ str(e), traceback.format_exc()
+ ),
+ log_level="ERROR",
+ )
+ complete_streaming_response = None
+ else:
+ self.streaming_chunks.append(result)
+ if complete_streaming_response is not None:
+ print_verbose("Async success callbacks: Got a complete streaming response")
+ self.model_call_details["async_complete_streaming_response"] = (
+ complete_streaming_response
+ )
+ try:
+ if self.model_call_details.get("cache_hit", False) is True:
+ self.model_call_details["response_cost"] = 0.0
+ else:
+ # check if base_model set on azure
+ base_model = _get_base_model_from_metadata(
+ model_call_details=self.model_call_details
+ )
+ # base_model defaults to None if not set on model_info
+ self.model_call_details["response_cost"] = litellm.completion_cost(
+ completion_response=complete_streaming_response,
+ model=base_model,
+ )
+ verbose_logger.debug(
+ f"Model={self.model}; cost={self.model_call_details['response_cost']}"
+ )
+ except litellm.NotFoundError as e:
+ verbose_logger.error(
+ f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None"
+ )
+ self.model_call_details["response_cost"] = None
+
+ if self.dynamic_async_success_callbacks is not None and isinstance(
+ self.dynamic_async_success_callbacks, list
+ ):
+ callbacks = self.dynamic_async_success_callbacks
+ ## keep the internal functions ##
+ for callback in litellm._async_success_callback:
+ callback_name = ""
+ if isinstance(callback, CustomLogger):
+ callback_name = callback.__class__.__name__
+ if callable(callback):
+ callback_name = callback.__name__
+ if "_PROXY_" in callback_name:
+ callbacks.append(callback)
+ else:
+ callbacks = litellm._async_success_callback
+
+ result = redact_message_input_output_from_logging(
+ result=result, litellm_logging_obj=self
+ )
+
+ for callback in callbacks:
+ # check if callback can run for this request
+ litellm_params = self.model_call_details.get("litellm_params", {})
+ if litellm_params.get("no-log", False) == True:
+                # proxy cost tracking callbacks should run
+ if not (
+ isinstance(callback, CustomLogger)
+ and "_PROXY_" in callback.__class__.__name__
+ ):
+ print_verbose("no-log request, skipping logging")
+ continue
+ try:
+ if kwargs.get("no-log", False) == True:
+ print_verbose("no-log request, skipping logging")
+ continue
+ if callback == "cache" and litellm.cache is not None:
+ # set_cache once complete streaming response is built
+ print_verbose("async success_callback: reaches cache for logging!")
+ kwargs = self.model_call_details
+ if self.stream:
+ if "async_complete_streaming_response" not in kwargs:
+ print_verbose(
+ f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. Kwargs={kwargs}\n\n"
+ )
+ pass
+ else:
+ print_verbose(
+ "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache"
+ )
+ result = kwargs["async_complete_streaming_response"]
+ # only add to cache once we have a complete streaming response
+ if litellm.cache is not None and not isinstance(
+ litellm.cache.cache, S3Cache
+ ):
+ await litellm.cache.async_add_cache(result, **kwargs)
+ else:
+ litellm.cache.add_cache(result, **kwargs)
+ if callback == "openmeter":
+ global openMeterLogger
+ if self.stream == True:
+ if (
+ "async_complete_streaming_response"
+ in self.model_call_details
+ ):
+ await openMeterLogger.async_log_success_event(
+ kwargs=self.model_call_details,
+ response_obj=self.model_call_details[
+ "async_complete_streaming_response"
+ ],
+ start_time=start_time,
+ end_time=end_time,
+ )
+ else:
+ await openMeterLogger.async_log_stream_event( # [TODO]: move this to being an async log stream event function
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ )
+ else:
+ await openMeterLogger.async_log_success_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ )
+ if isinstance(callback, CustomLogger): # custom logger class
+ if self.stream == True:
+ if (
+ "async_complete_streaming_response"
+ in self.model_call_details
+ ):
+ await callback.async_log_success_event(
+ kwargs=self.model_call_details,
+ response_obj=self.model_call_details[
+ "async_complete_streaming_response"
+ ],
+ start_time=start_time,
+ end_time=end_time,
+ )
+ else:
+ await callback.async_log_stream_event( # [TODO]: move this to being an async log stream event function
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ )
+ else:
+ await callback.async_log_success_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ )
+ if callable(callback): # custom logger functions
+ if self.stream:
+ if (
+ "async_complete_streaming_response"
+ in self.model_call_details
+ ):
+ await customLogger.async_log_event(
+ kwargs=self.model_call_details,
+ response_obj=self.model_call_details[
+ "async_complete_streaming_response"
+ ],
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ callback_func=callback,
+ )
+ else:
+ await customLogger.async_log_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ callback_func=callback,
+ )
+ if callback == "dynamodb":
+ global dynamoLogger
+ if dynamoLogger is None:
+ dynamoLogger = DyanmoDBLogger()
+ if self.stream:
+ if (
+ "async_complete_streaming_response"
+ in self.model_call_details
+ ):
+ print_verbose(
+ "DynamoDB Logger: Got Stream Event - Completed Stream Response"
+ )
+ await dynamoLogger._async_log_event(
+ kwargs=self.model_call_details,
+ response_obj=self.model_call_details[
+ "async_complete_streaming_response"
+ ],
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ )
+ else:
+ print_verbose(
+ "DynamoDB Logger: Got Stream Event - No complete stream response as yet"
+ )
+ else:
+ await dynamoLogger._async_log_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ )
+ except Exception as e:
+ verbose_logger.error(
+ f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
+ )
+ pass
+
+ def _failure_handler_helper_fn(
+ self, exception, traceback_exception, start_time=None, end_time=None
+ ):
+ if start_time is None:
+ start_time = self.start_time
+ if end_time is None:
+ end_time = datetime.datetime.now()
+
+        # on some exceptions, model_call_details is not always initialized; this ensures we still log those exceptions
+ if not hasattr(self, "model_call_details"):
+ self.model_call_details = {}
+
+ self.model_call_details["log_event_type"] = "failed_api_call"
+ self.model_call_details["exception"] = exception
+ self.model_call_details["traceback_exception"] = traceback_exception
+ self.model_call_details["end_time"] = end_time
+ self.model_call_details.setdefault("original_response", None)
+ return start_time, end_time
+
+ def failure_handler(
+ self, exception, traceback_exception, start_time=None, end_time=None
+ ):
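+        """
+        Synchronous failure handler. Fans the exception out to every configured
+        failure callback (litellm.failure_callback plus any dynamic callbacks set
+        for this request). Per-callback errors are caught and logged so that
+        failure logging never masks the original exception.
+        """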
+ verbose_logger.debug(
+ f"Logging Details LiteLLM-Failure Call: {litellm.failure_callback}"
+ )
+ try:
+ start_time, end_time = self._failure_handler_helper_fn(
+ exception=exception,
+ traceback_exception=traceback_exception,
+ start_time=start_time,
+ end_time=end_time,
+ )
+            callbacks = []  # init this to empty in case it's not created
+
+ if self.dynamic_failure_callbacks is not None and isinstance(
+ self.dynamic_failure_callbacks, list
+ ):
+ callbacks = self.dynamic_failure_callbacks
+ ## keep the internal functions ##
+ for callback in litellm.failure_callback:
+ if (
+ isinstance(callback, CustomLogger)
+ and "_PROXY_" in callback.__class__.__name__
+ ):
+ callbacks.append(callback)
+ else:
+ callbacks = litellm.failure_callback
+
+            result = None  # result sent to all loggers, init this to None in case it's not created
+
+ result = redact_message_input_output_from_logging(
+ result=result, litellm_logging_obj=self
+ )
+ for callback in callbacks:
+ try:
+ if callback == "lite_debugger":
+ print_verbose("reaches lite_debugger for logging!")
+ print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
+ result = {
+ "model": self.model,
+ "created": time.time(),
+ "error": traceback_exception,
+ "usage": {
+ "prompt_tokens": prompt_token_calculator(
+ self.model, messages=self.messages
+ ),
+ "completion_tokens": 0,
+ },
+ }
+ liteDebuggerClient.log_event(
+ model=self.model,
+ messages=self.messages,
+ end_user=self.model_call_details.get("user", "default"),
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ litellm_call_id=self.litellm_call_id,
+ print_verbose=print_verbose,
+ call_type=self.call_type,
+ stream=self.stream,
+ )
+ if callback == "lunary":
+ print_verbose("reaches lunary for logging error!")
+
+ model = self.model
+
+ input = self.model_call_details["input"]
+
+ _type = (
+ "embed"
+ if self.call_type == CallTypes.embedding.value
+ else "llm"
+ )
+
+ lunaryLogger.log_event(
+ type=_type,
+ event="error",
+ user_id=self.model_call_details.get("user", "default"),
+ model=model,
+ input=input,
+ error=traceback_exception,
+ run_id=self.litellm_call_id,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ )
+ if callback == "sentry":
+ print_verbose("sending exception to sentry")
+ if capture_exception:
+ capture_exception(exception)
+ else:
+ print_verbose(
+ f"capture exception not initialized: {capture_exception}"
+ )
+ elif callback == "supabase":
+ print_verbose("reaches supabase for logging!")
+ print_verbose(f"supabaseClient: {supabaseClient}")
+ result = {
+                            "model": self.model,  # use self.model; the local `model` is only set in the lunary branch
+ "created": time.time(),
+ "error": traceback_exception,
+ "usage": {
+ "prompt_tokens": prompt_token_calculator(
+                                    self.model, messages=self.messages
+ ),
+ "completion_tokens": 0,
+ },
+ }
+ supabaseClient.log_event(
+ model=self.model,
+ messages=self.messages,
+ end_user=self.model_call_details.get("user", "default"),
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ litellm_call_id=self.model_call_details["litellm_call_id"],
+ print_verbose=print_verbose,
+ )
+ if callable(callback): # custom logger functions
+ customLogger.log_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ callback_func=callback,
+ )
+ if (
+ isinstance(callback, CustomLogger)
+ and self.model_call_details.get("litellm_params", {}).get(
+ "acompletion", False
+ )
+ == False
+ and self.model_call_details.get("litellm_params", {}).get(
+ "aembedding", False
+ )
+ == False
+ ): # custom logger class
+ callback.log_failure_event(
+ start_time=start_time,
+ end_time=end_time,
+ response_obj=result,
+ kwargs=self.model_call_details,
+ )
+ if callback == "langfuse":
+ global langFuseLogger
+ verbose_logger.debug("reaches langfuse for logging failure")
+ kwargs = {}
+ for k, v in self.model_call_details.items():
+ if (
+ k != "original_response"
+ ): # copy.deepcopy raises errors as this could be a coroutine
+ kwargs[k] = v
+                        # this only logs streaming once, i.e. when the stream ends and complete_streaming_response exists
+                        if langFuseLogger is None or (
+                            self.langfuse_public_key is not None
+                            and self.langfuse_public_key != langFuseLogger.public_key
+                        ):
+ langFuseLogger = LangFuseLogger(
+ langfuse_public_key=self.langfuse_public_key,
+ langfuse_secret=self.langfuse_secret,
+ )
+ langFuseLogger.log_event(
+ start_time=start_time,
+ end_time=end_time,
+ response_obj=None,
+ user_id=kwargs.get("user", None),
+ print_verbose=print_verbose,
+ status_message=str(exception),
+ level="ERROR",
+ kwargs=self.model_call_details,
+ )
+ if callback == "traceloop":
+ traceloopLogger.log_event(
+ start_time=start_time,
+ end_time=end_time,
+ response_obj=None,
+                            user_id=self.model_call_details.get("user", None),  # kwargs may be unset in this branch
+ print_verbose=print_verbose,
+ status_message=str(exception),
+ level="ERROR",
+ kwargs=self.model_call_details,
+ )
+ if callback == "prometheus":
+ global prometheusLogger
+                        verbose_logger.debug("reaches prometheus for failure logging!")
+ kwargs = {}
+ for k, v in self.model_call_details.items():
+ if (
+ k != "original_response"
+ ): # copy.deepcopy raises errors as this could be a coroutine
+ kwargs[k] = v
+ kwargs["exception"] = str(exception)
+ prometheusLogger.log_event(
+ kwargs=kwargs,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ user_id=kwargs.get("user", None),
+ print_verbose=print_verbose,
+ )
+
+ if callback == "logfire":
+ verbose_logger.debug("reaches logfire for failure logging!")
+ kwargs = {}
+ for k, v in self.model_call_details.items():
+ if (
+ k != "original_response"
+ ): # copy.deepcopy raises errors as this could be a coroutine
+ kwargs[k] = v
+ kwargs["exception"] = exception
+
+ logfireLogger.log_event(
+ kwargs=kwargs,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ level=LogfireLevel.ERROR.value,
+ print_verbose=print_verbose,
+ )
+ except Exception as e:
+ print_verbose(
+ f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}"
+ )
+ print_verbose(
+ f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
+ )
+ if capture_exception: # log this error to sentry for debugging
+ capture_exception(e)
+ except Exception as e:
+ verbose_logger.error(
+ "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {}\n{}".format(
+ str(e), traceback.format_exc()
+ )
+ )
+
+ async def async_failure_handler(
+ self, exception, traceback_exception, start_time=None, end_time=None
+ ):
+        """
+        Async failure handler. Runs the callbacks registered in
+        litellm._async_failure_callback, so custom integrations can use async
+        functions without running into asyncio event-loop issues.
+        """
+ start_time, end_time = self._failure_handler_helper_fn(
+ exception=exception,
+ traceback_exception=traceback_exception,
+ start_time=start_time,
+ end_time=end_time,
+ )
+        result = None  # result sent to all loggers, init this to None in case it's not created
+ for callback in litellm._async_failure_callback:
+ try:
+ if isinstance(callback, CustomLogger): # custom logger class
+ await callback.async_log_failure_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ ) # type: ignore
+ if callable(callback): # custom logger functions
+ await customLogger.async_log_event(
+ kwargs=self.model_call_details,
+ response_obj=result,
+ start_time=start_time,
+ end_time=end_time,
+ print_verbose=print_verbose,
+ callback_func=callback,
+ )
+ except Exception as e:
+ verbose_logger.error(
+                    "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {}\n{}\nCallback={}".format(
+ str(e), traceback.format_exc(), callback
+ )
+ )
+
+
+def set_callbacks(callback_list, function_id=None):
+    """
+    Globally initializes the logging clients for the given callback names
+    (e.g. "sentry", "langfuse", "s3") or callables and stores them in module-level globals.
+    """
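+    # Illustrative usage (hypothetical callback list -- any of the names handled below work):
+    #   set_callbacks(["sentry", "langfuse", my_callback_fn])
+    # this initializes sentry_sdk, a LangFuseLogger, and a CustomLogger wrapper respectively.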
+ global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
+
+ try:
+ for callback in callback_list:
+ print_verbose(f"init callback list: {callback}")
+ if callback == "sentry":
+ try:
+ import sentry_sdk
+ except ImportError:
+ print_verbose("Package 'sentry_sdk' is missing. Installing it...")
+ subprocess.check_call(
+ [sys.executable, "-m", "pip", "install", "sentry_sdk"]
+ )
+ import sentry_sdk
+ sentry_sdk_instance = sentry_sdk
+ sentry_trace_rate = (
+ os.environ.get("SENTRY_API_TRACE_RATE")
+ if "SENTRY_API_TRACE_RATE" in os.environ
+ else "1.0"
+ )
+ sentry_sdk_instance.init(
+ dsn=os.environ.get("SENTRY_DSN"),
+ traces_sample_rate=float(sentry_trace_rate),
+ )
+ capture_exception = sentry_sdk_instance.capture_exception
+ add_breadcrumb = sentry_sdk_instance.add_breadcrumb
+ elif callback == "posthog":
+ try:
+ from posthog import Posthog
+ except ImportError:
+ print_verbose("Package 'posthog' is missing. Installing it...")
+ subprocess.check_call(
+ [sys.executable, "-m", "pip", "install", "posthog"]
+ )
+ from posthog import Posthog
+ posthog = Posthog(
+ project_api_key=os.environ.get("POSTHOG_API_KEY"),
+ host=os.environ.get("POSTHOG_API_URL"),
+ )
+ elif callback == "slack":
+ try:
+ from slack_bolt import App
+ except ImportError:
+ print_verbose("Package 'slack_bolt' is missing. Installing it...")
+ subprocess.check_call(
+ [sys.executable, "-m", "pip", "install", "slack_bolt"]
+ )
+ from slack_bolt import App
+ slack_app = App(
+ token=os.environ.get("SLACK_API_TOKEN"),
+ signing_secret=os.environ.get("SLACK_API_SECRET"),
+ )
+ alerts_channel = os.environ["SLACK_API_CHANNEL"]
+ print_verbose(f"Initialized Slack App: {slack_app}")
+ elif callback == "traceloop":
+ traceloopLogger = TraceloopLogger()
+ elif callback == "athina":
+ athinaLogger = AthinaLogger()
+ print_verbose("Initialized Athina Logger")
+ elif callback == "helicone":
+ heliconeLogger = HeliconeLogger()
+ elif callback == "lunary":
+ lunaryLogger = LunaryLogger()
+ elif callback == "promptlayer":
+ promptLayerLogger = PromptLayerLogger()
+ elif callback == "langfuse":
+ langFuseLogger = LangFuseLogger()
+ elif callback == "openmeter":
+ openMeterLogger = OpenMeterLogger()
+ elif callback == "datadog":
+ dataDogLogger = DataDogLogger()
+ elif callback == "prometheus":
+ if prometheusLogger is None:
+ prometheusLogger = PrometheusLogger()
+ elif callback == "dynamodb":
+ dynamoLogger = DyanmoDBLogger()
+ elif callback == "s3":
+ s3Logger = S3Logger()
+ elif callback == "wandb":
+ weightsBiasesLogger = WeightsBiasesLogger()
+ elif callback == "langsmith":
+ langsmithLogger = LangsmithLogger()
+ elif callback == "logfire":
+ logfireLogger = LogfireLogger()
+ elif callback == "aispend":
+ aispendLogger = AISpendLogger()
+ elif callback == "berrispend":
+ berrispendLogger = BerriSpendLogger()
+ elif callback == "supabase":
+ print_verbose("instantiating supabase")
+ supabaseClient = Supabase()
+ elif callback == "greenscale":
+ greenscaleLogger = GreenscaleLogger()
+ print_verbose("Initialized Greenscale Logger")
+ elif callback == "lite_debugger":
+ print_verbose("instantiating lite_debugger")
+ if function_id:
+ liteDebuggerClient = LiteDebugger(email=function_id)
+ elif litellm.token:
+ liteDebuggerClient = LiteDebugger(email=litellm.token)
+ elif litellm.email:
+ liteDebuggerClient = LiteDebugger(email=litellm.email)
+ else:
+ liteDebuggerClient = LiteDebugger(email=str(uuid.uuid4()))
+ elif callable(callback):
+ customLogger = CustomLogger()
+ except Exception as e:
+ raise e
diff --git a/litellm/litellm_core_utils/redact_messages.py b/litellm/litellm_core_utils/redact_messages.py
index 9c0df2011..8f270d8be 100644
--- a/litellm/litellm_core_utils/redact_messages.py
+++ b/litellm/litellm_core_utils/redact_messages.py
@@ -12,7 +12,9 @@ from typing import TYPE_CHECKING, Any
import litellm
if TYPE_CHECKING:
- from litellm.utils import Logging as _LiteLLMLoggingObject
+ from litellm.litellm_core_utils.litellm_logging import (
+ Logging as _LiteLLMLoggingObject,
+ )
LiteLLMLoggingObject = _LiteLLMLoggingObject
else:
diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index 236f7cd4f..808813c05 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -5,7 +5,9 @@ import requests, copy # type: ignore
import time
from functools import partial
from typing import Callable, Optional, List, Union
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
+import litellm.litellm_core_utils
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import (
@@ -205,7 +207,7 @@ class AnthropicChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response],
model_response: ModelResponse,
stream: bool,
- logging_obj: litellm.utils.Logging,
+ logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict,
api_key: str,
data: Union[dict, str],
@@ -320,7 +322,7 @@ class AnthropicChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response],
model_response: ModelResponse,
stream: bool,
- logging_obj: litellm.utils.Logging,
+ logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict,
api_key: str,
data: Union[dict, str],
diff --git a/litellm/llms/base.py b/litellm/llms/base.py
index 8c2f5101e..0222d2366 100644
--- a/litellm/llms/base.py
+++ b/litellm/llms/base.py
@@ -2,7 +2,7 @@
import litellm
import httpx, requests
from typing import Optional, Union
-from litellm.utils import Logging
+from litellm.litellm_core_utils.litellm_logging import Logging
class BaseLLM:
diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py
index 4314032e7..73fa18023 100644
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@@ -5,12 +5,10 @@ import time, uuid
from typing import Callable, Optional, Any, Union, List
import litellm
from litellm.utils import (
- ModelResponse,
get_secret,
- Usage,
- ImageResponse,
- map_finish_reason,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.types.utils import ImageResponse, ModelResponse, Usage
from .prompt_templates.factory import (
prompt_factory,
custom_prompt,
@@ -633,7 +631,11 @@ def init_bedrock_client(
config = boto3.session.Config()
### CHECK STS ###
- if aws_web_identity_token is not None and aws_role_name is not None and aws_session_name is not None:
+ if (
+ aws_web_identity_token is not None
+ and aws_role_name is not None
+ and aws_session_name is not None
+ ):
oidc_token = get_secret(aws_web_identity_token)
if oidc_token is None:
@@ -642,9 +644,7 @@ def init_bedrock_client(
status_code=401,
)
- sts_client = boto3.client(
- "sts"
- )
+ sts_client = boto3.client("sts")
# https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sts/client/assume_role_with_web_identity.html
diff --git a/litellm/llms/bedrock_httpx.py b/litellm/llms/bedrock_httpx.py
index 7c7210f84..510bf7c7c 100644
--- a/litellm/llms/bedrock_httpx.py
+++ b/litellm/llms/bedrock_httpx.py
@@ -22,13 +22,12 @@ from typing import (
from litellm.utils import (
ModelResponse,
Usage,
- map_finish_reason,
CustomStreamWrapper,
- Message,
- Choices,
get_secret,
- Logging,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
+from litellm.litellm_core_utils.litellm_logging import Logging
+from litellm.types.utils import Message, Choices
import litellm, uuid
from .prompt_templates.factory import (
prompt_factory,
diff --git a/litellm/llms/databricks.py b/litellm/llms/databricks.py
index 4fe475259..1ab09246b 100644
--- a/litellm/llms/databricks.py
+++ b/litellm/llms/databricks.py
@@ -10,10 +10,10 @@ from typing import Callable, Optional, List, Union, Tuple, Literal
from litellm.utils import (
ModelResponse,
Usage,
- map_finish_reason,
CustomStreamWrapper,
EmbeddingResponse,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
@@ -289,7 +289,7 @@ class DatabricksChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response],
model_response: ModelResponse,
stream: bool,
- logging_obj: litellm.utils.Logging,
+ logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict,
api_key: str,
data: Union[dict, str],
diff --git a/litellm/llms/predibase.py b/litellm/llms/predibase.py
index 66c28acee..8ad294457 100644
--- a/litellm/llms/predibase.py
+++ b/litellm/llms/predibase.py
@@ -12,11 +12,11 @@ from typing import Callable, Optional, List, Literal, Union
from litellm.utils import (
ModelResponse,
Usage,
- map_finish_reason,
CustomStreamWrapper,
Message,
Choices,
)
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
@@ -198,7 +198,7 @@ class PredibaseChatCompletion(BaseLLM):
response: Union[requests.Response, httpx.Response],
model_response: ModelResponse,
stream: bool,
- logging_obj: litellm.utils.Logging,
+ logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict,
api_key: str,
data: Union[dict, str],
diff --git a/litellm/llms/triton.py b/litellm/llms/triton.py
index 711186b3f..d647c9c43 100644
--- a/litellm/llms/triton.py
+++ b/litellm/llms/triton.py
@@ -4,7 +4,6 @@ from enum import Enum
import requests, copy # type: ignore
import time
from typing import Callable, Optional, List
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py
index 67a8a4519..60d3d5897 100644
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@@ -5,7 +5,8 @@ import requests # type: ignore
import time
from typing import Callable, Optional, Union, List, Literal, Any
from pydantic import BaseModel
-from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm, uuid
import httpx, inspect # type: ignore
from litellm.types.llms.vertex_ai import *
diff --git a/litellm/llms/vertex_ai_anthropic.py b/litellm/llms/vertex_ai_anthropic.py
index 065294280..fd43d4378 100644
--- a/litellm/llms/vertex_ai_anthropic.py
+++ b/litellm/llms/vertex_ai_anthropic.py
@@ -6,7 +6,8 @@ from enum import Enum
import requests, copy # type: ignore
import time, uuid
from typing import Callable, Optional, List
-from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from .prompt_templates.factory import (
diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py
index b1c38f0bc..c9e48f3e1 100644
--- a/litellm/llms/vertex_httpx.py
+++ b/litellm/llms/vertex_httpx.py
@@ -8,7 +8,10 @@ from enum import Enum
import requests # type: ignore
import time
from typing import Callable, Optional, Union, List, Any, Tuple
-from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
+import litellm.litellm_core_utils
+import litellm.litellm_core_utils.litellm_logging
+from litellm.utils import ModelResponse, Usage, CustomStreamWrapper
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
import litellm, uuid
import httpx, inspect # type: ignore
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
@@ -320,7 +323,7 @@ class VertexLLM(BaseLLM):
model: str,
response: httpx.Response,
model_response: ModelResponse,
- logging_obj: litellm.utils.Logging,
+ logging_obj: litellm.litellm_core_utils.litellm_logging.Logging,
optional_params: dict,
api_key: str,
data: Union[dict, str],
diff --git a/litellm/proxy/_experimental/out/404.html b/litellm/proxy/_experimental/out/404.html
deleted file mode 100644
index 794e8d66a..000000000
--- a/litellm/proxy/_experimental/out/404.html
+++ /dev/null
@@ -1 +0,0 @@
-404: This page could not be found. LiteLLM Dashboard (minified HTML for the 404 page, omitted)
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/model_hub.html b/litellm/proxy/_experimental/out/model_hub.html
deleted file mode 100644
index dda9f78c2..000000000
--- a/litellm/proxy/_experimental/out/model_hub.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index 61a21232d..000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index c0b0543e8..badc77546 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -12,6 +12,8 @@ import litellm
import backoff
import traceback
from pydantic import BaseModel
+import litellm.litellm_core_utils
+import litellm.litellm_core_utils.litellm_logging
from litellm.proxy._types import (
UserAPIKeyAuth,
DynamoDBArgs,
@@ -266,7 +268,9 @@ class ProxyLogging:
+ litellm.failure_callback
)
)
- litellm.utils.set_callbacks(callback_list=callback_list)
+ litellm.litellm_core_utils.litellm_logging.set_callbacks(
+ callback_list=callback_list
+ )
# The actual implementation of the function
async def pre_call_hook(
@@ -331,7 +335,9 @@ class ProxyLogging:
return data
except Exception as e:
if "litellm_logging_obj" in data:
- logging_obj: litellm.utils.Logging = data["litellm_logging_obj"]
+ logging_obj: litellm.litellm_core_utils.litellm_logging.Logging = data[
+ "litellm_logging_obj"
+ ]
## ASYNC FAILURE HANDLER ##
error_message = ""
diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py
index c0be350f9..3f7288854 100644
--- a/litellm/tests/test_completion_cost.py
+++ b/litellm/tests/test_completion_cost.py
@@ -13,7 +13,7 @@ from litellm import (
open_ai_chat_completion_models,
TranscriptionResponse,
)
-from litellm.utils import CustomLogger
+from litellm.litellm_core_utils.litellm_logging import CustomLogger
import pytest, asyncio
diff --git a/litellm/tests/test_utils.py b/litellm/tests/test_utils.py
index 7d581a0fb..bf84ba994 100644
--- a/litellm/tests/test_utils.py
+++ b/litellm/tests/test_utils.py
@@ -412,7 +412,7 @@ def test_redact_msgs_from_logs():
from litellm.litellm_core_utils.redact_messages import (
redact_message_input_output_from_logging,
)
- from litellm.utils import Logging
+ from litellm.litellm_core_utils.litellm_logging import Logging
litellm.turn_off_message_logging = True
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 1fbb375d3..29d21143e 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -3,6 +3,16 @@ from typing_extensions import TypedDict
from enum import Enum
from typing_extensions import override, Required, Dict
from .llms.openai import ChatCompletionUsageBlock, ChatCompletionToolCallChunk
+from ..litellm_core_utils.core_helpers import map_finish_reason
+from openai._models import BaseModel as OpenAIObject
+from pydantic import ConfigDict
+import uuid
+import json
+import time
+
+
+def _generate_id(): # private helper function
+ return "chatcmpl-" + str(uuid.uuid4())
class LiteLLMCommonStrings(Enum):
@@ -48,3 +58,904 @@ class GenericStreamingChunk(TypedDict):
finish_reason: Required[str]
usage: Optional[ChatCompletionUsageBlock]
index: int
+
+
+class CallTypes(Enum):
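+    """Enum of the request types LiteLLM supports (sync and async variants)."""
+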
+ embedding = "embedding"
+ aembedding = "aembedding"
+ completion = "completion"
+ acompletion = "acompletion"
+ atext_completion = "atext_completion"
+ text_completion = "text_completion"
+ image_generation = "image_generation"
+ aimage_generation = "aimage_generation"
+ moderation = "moderation"
+ amoderation = "amoderation"
+ atranscription = "atranscription"
+ transcription = "transcription"
+ aspeech = "aspeech"
+ speech = "speech"
+
+
+class TopLogprob(OpenAIObject):
+ token: str
+ """The token."""
+
+ bytes: Optional[List[int]] = None
+ """A list of integers representing the UTF-8 bytes representation of the token.
+
+ Useful in instances where characters are represented by multiple tokens and
+ their byte representations must be combined to generate the correct text
+ representation. Can be `null` if there is no bytes representation for the token.
+ """
+
+ logprob: float
+ """The log probability of this token, if it is within the top 20 most likely
+ tokens.
+
+ Otherwise, the value `-9999.0` is used to signify that the token is very
+ unlikely.
+ """
+
+
+class ChatCompletionTokenLogprob(OpenAIObject):
+ token: str
+ """The token."""
+
+ bytes: Optional[List[int]] = None
+ """A list of integers representing the UTF-8 bytes representation of the token.
+
+ Useful in instances where characters are represented by multiple tokens and
+ their byte representations must be combined to generate the correct text
+ representation. Can be `null` if there is no bytes representation for the token.
+ """
+
+ logprob: float
+ """The log probability of this token, if it is within the top 20 most likely
+ tokens.
+
+ Otherwise, the value `-9999.0` is used to signify that the token is very
+ unlikely.
+ """
+
+ top_logprobs: List[TopLogprob]
+ """List of the most likely tokens and their log probability, at this token
+ position.
+
+ In rare cases, there may be fewer than the number of requested `top_logprobs`
+ returned.
+ """
+
+
+class ChoiceLogprobs(OpenAIObject):
+ content: Optional[List[ChatCompletionTokenLogprob]] = None
+ """A list of message content tokens with log probability information."""
+
+
+class FunctionCall(OpenAIObject):
+ arguments: str
+ name: Optional[str] = None
+
+
+class Function(OpenAIObject):
+ arguments: str
+ name: Optional[str] = None
+
+ def __init__(
+ self,
+ arguments: Union[Dict, str],
+ name: Optional[str] = None,
+ **params,
+ ):
+        if isinstance(arguments, Dict):
+            arguments = json.dumps(arguments)
+
+ # Build a dictionary with the structure your BaseModel expects
+ data = {"arguments": arguments, "name": name, **params}
+
+ super(Function, self).__init__(**data)
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class ChatCompletionDeltaToolCall(OpenAIObject):
+ id: Optional[str] = None
+ function: Function
+ type: Optional[str] = None
+ index: int
+
+
+class HiddenParams(OpenAIObject):
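+    """Extra response metadata (original response, router model_id, api_base) kept outside the OpenAI schema."""
+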
+ original_response: Optional[str] = None
+ model_id: Optional[str] = None # used in Router for individual deployments
+ api_base: Optional[str] = None # returns api base used for making completion call
+
+ model_config = ConfigDict(extra="allow", protected_namespaces=())
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs):
+ try:
+ return self.model_dump() # noqa
+ except:
+ # if using pydantic v1
+ return self.dict()
+
+
+class ChatCompletionMessageToolCall(OpenAIObject):
+ def __init__(
+ self,
+ function: Union[Dict, Function],
+ id: Optional[str] = None,
+ type: Optional[str] = None,
+ **params,
+ ):
+ super(ChatCompletionMessageToolCall, self).__init__(**params)
+ if isinstance(function, Dict):
+ self.function = Function(**function)
+ else:
+ self.function = function
+
+ if id is not None:
+ self.id = id
+ else:
+ self.id = f"{uuid.uuid4()}"
+
+ if type is not None:
+ self.type = type
+ else:
+ self.type = "function"
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class Message(OpenAIObject):
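+    """OpenAI-compatible assistant message, with optional function/tool calls and logprobs."""
+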
+ def __init__(
+ self,
+ content: Optional[str] = "default",
+ role="assistant",
+ logprobs=None,
+ function_call=None,
+ tool_calls=None,
+ **params,
+ ):
+ super(Message, self).__init__(**params)
+ self.content = content
+ self.role = role
+ if function_call is not None:
+ self.function_call = FunctionCall(**function_call)
+
+ if tool_calls is not None:
+ self.tool_calls = []
+ for tool_call in tool_calls:
+ self.tool_calls.append(ChatCompletionMessageToolCall(**tool_call))
+
+ if logprobs is not None:
+ self._logprobs = ChoiceLogprobs(**logprobs)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs):
+ try:
+ return self.model_dump() # noqa
+ except:
+ # if using pydantic v1
+ return self.dict()
+
+
+class Delta(OpenAIObject):
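+    """Incremental content/role/tool-call fragment of a streaming chunk."""
+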
+ def __init__(
+ self,
+ content=None,
+ role=None,
+ function_call=None,
+ tool_calls=None,
+ **params,
+ ):
+ super(Delta, self).__init__(**params)
+ self.content = content
+ self.role = role
+
+ if function_call is not None and isinstance(function_call, dict):
+ self.function_call = FunctionCall(**function_call)
+ else:
+ self.function_call = function_call
+ if tool_calls is not None and isinstance(tool_calls, list):
+ self.tool_calls = []
+ for tool_call in tool_calls:
+ if isinstance(tool_call, dict):
+ if tool_call.get("index", None) is None:
+ tool_call["index"] = 0
+ self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
+ elif isinstance(tool_call, ChatCompletionDeltaToolCall):
+ self.tool_calls.append(tool_call)
+ else:
+ self.tool_calls = tool_calls
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class Choices(OpenAIObject):
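+    """A single completion choice in a non-streaming ModelResponse."""
+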
+ def __init__(
+ self,
+ finish_reason=None,
+ index=0,
+ message: Optional[Union[Message, dict]] = None,
+ logprobs=None,
+ enhancements=None,
+ **params,
+ ):
+ super(Choices, self).__init__(**params)
+ if finish_reason is not None:
+ self.finish_reason = map_finish_reason(
+ finish_reason
+ ) # set finish_reason for all responses
+ else:
+ self.finish_reason = "stop"
+ self.index = index
+ if message is None:
+ self.message = Message()
+ else:
+ if isinstance(message, Message):
+ self.message = message
+ elif isinstance(message, dict):
+ self.message = Message(**message)
+ if logprobs is not None:
+ self.logprobs = logprobs
+ if enhancements is not None:
+ self.enhancements = enhancements
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class Usage(OpenAIObject):
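+    """Token usage block: prompt, completion and total token counts."""
+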
+ def __init__(
+ self, prompt_tokens=None, completion_tokens=None, total_tokens=None, **params
+ ):
+ super(Usage, self).__init__(**params)
+ if prompt_tokens:
+ self.prompt_tokens = prompt_tokens
+ if completion_tokens:
+ self.completion_tokens = completion_tokens
+ if total_tokens:
+ self.total_tokens = total_tokens
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class StreamingChoices(OpenAIObject):
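+    """A single choice in a streaming chunk; wraps a Delta instead of a full Message."""
+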
+ def __init__(
+ self,
+ finish_reason=None,
+ index=0,
+ delta: Optional[Delta] = None,
+ logprobs=None,
+ enhancements=None,
+ **params,
+ ):
+ super(StreamingChoices, self).__init__(**params)
+ if finish_reason:
+ self.finish_reason = finish_reason
+ else:
+ self.finish_reason = None
+ self.index = index
+ if delta is not None:
+ if isinstance(delta, Delta):
+ self.delta = delta
+ elif isinstance(delta, dict):
+ self.delta = Delta(**delta)
+ else:
+ self.delta = Delta()
+ if enhancements is not None:
+ self.enhancements = enhancements
+
+ if logprobs is not None and isinstance(logprobs, dict):
+ self.logprobs = ChoiceLogprobs(**logprobs)
+ else:
+ self.logprobs = logprobs # type: ignore
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class ModelResponse(OpenAIObject):
+ id: str
+ """A unique identifier for the completion."""
+
+ choices: List[Union[Choices, StreamingChoices]]
+ """The list of completion choices the model generated for the input prompt."""
+
+ created: int
+ """The Unix timestamp (in seconds) of when the completion was created."""
+
+ model: Optional[str] = None
+ """The model used for completion."""
+
+ object: str
+    """The object type: "chat.completion", or "chat.completion.chunk" for streaming responses."""
+
+ system_fingerprint: Optional[str] = None
+ """This fingerprint represents the backend configuration that the model runs with.
+
+ Can be used in conjunction with the `seed` request parameter to understand when
+ backend changes have been made that might impact determinism.
+ """
+
+ _hidden_params: dict = {}
+
+ def __init__(
+ self,
+ id=None,
+ choices=None,
+ created=None,
+ model=None,
+ object=None,
+ system_fingerprint=None,
+ usage=None,
+ stream=None,
+ stream_options=None,
+ response_ms=None,
+ hidden_params=None,
+ **params,
+ ):
+ if stream is not None and stream is True:
+ object = "chat.completion.chunk"
+ if choices is not None and isinstance(choices, list):
+ new_choices = []
+ for choice in choices:
+ if isinstance(choice, StreamingChoices):
+ _new_choice = choice
+ elif isinstance(choice, dict):
+ _new_choice = StreamingChoices(**choice)
+ new_choices.append(_new_choice)
+ choices = new_choices
+ else:
+ choices = [StreamingChoices()]
+ else:
+ object = "chat.completion"
+ if choices is not None and isinstance(choices, list):
+ new_choices = []
+ for choice in choices:
+ if isinstance(choice, Choices):
+ _new_choice = choice
+ elif isinstance(choice, dict):
+ _new_choice = Choices(**choice)
+ new_choices.append(_new_choice)
+ choices = new_choices
+ else:
+ choices = [Choices()]
+        if id is None:
+            id = _generate_id()
+        if created is None:
+            created = int(time.time())
+        if usage is not None:
+            if isinstance(usage, dict):
+                usage = Usage(**usage)
+ elif stream is None or stream is False:
+ usage = Usage()
+ if hidden_params:
+ self._hidden_params = hidden_params
+
+ init_values = {
+ "id": id,
+ "choices": choices,
+ "created": created,
+ "model": model,
+ "object": object,
+ "system_fingerprint": system_fingerprint,
+ }
+
+ if usage is not None:
+ init_values["usage"] = usage
+
+ super().__init__(
+ **init_values,
+ **params,
+ )
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs):
+ try:
+ return self.model_dump() # noqa
+ except:
+ # if using pydantic v1
+ return self.dict()
+
+
+class Embedding(OpenAIObject):
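+    """A single embedding vector and its index, mirroring the OpenAI embedding object."""
+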
+ embedding: Union[list, str] = []
+ index: int
+ object: str
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class EmbeddingResponse(OpenAIObject):
+ model: Optional[str] = None
+ """The model used for embedding."""
+
+ data: Optional[List] = None
+ """The actual embedding value"""
+
+ object: str
+    """The object type, which is always "list" for embedding responses."""
+
+ usage: Optional[Usage] = None
+ """Usage statistics for the embedding request."""
+
+ _hidden_params: dict = {}
+
+ def __init__(
+ self,
+ model=None,
+ usage=None,
+ stream=False,
+ response_ms=None,
+ data=None,
+ **params,
+ ):
+ object = "list"
+ if response_ms:
+ _response_ms = response_ms
+ else:
+ _response_ms = None
+ if data:
+ data = data
+ else:
+ data = None
+
+ if usage:
+ usage = usage
+ else:
+ usage = Usage()
+
+ model = model
+ super().__init__(model=model, object=object, data=data, usage=usage)
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs):
+ try:
+ return self.model_dump() # noqa
+ except:
+ # if using pydantic v1
+ return self.dict()
+
+
+class Logprobs(OpenAIObject):
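+    """Text-completion style logprobs: per-token offsets, logprobs and top alternatives."""
+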
+ text_offset: List[int]
+ token_logprobs: List[float]
+ tokens: List[str]
+ top_logprobs: List[Dict[str, float]]
+
+
+class TextChoices(OpenAIObject):
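+    """A single choice in a text-completion response."""
+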
+ def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params):
+ super(TextChoices, self).__init__(**params)
+ if finish_reason:
+ self.finish_reason = map_finish_reason(finish_reason)
+ else:
+ self.finish_reason = None
+ self.index = index
+ if text is not None:
+ self.text = text
+ else:
+ self.text = None
+ if logprobs is None:
+ self.logprobs = None
+ else:
+ if isinstance(logprobs, dict):
+ self.logprobs = Logprobs(**logprobs)
+ else:
+ self.logprobs = logprobs
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs):
+ try:
+ return self.model_dump() # noqa
+ except:
+ # if using pydantic v1
+ return self.dict()
+
+
+class TextCompletionResponse(OpenAIObject):
+ """
+ {
+ "id": response["id"],
+ "object": "text_completion",
+ "created": response["created"],
+ "model": response["model"],
+ "choices": [
+ {
+ "text": response["choices"][0]["message"]["content"],
+ "index": response["choices"][0]["index"],
+ "logprobs": transformed_logprobs,
+ "finish_reason": response["choices"][0]["finish_reason"]
+ }
+ ],
+ "usage": response["usage"]
+ }
+ """
+
+ id: str
+ object: str
+ created: int
+ model: Optional[str]
+ choices: List[TextChoices]
+ usage: Optional[Usage]
+ _response_ms: Optional[int] = None
+ _hidden_params: HiddenParams
+
+ def __init__(
+ self,
+ id=None,
+ choices=None,
+ created=None,
+ model=None,
+ usage=None,
+ stream=False,
+ response_ms=None,
+ object=None,
+ **params,
+ ):
+ if stream:
+ object = "text_completion.chunk"
+ choices = [TextChoices()]
+ else:
+ object = "text_completion"
+ if choices is not None and isinstance(choices, list):
+ new_choices = []
+ for choice in choices:
+ if isinstance(choice, TextChoices):
+ _new_choice = choice
+ elif isinstance(choice, dict):
+ _new_choice = TextChoices(**choice)
+ new_choices.append(_new_choice)
+ choices = new_choices
+ else:
+ choices = [TextChoices()]
+        if id is None:
+            id = _generate_id()
+        if created is None:
+            created = int(time.time())
+        if not usage:
+            usage = Usage()
+
+ super(TextCompletionResponse, self).__init__(
+ id=id,
+ object=object,
+ created=created,
+ model=model,
+ choices=choices,
+ usage=usage,
+ **params,
+ )
+
+ if response_ms:
+ self._response_ms = response_ms
+ else:
+ self._response_ms = None
+ self._hidden_params = HiddenParams()
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+
+class ImageObject(OpenAIObject):
+ """
+ Represents the url or the content of an image generated by the OpenAI API.
+
+ Attributes:
+ b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json.
+ url: The URL of the generated image, if response_format is url (default).
+ revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt.
+
+ https://platform.openai.com/docs/api-reference/images/object
+ """
+
+ b64_json: Optional[str] = None
+ url: Optional[str] = None
+ revised_prompt: Optional[str] = None
+
+ def __init__(self, b64_json=None, url=None, revised_prompt=None):
+ super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs):
+ try:
+ return self.model_dump() # noqa
+ except:
+ # if using pydantic v1
+ return self.dict()
+
+
+class ImageResponse(OpenAIObject):
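+    """Response for image generation calls; wraps a list of ImageObject items."""
+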
+ created: Optional[int] = None
+
+ data: Optional[List[ImageObject]] = None
+
+ usage: Optional[dict] = None
+
+ _hidden_params: dict = {}
+
+ def __init__(self, created=None, data=None, response_ms=None):
+ if response_ms:
+ _response_ms = response_ms
+ else:
+ _response_ms = None
+ if data:
+ data = data
+ else:
+ data = None
+
+ if created:
+ created = created
+ else:
+ created = None
+
+ super().__init__(data=data, created=created)
+ self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs):
+ try:
+ return self.model_dump() # noqa
+ except:
+ # if using pydantic v1
+ return self.dict()
+
+
+class TranscriptionResponse(OpenAIObject):
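+    """Response for audio transcription calls; `text` holds the transcript."""
+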
+ text: Optional[str] = None
+
+ _hidden_params: dict = {}
+
+ def __init__(self, text=None):
+ super().__init__(text=text)
+
+ def __contains__(self, key):
+ # Define custom behavior for the 'in' operator
+ return hasattr(self, key)
+
+ def get(self, key, default=None):
+ # Custom .get() method to access attributes with a default value if the attribute doesn't exist
+ return getattr(self, key, default)
+
+ def __getitem__(self, key):
+ # Allow dictionary-style access to attributes
+ return getattr(self, key)
+
+ def __setitem__(self, key, value):
+ # Allow dictionary-style assignment of attributes
+ setattr(self, key, value)
+
+ def json(self, **kwargs):
+ try:
+ return self.model_dump() # noqa
+ except:
+ # if using pydantic v1
+ return self.dict()
diff --git a/litellm/utils.py b/litellm/utils.py
index 7f37bcf7c..6bc33d73d 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6,6 +6,9 @@
# +-----------------------------------------------+
#
# Thank you users! We ❤️ you! - Krrish & Ishaan
+
+# What is this?
+## Generic utils.py file. Problem-specific utils (e.g. cost calculation) should all live in `litellm_core_utils/`.
import sys, re, binascii, struct
import litellm
import dotenv, json, traceback, threading, base64, ast
@@ -18,7 +21,7 @@ from functools import wraps, lru_cache
import datetime, time
import tiktoken
import uuid
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel
import aiohttp
import textwrap
import logging
@@ -32,9 +35,29 @@ from dataclasses import (
)
import os
import litellm._service_logger # for storing API inputs, outputs, and metadata
+import litellm.litellm_core_utils
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
from litellm.caching import DualCache
-from litellm.types.utils import CostPerToken, ProviderField, ModelInfo
+from litellm.types.utils import (
+ CostPerToken,
+ ProviderField,
+ ModelInfo,
+ CallTypes,
+ ModelResponse,
+ EmbeddingResponse,
+ ImageResponse,
+ TranscriptionResponse,
+ TextCompletionResponse,
+ ChatCompletionDeltaToolCall,
+ Message,
+ Delta,
+ Choices,
+ Usage,
+ StreamingChoices,
+ Embedding,
+ TextChoices,
+)
from litellm.litellm_core_utils.redact_messages import (
redact_message_input_output_from_logging,
)
@@ -71,32 +94,9 @@ from .types.llms.openai import (
ChatCompletionToolCallFunctionChunk,
ChatCompletionDeltaToolCallChunk,
)
-from .integrations.traceloop import TraceloopLogger
-from .integrations.athina import AthinaLogger
-from .integrations.helicone import HeliconeLogger
-from .integrations.aispend import AISpendLogger
-from .integrations.berrispend import BerriSpendLogger
-from .integrations.supabase import Supabase
-from .integrations.lunary import LunaryLogger
-from .integrations.prompt_layer import PromptLayerLogger
-from .integrations.langsmith import LangsmithLogger
-from .integrations.logfire_logger import LogfireLogger, LogfireLevel
-from .integrations.weights_biases import WeightsBiasesLogger
-from .integrations.custom_logger import CustomLogger
-from .integrations.langfuse import LangFuseLogger
-from .integrations.openmeter import OpenMeterLogger
-from .integrations.lago import LagoLogger
-from .integrations.datadog import DataDogLogger
-from .integrations.prometheus import PrometheusLogger
-from .integrations.prometheus_services import PrometheusServicesLogger
-from .integrations.dynamodb import DyanmoDBLogger
-from .integrations.s3 import S3Logger
-from .integrations.clickhouse import ClickhouseLogger
-from .integrations.greenscale import GreenscaleLogger
-from .integrations.litedebugger import LiteDebugger
+
from .proxy._types import KeyManagementSystem
from openai import OpenAIError as OriginalError
-from openai._models import BaseModel as OpenAIObject
from .caching import S3Cache, RedisSemanticCache, RedisCache
from .exceptions import (
AuthenticationError,
@@ -179,6 +179,8 @@ local_cache: Optional[Dict[str, str]] = {}
last_fetched_at = None
last_fetched_at_keys = None
######## Model Response #########################
+
+
# All liteLLM Model responses will be in this format, Follows the OpenAI Format
# https://docs.litellm.ai/docs/completion/output
# {
@@ -209,933 +211,6 @@ class UnsupportedParamsError(Exception):
) # Call the base class constructor with the parameters it needs
-def _generate_id(): # private helper function
- return "chatcmpl-" + str(uuid.uuid4())
-
-
-def map_finish_reason(
- finish_reason: str,
-): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null'
- # anthropic mapping
- if finish_reason == "stop_sequence":
- return "stop"
- # cohere mapping - https://docs.cohere.com/reference/generate
- elif finish_reason == "COMPLETE":
- return "stop"
- elif finish_reason == "MAX_TOKENS": # cohere + vertex ai
- return "length"
- elif finish_reason == "ERROR_TOXIC":
- return "content_filter"
- elif (
- finish_reason == "ERROR"
- ): # openai currently doesn't support an 'error' finish reason
- return "stop"
- # huggingface mapping https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/generate_stream
- elif finish_reason == "eos_token" or finish_reason == "stop_sequence":
- return "stop"
- elif (
- finish_reason == "FINISH_REASON_UNSPECIFIED" or finish_reason == "STOP"
- ): # vertex ai - got from running `print(dir(response_obj.candidates[0].finish_reason))`: ['FINISH_REASON_UNSPECIFIED', 'MAX_TOKENS', 'OTHER', 'RECITATION', 'SAFETY', 'STOP',]
- return "stop"
- elif finish_reason == "SAFETY": # vertex ai
- return "content_filter"
- elif finish_reason == "STOP": # vertex ai
- return "stop"
- elif finish_reason == "end_turn" or finish_reason == "stop_sequence": # anthropic
- return "stop"
- elif finish_reason == "max_tokens": # anthropic
- return "length"
- elif finish_reason == "tool_use": # anthropic
- return "tool_calls"
- elif finish_reason == "content_filtered":
- return "content_filter"
- return finish_reason
-
-
-class TopLogprob(OpenAIObject):
- token: str
- """The token."""
-
- bytes: Optional[List[int]] = None
- """A list of integers representing the UTF-8 bytes representation of the token.
-
- Useful in instances where characters are represented by multiple tokens and
- their byte representations must be combined to generate the correct text
- representation. Can be `null` if there is no bytes representation for the token.
- """
-
- logprob: float
- """The log probability of this token, if it is within the top 20 most likely
- tokens.
-
- Otherwise, the value `-9999.0` is used to signify that the token is very
- unlikely.
- """
-
-
-class ChatCompletionTokenLogprob(OpenAIObject):
- token: str
- """The token."""
-
- bytes: Optional[List[int]] = None
- """A list of integers representing the UTF-8 bytes representation of the token.
-
- Useful in instances where characters are represented by multiple tokens and
- their byte representations must be combined to generate the correct text
- representation. Can be `null` if there is no bytes representation for the token.
- """
-
- logprob: float
- """The log probability of this token, if it is within the top 20 most likely
- tokens.
-
- Otherwise, the value `-9999.0` is used to signify that the token is very
- unlikely.
- """
-
- top_logprobs: List[TopLogprob]
- """List of the most likely tokens and their log probability, at this token
- position.
-
- In rare cases, there may be fewer than the number of requested `top_logprobs`
- returned.
- """
-
-
-class ChoiceLogprobs(OpenAIObject):
- content: Optional[List[ChatCompletionTokenLogprob]] = None
- """A list of message content tokens with log probability information."""
-
-
-class FunctionCall(OpenAIObject):
- arguments: str
- name: Optional[str] = None
-
-
-class Function(OpenAIObject):
- arguments: str
- name: Optional[str] = None
-
- def __init__(
- self,
- arguments: Union[Dict, str],
- name: Optional[str] = None,
- **params,
- ):
- if isinstance(arguments, Dict):
- arguments = json.dumps(arguments)
- else:
- arguments = arguments
-
- name = name
-
- # Build a dictionary with the structure your BaseModel expects
- data = {"arguments": arguments, "name": name, **params}
-
- super(Function, self).__init__(**data)
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
-
-class ChatCompletionDeltaToolCall(OpenAIObject):
- id: Optional[str] = None
- function: Function
- type: Optional[str] = None
- index: int
-
-
-class HiddenParams(OpenAIObject):
- original_response: Optional[str] = None
- model_id: Optional[str] = None # used in Router for individual deployments
- api_base: Optional[str] = None # returns api base used for making completion call
-
- model_config = ConfigDict(extra="allow", protected_namespaces=())
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
- def json(self, **kwargs):
- try:
- return self.model_dump() # noqa
- except:
- # if using pydantic v1
- return self.dict()
-
-
-class ChatCompletionMessageToolCall(OpenAIObject):
- def __init__(
- self,
- function: Union[Dict, Function],
- id: Optional[str] = None,
- type: Optional[str] = None,
- **params,
- ):
- super(ChatCompletionMessageToolCall, self).__init__(**params)
- if isinstance(function, Dict):
- self.function = Function(**function)
- else:
- self.function = function
-
- if id is not None:
- self.id = id
- else:
- self.id = f"{uuid.uuid4()}"
-
- if type is not None:
- self.type = type
- else:
- self.type = "function"
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
-
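# Illustration only (not from the original file): a tool call built from a plain
# dict gets an auto-generated uuid4 `id` and a default `type` of "function".
tool_call = ChatCompletionMessageToolCall(
    function={"arguments": {"city": "Paris"}, "name": "get_weather"}
)
assert isinstance(tool_call.function, Function)
assert tool_call.type == "function"
assert tool_call.id is not None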
-class Message(OpenAIObject):
- def __init__(
- self,
- content: Optional[str] = "default",
- role="assistant",
- logprobs=None,
- function_call=None,
- tool_calls=None,
- **params,
- ):
- super(Message, self).__init__(**params)
- self.content = content
- self.role = role
- if function_call is not None:
- self.function_call = FunctionCall(**function_call)
-
- if tool_calls is not None:
- self.tool_calls = []
- for tool_call in tool_calls:
- self.tool_calls.append(ChatCompletionMessageToolCall(**tool_call))
-
- if logprobs is not None:
- self._logprobs = ChoiceLogprobs(**logprobs)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
- def json(self, **kwargs):
- try:
- return self.model_dump() # noqa
- except:
- # if using pydantic v1
- return self.dict()
-
-
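# Illustration only (not from the original file): Message converts raw
# function_call / tool_calls dicts into the typed objects defined above.
msg = Message(
    content=None,
    role="assistant",
    tool_calls=[{"function": {"arguments": {"city": "Paris"}, "name": "get_weather"}}],
)
assert msg.role == "assistant"
assert isinstance(msg.tool_calls[0], ChatCompletionMessageToolCall)
# msg.json() returns model_dump() on pydantic v2 and falls back to dict() on v1.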
-class Delta(OpenAIObject):
- def __init__(
- self,
- content=None,
- role=None,
- function_call=None,
- tool_calls=None,
- **params,
- ):
- super(Delta, self).__init__(**params)
- self.content = content
- self.role = role
-
- if function_call is not None and isinstance(function_call, dict):
- self.function_call = FunctionCall(**function_call)
- else:
- self.function_call = function_call
- if tool_calls is not None and isinstance(tool_calls, list):
- self.tool_calls = []
- for tool_call in tool_calls:
- if isinstance(tool_call, dict):
- if tool_call.get("index", None) is None:
- tool_call["index"] = 0
- self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call))
- elif isinstance(tool_call, ChatCompletionDeltaToolCall):
- self.tool_calls.append(tool_call)
- else:
- self.tool_calls = tool_calls
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
-
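# Illustration only (not from the original file): a streaming delta whose
# tool_call dict is missing "index" gets index=0 before being wrapped.
delta = Delta(
    role="assistant",
    tool_calls=[{"function": {"arguments": '{"city": "Paris"}', "name": "get_weather"}}],
)
assert isinstance(delta.tool_calls[0], ChatCompletionDeltaToolCall)
assert delta.tool_calls[0].index == 0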
-class Choices(OpenAIObject):
- def __init__(
- self,
- finish_reason=None,
- index=0,
- message: Optional[Union[Message, dict]] = None,
- logprobs=None,
- enhancements=None,
- **params,
- ):
- super(Choices, self).__init__(**params)
- if finish_reason is not None:
- self.finish_reason = map_finish_reason(
- finish_reason
- ) # set finish_reason for all responses
- else:
- self.finish_reason = "stop"
- self.index = index
- if message is None:
- self.message = Message()
- else:
- if isinstance(message, Message):
- self.message = message
- elif isinstance(message, dict):
- self.message = Message(**message)
- if logprobs is not None:
- self.logprobs = logprobs
- if enhancements is not None:
- self.enhancements = enhancements
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
-
-class Usage(OpenAIObject):
- def __init__(
- self, prompt_tokens=None, completion_tokens=None, total_tokens=None, **params
- ):
- super(Usage, self).__init__(**params)
- if prompt_tokens:
- self.prompt_tokens = prompt_tokens
- if completion_tokens:
- self.completion_tokens = completion_tokens
- if total_tokens:
- self.total_tokens = total_tokens
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
-
-class StreamingChoices(OpenAIObject):
- def __init__(
- self,
- finish_reason=None,
- index=0,
- delta: Optional[Delta] = None,
- logprobs=None,
- enhancements=None,
- **params,
- ):
- super(StreamingChoices, self).__init__(**params)
- if finish_reason:
- self.finish_reason = finish_reason
- else:
- self.finish_reason = None
- self.index = index
- if delta is not None:
- if isinstance(delta, Delta):
- self.delta = delta
- elif isinstance(delta, dict):
- self.delta = Delta(**delta)
- else:
- self.delta = Delta()
- if enhancements is not None:
- self.enhancements = enhancements
-
- if logprobs is not None and isinstance(logprobs, dict):
- self.logprobs = ChoiceLogprobs(**logprobs)
- else:
- self.logprobs = logprobs # type: ignore
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
-
-class ModelResponse(OpenAIObject):
- id: str
- """A unique identifier for the completion."""
-
- choices: List[Union[Choices, StreamingChoices]]
- """The list of completion choices the model generated for the input prompt."""
-
- created: int
- """The Unix timestamp (in seconds) of when the completion was created."""
-
- model: Optional[str] = None
- """The model used for completion."""
-
- object: str
- """The object type, which is always "text_completion" """
-
- system_fingerprint: Optional[str] = None
- """This fingerprint represents the backend configuration that the model runs with.
-
- Can be used in conjunction with the `seed` request parameter to understand when
- backend changes have been made that might impact determinism.
- """
-
- _hidden_params: dict = {}
-
- def __init__(
- self,
- id=None,
- choices=None,
- created=None,
- model=None,
- object=None,
- system_fingerprint=None,
- usage=None,
- stream=None,
- stream_options=None,
- response_ms=None,
- hidden_params=None,
- **params,
- ):
- if stream is not None and stream == True:
- object = "chat.completion.chunk"
- if choices is not None and isinstance(choices, list):
- new_choices = []
- for choice in choices:
- if isinstance(choice, StreamingChoices):
- _new_choice = choice
- elif isinstance(choice, dict):
- _new_choice = StreamingChoices(**choice)
- new_choices.append(_new_choice)
- choices = new_choices
- else:
- choices = [StreamingChoices()]
- else:
- if model in litellm.open_ai_embedding_models:
- object = "embedding"
- else:
- object = "chat.completion"
- if choices is not None and isinstance(choices, list):
- new_choices = []
- for choice in choices:
- if isinstance(choice, Choices):
- _new_choice = choice
- elif isinstance(choice, dict):
- _new_choice = Choices(**choice)
- new_choices.append(_new_choice)
- choices = new_choices
- else:
- choices = [Choices()]
- if id is None:
- id = _generate_id()
- else:
- id = id
- if created is None:
- created = int(time.time())
- else:
- created = created
- model = model
- if usage is not None:
- if isinstance(usage, dict):
- usage = Usage(**usage)
- else:
- usage = usage
- elif stream is None or stream == False:
- usage = Usage()
- if hidden_params:
- self._hidden_params = hidden_params
-
- init_values = {
- "id": id,
- "choices": choices,
- "created": created,
- "model": model,
- "object": object,
- "system_fingerprint": system_fingerprint,
- }
-
- if usage is not None:
- init_values["usage"] = usage
-
- super().__init__(
- **init_values,
- **params,
- )
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
- def json(self, **kwargs):
- try:
- return self.model_dump() # noqa
- except:
- # if using pydantic v1
- return self.dict()
-
-
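# Illustration only (not from the original file): dict choices are coerced into
# Choices / StreamingChoices and `object` is derived from the `stream` flag.
response = ModelResponse(
    model="gpt-3.5-turbo",
    choices=[{"finish_reason": "stop", "index": 0,
              "message": {"role": "assistant", "content": "Hello!"}}],
    usage={"prompt_tokens": 5, "completion_tokens": 2, "total_tokens": 7},
)
assert response.object == "chat.completion"
assert isinstance(response.choices[0], Choices)
assert response.usage.total_tokens == 7

chunk = ModelResponse(stream=True, choices=[{"delta": {"content": "Hel"}}])
assert chunk.object == "chat.completion.chunk"
assert isinstance(chunk.choices[0], StreamingChoices)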
-class Embedding(OpenAIObject):
- embedding: Union[list, str] = []
- index: int
- object: str
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
-
-class EmbeddingResponse(OpenAIObject):
- model: Optional[str] = None
- """The model used for embedding."""
-
- data: Optional[List] = None
- """The actual embedding value"""
-
- object: str
- """The object type, which is always "embedding" """
-
- usage: Optional[Usage] = None
- """Usage statistics for the embedding request."""
-
- _hidden_params: dict = {}
-
- def __init__(
- self,
- model=None,
- usage=None,
- stream=False,
- response_ms=None,
- data=None,
- **params,
- ):
- object = "list"
- if response_ms:
- _response_ms = response_ms
- else:
- _response_ms = None
- if data:
- data = data
- else:
- data = None
-
- if usage:
- usage = usage
- else:
- usage = Usage()
-
- model = model
- super().__init__(model=model, object=object, data=data, usage=usage)
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
- def json(self, **kwargs):
- try:
- return self.model_dump() # noqa
- except:
- # if using pydantic v1
- return self.dict()
-
-
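# Illustration only (not from the original file): EmbeddingResponse always
# reports object="list" and falls back to an empty Usage() when none is given.
item = Embedding(embedding=[0.1, 0.2, 0.3], index=0, object="embedding")
resp = EmbeddingResponse(model="text-embedding-ada-002", data=[item])
assert resp.object == "list"
assert isinstance(resp.usage, Usage)
assert resp["data"][0].embedding == [0.1, 0.2, 0.3]  # dict-style access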
-class Logprobs(OpenAIObject):
- text_offset: List[int]
- token_logprobs: List[float]
- tokens: List[str]
- top_logprobs: List[Dict[str, float]]
-
-
-class TextChoices(OpenAIObject):
- def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params):
- super(TextChoices, self).__init__(**params)
- if finish_reason:
- self.finish_reason = map_finish_reason(finish_reason)
- else:
- self.finish_reason = None
- self.index = index
- if text is not None:
- self.text = text
- else:
- self.text = None
- if logprobs is None:
- self.logprobs = None
- else:
- if isinstance(logprobs, dict):
- self.logprobs = Logprobs(**logprobs)
- else:
- self.logprobs = logprobs
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
- def json(self, **kwargs):
- try:
- return self.model_dump() # noqa
- except:
- # if using pydantic v1
- return self.dict()
-
-
-class TextCompletionResponse(OpenAIObject):
- """
- {
- "id": response["id"],
- "object": "text_completion",
- "created": response["created"],
- "model": response["model"],
- "choices": [
- {
- "text": response["choices"][0]["message"]["content"],
- "index": response["choices"][0]["index"],
- "logprobs": transformed_logprobs,
- "finish_reason": response["choices"][0]["finish_reason"]
- }
- ],
- "usage": response["usage"]
- }
- """
-
- id: str
- object: str
- created: int
- model: Optional[str]
- choices: List[TextChoices]
- usage: Optional[Usage]
- _response_ms: Optional[int] = None
- _hidden_params: HiddenParams
-
- def __init__(
- self,
- id=None,
- choices=None,
- created=None,
- model=None,
- usage=None,
- stream=False,
- response_ms=None,
- object=None,
- **params,
- ):
- if stream:
- object = "text_completion.chunk"
- choices = [TextChoices()]
- else:
- object = "text_completion"
- if choices is not None and isinstance(choices, list):
- new_choices = []
- for choice in choices:
- if isinstance(choice, TextChoices):
- _new_choice = choice
- elif isinstance(choice, dict):
- _new_choice = TextChoices(**choice)
- new_choices.append(_new_choice)
- choices = new_choices
- else:
- choices = [TextChoices()]
- if object is not None:
- object = object
- if id is None:
- id = _generate_id()
- else:
- id = id
- if created is None:
- created = int(time.time())
- else:
- created = created
-
- model = model
- if usage:
- usage = usage
- else:
- usage = Usage()
-
- super(TextCompletionResponse, self).__init__(
- id=id,
- object=object,
- created=created,
- model=model,
- choices=choices,
- usage=usage,
- **params,
- )
-
- if response_ms:
- self._response_ms = response_ms
- else:
- self._response_ms = None
- self._hidden_params = HiddenParams()
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
-
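# Illustration only (not from the original file): a text-completion response
# built from dict choices, matching the shape in the class docstring above.
text_resp = TextCompletionResponse(
    model="gpt-3.5-turbo-instruct",
    choices=[{"text": "Hello world", "index": 0, "finish_reason": "stop"}],
)
assert text_resp.object == "text_completion"
assert isinstance(text_resp.choices[0], TextChoices)
assert text_resp.choices[0].text == "Hello world"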
-class ImageObject(OpenAIObject):
- """
- Represents the url or the content of an image generated by the OpenAI API.
-
- Attributes:
- b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json.
- url: The URL of the generated image, if response_format is url (default).
- revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt.
-
- https://platform.openai.com/docs/api-reference/images/object
- """
-
- b64_json: Optional[str] = None
- url: Optional[str] = None
- revised_prompt: Optional[str] = None
-
- def __init__(self, b64_json=None, url=None, revised_prompt=None):
- super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt)
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
- def json(self, **kwargs):
- try:
- return self.model_dump() # noqa
- except:
- # if using pydantic v1
- return self.dict()
-
-
-class ImageResponse(OpenAIObject):
- created: Optional[int] = None
-
- data: Optional[List[ImageObject]] = None
-
- usage: Optional[dict] = None
-
- _hidden_params: dict = {}
-
- def __init__(self, created=None, data=None, response_ms=None):
- if response_ms:
- _response_ms = response_ms
- else:
- _response_ms = None
- if data:
- data = data
- else:
- data = None
-
- if created:
- created = created
- else:
- created = None
-
- super().__init__(data=data, created=created)
- self.usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
- def json(self, **kwargs):
- try:
- return self.model_dump() # noqa
- except:
- # if using pydantic v1
- return self.dict()
-
-
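# Illustration only (not from the original file; the URL is a placeholder): an
# image response carrying one URL image, with usage zeroed out by __init__.
image = ImageObject(url="https://example.com/image.png")
image_resp = ImageResponse(created=1700000000, data=[image])
assert image_resp.data[0].url == "https://example.com/image.png"
assert image_resp.usage == {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}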
-class TranscriptionResponse(OpenAIObject):
- text: Optional[str] = None
-
- _hidden_params: dict = {}
-
- def __init__(self, text=None):
- super().__init__(text=text)
-
- def __contains__(self, key):
- # Define custom behavior for the 'in' operator
- return hasattr(self, key)
-
- def get(self, key, default=None):
- # Custom .get() method to access attributes with a default value if the attribute doesn't exist
- return getattr(self, key, default)
-
- def __getitem__(self, key):
- # Allow dictionary-style access to attributes
- return getattr(self, key)
-
- def __setitem__(self, key, value):
- # Allow dictionary-style assignment of attributes
- setattr(self, key, value)
-
- def json(self, **kwargs):
- try:
- return self.model_dump() # noqa
- except:
- # if using pydantic v1
- return self.dict()
-
-
############################################################
def print_verbose(
print_statement,
@@ -1156,1602 +231,6 @@ def print_verbose(
####### LOGGING ###################
-from enum import Enum
-
-
-class CallTypes(Enum):
- embedding = "embedding"
- aembedding = "aembedding"
- completion = "completion"
- acompletion = "acompletion"
- atext_completion = "atext_completion"
- text_completion = "text_completion"
- image_generation = "image_generation"
- aimage_generation = "aimage_generation"
- moderation = "moderation"
- amoderation = "amoderation"
- atranscription = "atranscription"
- transcription = "transcription"
- aspeech = "aspeech"
- speech = "speech"
-
-
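# Illustration only (not from the original file): the membership check that the
# Logging constructor below performs on its `call_type` argument.
allowed_values = [item.value for item in CallTypes]
assert "acompletion" in allowed_values
assert CallTypes.embedding.value == "embedding"
assert "chat" not in allowed_values  # an unknown call_type raises ValueError in Logging.__init__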
-# Logging function -> log the exact model details + what's being sent | Non-Blocking
-class Logging:
- global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, capture_exception, add_breadcrumb, lunaryLogger
-
- custom_pricing: bool = False
- stream_options = None
-
- def __init__(
- self,
- model,
- messages,
- stream,
- call_type,
- start_time,
- litellm_call_id,
- function_id,
- dynamic_success_callbacks=None,
- dynamic_failure_callbacks=None,
- dynamic_async_success_callbacks=None,
- langfuse_public_key=None,
- langfuse_secret=None,
- ):
- if call_type not in [item.value for item in CallTypes]:
- allowed_values = ", ".join([item.value for item in CallTypes])
- raise ValueError(
- f"Invalid call_type {call_type}. Allowed values: {allowed_values}"
- )
- if messages is not None:
- if isinstance(messages, str):
- messages = [
- {"role": "user", "content": messages}
- ] # convert text completion input to the chat completion format
- elif (
- isinstance(messages, list)
- and len(messages) > 0
- and isinstance(messages[0], str)
- ):
- new_messages = []
- for m in messages:
- new_messages.append({"role": "user", "content": m})
- messages = new_messages
- self.model = model
- self.messages = messages
- self.stream = stream
- self.start_time = start_time # log the call start time
- self.call_type = call_type
- self.litellm_call_id = litellm_call_id
- self.function_id = function_id
- self.streaming_chunks = [] # for generating complete stream response
- self.sync_streaming_chunks = [] # for generating complete stream response
- self.model_call_details = {}
- self.dynamic_input_callbacks = [] # [TODO] callbacks set for just that call
- self.dynamic_failure_callbacks = dynamic_failure_callbacks
- self.dynamic_success_callbacks = (
- dynamic_success_callbacks # callbacks set for just that call
- )
- self.dynamic_async_success_callbacks = (
- dynamic_async_success_callbacks # callbacks set for just that call
- )
- ## DYNAMIC LANGFUSE KEYS ##
- self.langfuse_public_key = langfuse_public_key
- self.langfuse_secret = langfuse_secret
- ## TIME TO FIRST TOKEN LOGGING ##
- self.completion_start_time: Optional[datetime.datetime] = None
-
- def update_environment_variables(
- self, model, user, optional_params, litellm_params, **additional_params
- ):
- self.optional_params = optional_params
- self.model = model
- self.user = user
- self.litellm_params = litellm_params
- self.logger_fn = litellm_params.get("logger_fn", None)
- print_verbose(f"self.optional_params: {self.optional_params}")
-
- self.model_call_details = {
- "model": self.model,
- "messages": self.messages,
- "optional_params": self.optional_params,
- "litellm_params": self.litellm_params,
- "start_time": self.start_time,
- "stream": self.stream,
- "user": user,
- "call_type": str(self.call_type),
- "litellm_call_id": self.litellm_call_id,
- "completion_start_time": self.completion_start_time,
- **self.optional_params,
- **additional_params,
- }
-
- ## check if stream options is set ## - used by CustomStreamWrapper for easy instrumentation
- if "stream_options" in additional_params:
- self.stream_options = additional_params["stream_options"]
- ## check if custom pricing set ##
- if (
- litellm_params.get("input_cost_per_token") is not None
- or litellm_params.get("input_cost_per_second") is not None
- or litellm_params.get("output_cost_per_token") is not None
- or litellm_params.get("output_cost_per_second") is not None
- ):
- self.custom_pricing = True
-
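# Illustration only (not from the original file): constructing a Logging object
# and letting update_environment_variables() flag custom per-token pricing.
import datetime
import uuid

logging_obj = Logging(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    stream=False,
    call_type=CallTypes.completion.value,
    start_time=datetime.datetime.now(),
    litellm_call_id=str(uuid.uuid4()),
    function_id="1",
)
logging_obj.update_environment_variables(
    model="gpt-3.5-turbo",
    user="default",
    optional_params={},
    litellm_params={"input_cost_per_token": 1e-05, "output_cost_per_token": 2e-05},
)
assert logging_obj.custom_pricing is True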
- def _pre_call(self, input, api_key, model=None, additional_args={}):
- """
- Common helper function across the sync + async pre-call function
- """
- # print_verbose(f"logging pre call for model: {self.model} with call type: {self.call_type}")
- self.model_call_details["input"] = input
- self.model_call_details["api_key"] = api_key
- self.model_call_details["additional_args"] = additional_args
- self.model_call_details["log_event_type"] = "pre_api_call"
- if (
- model
- ): # if model name was changed pre-call, overwrite the initial model call name with the new one
- self.model_call_details["model"] = model
-
- def pre_call(self, input, api_key, model=None, additional_args={}):
- # Log the exact input to the LLM API
- litellm.error_logs["PRE_CALL"] = locals()
- try:
- self._pre_call(
- input=input,
- api_key=api_key,
- model=model,
- additional_args=additional_args,
- )
-
- # User Logging -> if you pass in a custom logging function
- headers = additional_args.get("headers", {})
- if headers is None:
- headers = {}
- data = additional_args.get("complete_input_dict", {})
- api_base = additional_args.get("api_base", "")
- self.model_call_details["litellm_params"]["api_base"] = str(
- api_base
- ) # used for alerting
- masked_headers = {
- k: (
- (v[:-44] + "*" * 44)
- if (isinstance(v, str) and len(v) > 44)
- else "*****"
- )
- for k, v in headers.items()
- }
- formatted_headers = " ".join(
- [f"-H '{k}: {v}'" for k, v in masked_headers.items()]
- )
-
- verbose_logger.debug(f"PRE-API-CALL ADDITIONAL ARGS: {additional_args}")
-
- curl_command = "\n\nPOST Request Sent from LiteLLM:\n"
- curl_command += "curl -X POST \\\n"
- curl_command += f"{api_base} \\\n"
- curl_command += (
- f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else ""
- )
- curl_command += f"-d '{str(data)}'\n"
- if additional_args.get("request_str", None) is not None:
- # print the sagemaker / bedrock client request
- curl_command = "\nRequest Sent from LiteLLM:\n"
- curl_command += additional_args.get("request_str", None)
- elif api_base == "":
- curl_command = self.model_call_details
-
- # only print verbose if verbose logger is not set
- if verbose_logger.level == 0:
- # this means the verbose logger was not switched on - the user is relying on litellm.set_verbose=True
- print_verbose(f"\033[92m{curl_command}\033[0m\n")
-
- if litellm.json_logs:
- verbose_logger.debug(
- "POST Request Sent from LiteLLM",
- extra={"api_base": {api_base}, **masked_headers},
- )
- else:
- verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n")
- # log raw request to provider (like LangFuse) -- if opted in.
- if litellm.log_raw_request_response is True:
- try:
- # [Non-blocking Extra Debug Information in metadata]
- _litellm_params = self.model_call_details.get("litellm_params", {})
- _metadata = _litellm_params.get("metadata", {}) or {}
- if (
- litellm.turn_off_message_logging is not None
- and litellm.turn_off_message_logging is True
- ):
- _metadata["raw_request"] = (
- "redacted by litellm. \
- 'litellm.turn_off_message_logging=True'"
- )
- else:
- _metadata["raw_request"] = str(curl_command)
- except Exception as e:
- _metadata["raw_request"] = (
- "Unable to Log \
- raw request: {}".format(
- str(e)
- )
- )
- if self.logger_fn and callable(self.logger_fn):
- try:
- self.logger_fn(
- self.model_call_details
- ) # Expectation: any logger function passed in by the user should accept a dict object
- except Exception as e:
- print_verbose(
- f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
- )
- # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
- callbacks = litellm.input_callback + self.dynamic_input_callbacks
- for callback in callbacks:
- try:
- if callback == "supabase":
- print_verbose("reaches supabase for logging!")
- model = self.model_call_details["model"]
- messages = self.model_call_details["input"]
- print_verbose(f"supabaseClient: {supabaseClient}")
- supabaseClient.input_log_event(
- model=model,
- messages=messages,
- end_user=self.model_call_details.get("user", "default"),
- litellm_call_id=self.litellm_params["litellm_call_id"],
- print_verbose=print_verbose,
- )
- elif callback == "sentry" and add_breadcrumb:
- try:
- details_to_log = copy.deepcopy(self.model_call_details)
- except:
- details_to_log = self.model_call_details
- if litellm.turn_off_message_logging:
- # make a copy of the model_call_details and log it
- details_to_log.pop("messages", None)
- details_to_log.pop("input", None)
- details_to_log.pop("prompt", None)
-
- add_breadcrumb(
- category="litellm.llm_call",
- message=f"Model Call Details pre-call: {details_to_log}",
- level="info",
- )
- elif isinstance(callback, CustomLogger): # custom logger class
- callback.log_pre_api_call(
- model=self.model,
- messages=self.messages,
- kwargs=self.model_call_details,
- )
- elif callable(callback): # custom logger functions
- customLogger.log_input_event(
- model=self.model,
- messages=self.messages,
- kwargs=self.model_call_details,
- print_verbose=print_verbose,
- callback_func=callback,
- )
- except Exception as e:
- verbose_logger.error(
- "litellm.Logging.pre_call(): Exception occured - {}".format(
- str(e)
- )
- )
- verbose_logger.debug(
- f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
- )
- print_verbose(
- f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
- )
- if capture_exception: # log this error to sentry for debugging
- capture_exception(e)
- except Exception as e:
- print_verbose(
- f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
- )
- print_verbose(
- f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
- )
- if capture_exception: # log this error to sentry for debugging
- capture_exception(e)
-
- def post_call(
- self, original_response, input=None, api_key=None, additional_args={}
- ):
- # Log the exact result from the LLM API, for streaming - log the type of response received
- litellm.error_logs["POST_CALL"] = locals()
- if isinstance(original_response, dict):
- original_response = json.dumps(original_response)
- try:
- self.model_call_details["input"] = input
- self.model_call_details["api_key"] = api_key
- self.model_call_details["original_response"] = original_response
- self.model_call_details["additional_args"] = additional_args
- self.model_call_details["log_event_type"] = "post_api_call"
- # User Logging -> if you pass in a custom logging function
- print_verbose(
- f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n",
- log_level="DEBUG",
- )
- if self.logger_fn and callable(self.logger_fn):
- try:
- self.logger_fn(
- self.model_call_details
- ) # Expectation: any logger function passed in by the user should accept a dict object
- except Exception as e:
- print_verbose(
- f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
- )
- original_response = redact_message_input_output_from_logging(
- litellm_logging_obj=self, result=original_response
- )
- # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made
-
- callbacks = litellm.input_callback + self.dynamic_input_callbacks
- for callback in callbacks:
- try:
- if callback == "lite_debugger":
- print_verbose("reaches litedebugger for post-call logging!")
- print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
- liteDebuggerClient.post_call_log_event(
- original_response=original_response,
- litellm_call_id=self.litellm_params["litellm_call_id"],
- print_verbose=print_verbose,
- call_type=self.call_type,
- stream=self.stream,
- )
- elif callback == "sentry" and add_breadcrumb:
- print_verbose("reaches sentry breadcrumbing")
- try:
- details_to_log = copy.deepcopy(self.model_call_details)
- except:
- details_to_log = self.model_call_details
- if litellm.turn_off_message_logging:
- # make a copy of the model_call_details and log it
- details_to_log.pop("messages", None)
- details_to_log.pop("input", None)
- details_to_log.pop("prompt", None)
-
- add_breadcrumb(
- category="litellm.llm_call",
- message=f"Model Call Details post-call: {details_to_log}",
- level="info",
- )
- elif isinstance(callback, CustomLogger): # custom logger class
- callback.log_post_api_call(
- kwargs=self.model_call_details,
- response_obj=None,
- start_time=self.start_time,
- end_time=None,
- )
- except Exception as e:
- print_verbose(
- f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {traceback.format_exc()}"
- )
- print_verbose(
- f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
- )
- if capture_exception: # log this error to sentry for debugging
- capture_exception(e)
- except:
- print_verbose(
- f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
- )
- pass
-
- def _success_handler_helper_fn(
- self, result=None, start_time=None, end_time=None, cache_hit=None
- ):
- try:
- if start_time is None:
- start_time = self.start_time
- if end_time is None:
- end_time = datetime.datetime.now()
- if self.completion_start_time is None:
- self.completion_start_time = end_time
- self.model_call_details["completion_start_time"] = (
- self.completion_start_time
- )
- self.model_call_details["log_event_type"] = "successful_api_call"
- self.model_call_details["end_time"] = end_time
- self.model_call_details["cache_hit"] = cache_hit
- ## if model in model cost map - log the response cost
- ## else set cost to None
- verbose_logger.debug(f"Model={self.model};")
- if (
- result is not None
- and (
- isinstance(result, ModelResponse)
- or isinstance(result, EmbeddingResponse)
- or isinstance(result, ImageResponse)
- or isinstance(result, TranscriptionResponse)
- or isinstance(result, TextCompletionResponse)
- )
- and self.stream != True
- ): # handle streaming separately
- self.model_call_details["response_cost"] = (
- litellm.response_cost_calculator(
- response_object=result,
- model=self.model,
- cache_hit=self.model_call_details.get("cache_hit", False),
- custom_llm_provider=self.model_call_details.get(
- "custom_llm_provider", None
- ),
- base_model=_get_base_model_from_metadata(
- model_call_details=self.model_call_details
- ),
- call_type=self.call_type,
- optional_params=self.optional_params,
- )
- )
- else: # streaming chunks + image gen.
- self.model_call_details["response_cost"] = None
-
- if (
- litellm.max_budget
- and self.stream == False
- and result is not None
- and "content" in result
- ):
- time_diff = (end_time - start_time).total_seconds()
- float_diff = float(time_diff)
- litellm._current_cost += litellm.completion_cost(
- model=self.model,
- prompt="",
- completion=result["content"],
- total_time=float_diff,
- )
-
- return start_time, end_time, result
- except Exception as e:
- raise Exception(f"[Non-Blocking] LiteLLM.Success_Call Error: {str(e)}")
-
- def success_handler(
- self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs
- ):
- print_verbose(f"Logging Details LiteLLM-Success Call: {cache_hit}")
- start_time, end_time, result = self._success_handler_helper_fn(
- start_time=start_time,
- end_time=end_time,
- result=result,
- cache_hit=cache_hit,
- )
- # print(f"original response in success handler: {self.model_call_details['original_response']}")
- try:
- print_verbose(f"success callbacks: {litellm.success_callback}")
- ## BUILD COMPLETE STREAMED RESPONSE
- complete_streaming_response = None
- if self.stream and isinstance(result, ModelResponse):
- if (
- result.choices[0].finish_reason is not None
- ): # if it's the last chunk
- self.sync_streaming_chunks.append(result)
- # print_verbose(f"final set of received chunks: {self.sync_streaming_chunks}")
- try:
- complete_streaming_response = litellm.stream_chunk_builder(
- self.sync_streaming_chunks,
- messages=self.model_call_details.get("messages", None),
- start_time=start_time,
- end_time=end_time,
- )
- except Exception as e:
- print_verbose(
- "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format(
- str(e), traceback.format_exc()
- ),
- log_level="ERROR",
- )
- complete_streaming_response = None
- else:
- self.sync_streaming_chunks.append(result)
-
- if complete_streaming_response is not None:
- print_verbose(
- f"Logging Details LiteLLM-Success Call streaming complete"
- )
- self.model_call_details["complete_streaming_response"] = (
- complete_streaming_response
- )
- self.model_call_details["response_cost"] = (
- litellm.response_cost_calculator(
- response_object=complete_streaming_response,
- model=self.model,
- cache_hit=self.model_call_details.get("cache_hit", False),
- custom_llm_provider=self.model_call_details.get(
- "custom_llm_provider", None
- ),
- base_model=_get_base_model_from_metadata(
- model_call_details=self.model_call_details
- ),
- call_type=self.call_type,
- optional_params=self.optional_params,
- )
- )
- if self.dynamic_success_callbacks is not None and isinstance(
- self.dynamic_success_callbacks, list
- ):
- callbacks = self.dynamic_success_callbacks
- ## keep the internal functions ##
- for callback in litellm.success_callback:
- if (
- isinstance(callback, CustomLogger)
- and "_PROXY_" in callback.__class__.__name__
- ):
- callbacks.append(callback)
- else:
- callbacks = litellm.success_callback
-
- result = redact_message_input_output_from_logging(
- result=result, litellm_logging_obj=self
- )
-
- for callback in callbacks:
- try:
- litellm_params = self.model_call_details.get("litellm_params", {})
- if litellm_params.get("no-log", False) == True:
- # proxy cost tracking callbacks should run
- if not (
- isinstance(callback, CustomLogger)
- and "_PROXY_" in callback.__class__.__name__
- ):
- print_verbose("no-log request, skipping logging")
- continue
- if callback == "lite_debugger":
- print_verbose("reaches lite_debugger for logging!")
- print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
- print_verbose(
- f"liteDebuggerClient details function {self.call_type} and stream set to {self.stream}"
- )
- liteDebuggerClient.log_event(
- end_user=kwargs.get("user", "default"),
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- litellm_call_id=self.litellm_call_id,
- print_verbose=print_verbose,
- call_type=self.call_type,
- stream=self.stream,
- )
- if callback == "promptlayer":
- print_verbose("reaches promptlayer for logging!")
- promptLayerLogger.log_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- if callback == "supabase":
- print_verbose("reaches supabase for logging!")
- kwargs = self.model_call_details
-
- # this only logs streaming once, complete_streaming_response exists i.e when stream ends
- if self.stream:
- if "complete_streaming_response" not in kwargs:
- continue
- else:
- print_verbose("reaches supabase for streaming logging!")
- result = kwargs["complete_streaming_response"]
-
- model = kwargs["model"]
- messages = kwargs["messages"]
- optional_params = kwargs.get("optional_params", {})
- litellm_params = kwargs.get("litellm_params", {})
- supabaseClient.log_event(
- model=model,
- messages=messages,
- end_user=optional_params.get("user", "default"),
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- litellm_call_id=litellm_params.get(
- "litellm_call_id", str(uuid.uuid4())
- ),
- print_verbose=print_verbose,
- )
- if callback == "wandb":
- print_verbose("reaches wandb for logging!")
- weightsBiasesLogger.log_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- if callback == "langsmith":
- print_verbose("reaches langsmith for logging!")
- if self.stream:
- if "complete_streaming_response" not in kwargs:
- continue
- else:
- print_verbose(
- "reaches langsmith for streaming logging!"
- )
- result = kwargs["complete_streaming_response"]
- langsmithLogger.log_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- if callback == "logfire":
- global logfireLogger
- verbose_logger.debug("reaches logfire for success logging!")
- kwargs = {}
- for k, v in self.model_call_details.items():
- if (
- k != "original_response"
- ): # copy.deepcopy raises errors as this could be a coroutine
- kwargs[k] = v
-
- # this only logs streaming once, complete_streaming_response exists i.e when stream ends
- if self.stream:
- if "complete_streaming_response" not in kwargs:
- continue
- else:
- print_verbose("reaches logfire for streaming logging!")
- result = kwargs["complete_streaming_response"]
-
- logfireLogger.log_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- level=LogfireLevel.INFO.value,
- )
-
- if callback == "lunary":
- print_verbose("reaches lunary for logging!")
- model = self.model
- kwargs = self.model_call_details
-
- input = kwargs.get("messages", kwargs.get("input", None))
-
- type = (
- "embed"
- if self.call_type == CallTypes.embedding.value
- else "llm"
- )
-
- # this only logs streaming once, complete_streaming_response exists i.e when stream ends
- if self.stream:
- if "complete_streaming_response" not in kwargs:
- continue
- else:
- result = kwargs["complete_streaming_response"]
-
- lunaryLogger.log_event(
- type=type,
- kwargs=kwargs,
- event="end",
- model=model,
- input=input,
- user_id=kwargs.get("user", None),
- # user_props=self.model_call_details.get("user_props", None),
- extra=kwargs.get("optional_params", {}),
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- run_id=self.litellm_call_id,
- print_verbose=print_verbose,
- )
- if callback == "helicone":
- print_verbose("reaches helicone for logging!")
- model = self.model
- messages = self.model_call_details["input"]
- heliconeLogger.log_success(
- model=model,
- messages=messages,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- if callback == "langfuse":
- global langFuseLogger
- verbose_logger.debug("reaches langfuse for success logging!")
- kwargs = {}
- for k, v in self.model_call_details.items():
- if (
- k != "original_response"
- ): # copy.deepcopy raises errors as this could be a coroutine
- kwargs[k] = v
- # this only logs streaming once, complete_streaming_response exists i.e when stream ends
- if self.stream:
- verbose_logger.debug(
- f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
- )
- if complete_streaming_response is None:
- continue
- else:
- print_verbose("reaches langfuse for streaming logging!")
- result = kwargs["complete_streaming_response"]
- if langFuseLogger is None or (
- self.langfuse_public_key is not None
- and self.langfuse_public_key
- != langFuseLogger.public_key
- ):
- langFuseLogger = LangFuseLogger(
- langfuse_public_key=self.langfuse_public_key,
- langfuse_secret=self.langfuse_secret,
- )
- langFuseLogger.log_event(
- kwargs=kwargs,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- user_id=kwargs.get("user", None),
- print_verbose=print_verbose,
- )
- if callback == "datadog":
- global dataDogLogger
- verbose_logger.debug("reaches datadog for success logging!")
- kwargs = {}
- for k, v in self.model_call_details.items():
- if (
- k != "original_response"
- ): # copy.deepcopy raises errors as this could be a coroutine
- kwargs[k] = v
- # this only logs streaming once, complete_streaming_response exists i.e when stream ends
- if self.stream:
- verbose_logger.debug(
- f"datadog: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
- )
- if complete_streaming_response is None:
- continue
- else:
- print_verbose("reaches datadog for streaming logging!")
- result = kwargs["complete_streaming_response"]
- dataDogLogger.log_event(
- kwargs=kwargs,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- user_id=kwargs.get("user", None),
- print_verbose=print_verbose,
- )
- if callback == "prometheus":
- global prometheusLogger
- verbose_logger.debug("reaches prometheus for success logging!")
- kwargs = {}
- for k, v in self.model_call_details.items():
- if (
- k != "original_response"
- ): # copy.deepcopy raises errors as this could be a coroutine
- kwargs[k] = v
- # this only logs streaming once, complete_streaming_response exists i.e when stream ends
- if self.stream:
- verbose_logger.debug(
- f"prometheus: is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
- )
- if complete_streaming_response is None:
- continue
- else:
- print_verbose(
- "reaches prometheus for streaming logging!"
- )
- result = kwargs["complete_streaming_response"]
- prometheusLogger.log_event(
- kwargs=kwargs,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- user_id=kwargs.get("user", None),
- print_verbose=print_verbose,
- )
- if callback == "generic":
- global genericAPILogger
- verbose_logger.debug("reaches langfuse for success logging!")
- kwargs = {}
- for k, v in self.model_call_details.items():
- if (
- k != "original_response"
- ): # copy.deepcopy raises errors as this could be a coroutine
- kwargs[k] = v
- # this only logs streaming once, complete_streaming_response exists i.e when stream ends
- if self.stream:
- verbose_logger.debug(
- f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
- )
- if complete_streaming_response is None:
- continue
- else:
- print_verbose("reaches langfuse for streaming logging!")
- result = kwargs["complete_streaming_response"]
- if genericAPILogger is None:
- genericAPILogger = GenericAPILogger()
- genericAPILogger.log_event(
- kwargs=kwargs,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- user_id=kwargs.get("user", None),
- print_verbose=print_verbose,
- )
- if callback == "clickhouse":
- global clickHouseLogger
- verbose_logger.debug("reaches clickhouse for success logging!")
- kwargs = {}
- for k, v in self.model_call_details.items():
- if (
- k != "original_response"
- ): # copy.deepcopy raises errors as this could be a coroutine
- kwargs[k] = v
- # this only logs streaming once, complete_streaming_response exists i.e when stream ends
- if self.stream:
- verbose_logger.debug(
- f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
- )
- if complete_streaming_response is None:
- continue
- else:
- print_verbose(
- "reaches clickhouse for streaming logging!"
- )
- result = kwargs["complete_streaming_response"]
- if clickHouseLogger is None:
- clickHouseLogger = ClickhouseLogger()
- clickHouseLogger.log_event(
- kwargs=kwargs,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- user_id=kwargs.get("user", None),
- print_verbose=print_verbose,
- )
- if callback == "greenscale":
- kwargs = {}
- for k, v in self.model_call_details.items():
- if (
- k != "original_response"
- ): # copy.deepcopy raises errors as this could be a coroutine
- kwargs[k] = v
- # this only logs streaming once, complete_streaming_response exists i.e when stream ends
- if self.stream:
- verbose_logger.debug(
- f"is complete_streaming_response in kwargs: {kwargs.get('complete_streaming_response', None)}"
- )
- if complete_streaming_response is None:
- continue
- else:
- print_verbose(
- "reaches greenscale for streaming logging!"
- )
- result = kwargs["complete_streaming_response"]
-
- greenscaleLogger.log_event(
- kwargs=kwargs,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- if callback == "cache" and litellm.cache is not None:
- # this only logs streaming once, complete_streaming_response exists i.e when stream ends
- print_verbose("success_callback: reaches cache for logging!")
- kwargs = self.model_call_details
- if self.stream:
- if "complete_streaming_response" not in kwargs:
- print_verbose(
- f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
- )
- pass
- else:
- print_verbose(
- "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
- )
- result = kwargs["complete_streaming_response"]
- # only add to cache once we have a complete streaming response
- litellm.cache.add_cache(result, **kwargs)
- if callback == "athina":
- deep_copy = {}
- for k, v in self.model_call_details.items():
- deep_copy[k] = v
- athinaLogger.log_event(
- kwargs=deep_copy,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- if callback == "traceloop":
- deep_copy = {}
- for k, v in self.model_call_details.items():
- if k != "original_response":
- deep_copy[k] = v
- traceloopLogger.log_event(
- kwargs=deep_copy,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- user_id=kwargs.get("user", None),
- print_verbose=print_verbose,
- )
- if callback == "s3":
- global s3Logger
- if s3Logger is None:
- s3Logger = S3Logger()
- if self.stream:
- if "complete_streaming_response" in self.model_call_details:
- print_verbose(
- "S3Logger Logger: Got Stream Event - Completed Stream Response"
- )
- s3Logger.log_event(
- kwargs=self.model_call_details,
- response_obj=self.model_call_details[
- "complete_streaming_response"
- ],
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- else:
- print_verbose(
- "S3Logger Logger: Got Stream Event - No complete stream response as yet"
- )
- else:
- s3Logger.log_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- if (
- callback == "openmeter"
- and self.model_call_details.get("litellm_params", {}).get(
- "acompletion", False
- )
- == False
- and self.model_call_details.get("litellm_params", {}).get(
- "aembedding", False
- )
- == False
- and self.model_call_details.get("litellm_params", {}).get(
- "aimage_generation", False
- )
- == False
- and self.model_call_details.get("litellm_params", {}).get(
- "atranscription", False
- )
- == False
- ):
- global openMeterLogger
- if openMeterLogger is None:
- print_verbose("Instantiates openmeter client")
- openMeterLogger = OpenMeterLogger()
- if self.stream and complete_streaming_response is None:
- openMeterLogger.log_stream_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- )
- else:
- if self.stream and complete_streaming_response:
- self.model_call_details["complete_response"] = (
- self.model_call_details.get(
- "complete_streaming_response", {}
- )
- )
- result = self.model_call_details["complete_response"]
- openMeterLogger.log_success_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- )
-
- if (
- isinstance(callback, CustomLogger)
- and self.model_call_details.get("litellm_params", {}).get(
- "acompletion", False
- )
- == False
- and self.model_call_details.get("litellm_params", {}).get(
- "aembedding", False
- )
- == False
- and self.model_call_details.get("litellm_params", {}).get(
- "aimage_generation", False
- )
- == False
- and self.model_call_details.get("litellm_params", {}).get(
- "atranscription", False
- )
- == False
- ): # custom logger class
- if self.stream and complete_streaming_response is None:
- callback.log_stream_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- )
- else:
- if self.stream and complete_streaming_response:
- self.model_call_details["complete_response"] = (
- self.model_call_details.get(
- "complete_streaming_response", {}
- )
- )
- result = self.model_call_details["complete_response"]
- callback.log_success_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- )
- if (
- callable(callback) == True
- and self.model_call_details.get("litellm_params", {}).get(
- "acompletion", False
- )
- == False
- and self.model_call_details.get("litellm_params", {}).get(
- "aembedding", False
- )
- == False
- and self.model_call_details.get("litellm_params", {}).get(
- "aimage_generation", False
- )
- == False
- and self.model_call_details.get("litellm_params", {}).get(
- "atranscription", False
- )
- == False
- ): # custom logger functions
- print_verbose(
- f"success callbacks: Running Custom Callback Function"
- )
- customLogger.log_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- callback_func=callback,
- )
-
- except Exception as e:
- print_verbose(
- f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging with integrations {traceback.format_exc()}"
- )
- print_verbose(
- f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
- )
- if capture_exception: # log this error to sentry for debugging
- capture_exception(e)
- except Exception as e:
- print_verbose(
- "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}\n{}".format(
- str(e), traceback.format_exc()
- ),
- log_level="ERROR",
- )
- pass
-
- async def async_success_handler(
- self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs
- ):
- """
- Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions.
- """
- print_verbose("Logging Details LiteLLM-Async Success Call")
- start_time, end_time, result = self._success_handler_helper_fn(
- start_time=start_time, end_time=end_time, result=result, cache_hit=cache_hit
- )
- ## BUILD COMPLETE STREAMED RESPONSE
- complete_streaming_response = None
- if self.stream:
- if result.choices[0].finish_reason is not None: # if it's the last chunk
- self.streaming_chunks.append(result)
- # verbose_logger.debug(f"final set of received chunks: {self.streaming_chunks}")
- try:
- complete_streaming_response = litellm.stream_chunk_builder(
- self.streaming_chunks,
- messages=self.model_call_details.get("messages", None),
- start_time=start_time,
- end_time=end_time,
- )
- except Exception as e:
- print_verbose(
- "Error occurred building stream chunk in success logging: {}\n{}".format(
- str(e), traceback.format_exc()
- ),
- log_level="ERROR",
- )
- complete_streaming_response = None
- else:
- self.streaming_chunks.append(result)
- if complete_streaming_response is not None:
- print_verbose("Async success callbacks: Got a complete streaming response")
- self.model_call_details["async_complete_streaming_response"] = (
- complete_streaming_response
- )
- try:
- if self.model_call_details.get("cache_hit", False) is True:
- self.model_call_details["response_cost"] = 0.0
- else:
- # check if base_model set on azure
- base_model = _get_base_model_from_metadata(
- model_call_details=self.model_call_details
- )
- # base_model defaults to None if not set on model_info
- self.model_call_details["response_cost"] = litellm.completion_cost(
- completion_response=complete_streaming_response,
- model=base_model,
- )
- verbose_logger.debug(
- f"Model={self.model}; cost={self.model_call_details['response_cost']}"
- )
- except litellm.NotFoundError as e:
- verbose_logger.error(
- f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None"
- )
- self.model_call_details["response_cost"] = None
-
- if self.dynamic_async_success_callbacks is not None and isinstance(
- self.dynamic_async_success_callbacks, list
- ):
- callbacks = self.dynamic_async_success_callbacks
- ## keep the internal functions ##
- for callback in litellm._async_success_callback:
- callback_name = ""
- if isinstance(callback, CustomLogger):
- callback_name = callback.__class__.__name__
- if callable(callback):
- callback_name = callback.__name__
- if "_PROXY_" in callback_name:
- callbacks.append(callback)
- else:
- callbacks = litellm._async_success_callback
-
- result = redact_message_input_output_from_logging(
- result=result, litellm_logging_obj=self
- )
-
- for callback in callbacks:
- # check if callback can run for this request
- litellm_params = self.model_call_details.get("litellm_params", {})
- if litellm_params.get("no-log", False) == True:
- # proxy cost tracking callbacks should run
- if not (
- isinstance(callback, CustomLogger)
- and "_PROXY_" in callback.__class__.__name__
- ):
- print_verbose("no-log request, skipping logging")
- continue
- try:
- if kwargs.get("no-log", False) == True:
- print_verbose("no-log request, skipping logging")
- continue
- if callback == "cache" and litellm.cache is not None:
- # set_cache once complete streaming response is built
- print_verbose("async success_callback: reaches cache for logging!")
- kwargs = self.model_call_details
- if self.stream:
- if "async_complete_streaming_response" not in kwargs:
- print_verbose(
- f"async success_callback: reaches cache for logging, there is no async_complete_streaming_response. Kwargs={kwargs}\n\n"
- )
- pass
- else:
- print_verbose(
- "async success_callback: reaches cache for logging, there is a async_complete_streaming_response. Adding to cache"
- )
- result = kwargs["async_complete_streaming_response"]
- # only add to cache once we have a complete streaming response
- if litellm.cache is not None and not isinstance(
- litellm.cache.cache, S3Cache
- ):
- await litellm.cache.async_add_cache(result, **kwargs)
- else:
- litellm.cache.add_cache(result, **kwargs)
- if callback == "openmeter":
- global openMeterLogger
- if self.stream == True:
- if (
- "async_complete_streaming_response"
- in self.model_call_details
- ):
- await openMeterLogger.async_log_success_event(
- kwargs=self.model_call_details,
- response_obj=self.model_call_details[
- "async_complete_streaming_response"
- ],
- start_time=start_time,
- end_time=end_time,
- )
- else:
- await openMeterLogger.async_log_stream_event( # [TODO]: move this to being an async log stream event function
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- )
- else:
- await openMeterLogger.async_log_success_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- )
- if isinstance(callback, CustomLogger): # custom logger class
- if self.stream == True:
- if (
- "async_complete_streaming_response"
- in self.model_call_details
- ):
- await callback.async_log_success_event(
- kwargs=self.model_call_details,
- response_obj=self.model_call_details[
- "async_complete_streaming_response"
- ],
- start_time=start_time,
- end_time=end_time,
- )
- else:
- await callback.async_log_stream_event( # [TODO]: move this to being an async log stream event function
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- )
- else:
- await callback.async_log_success_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- )
- if callable(callback): # custom logger functions
- if self.stream:
- if (
- "async_complete_streaming_response"
- in self.model_call_details
- ):
- await customLogger.async_log_event(
- kwargs=self.model_call_details,
- response_obj=self.model_call_details[
- "async_complete_streaming_response"
- ],
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- callback_func=callback,
- )
- else:
- await customLogger.async_log_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- callback_func=callback,
- )
- if callback == "dynamodb":
- global dynamoLogger
- if dynamoLogger is None:
- dynamoLogger = DyanmoDBLogger()
- if self.stream:
- if (
- "async_complete_streaming_response"
- in self.model_call_details
- ):
- print_verbose(
- "DynamoDB Logger: Got Stream Event - Completed Stream Response"
- )
- await dynamoLogger._async_log_event(
- kwargs=self.model_call_details,
- response_obj=self.model_call_details[
- "async_complete_streaming_response"
- ],
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- else:
- print_verbose(
- "DynamoDB Logger: Got Stream Event - No complete stream response as yet"
- )
- else:
- await dynamoLogger._async_log_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- except Exception as e:
- verbose_logger.error(
- f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
- )
- pass
-
- def _failure_handler_helper_fn(
- self, exception, traceback_exception, start_time=None, end_time=None
- ):
- if start_time is None:
- start_time = self.start_time
- if end_time is None:
- end_time = datetime.datetime.now()
-
- # on some exceptions, model_call_details is not always initialized, this ensures that we still log those exceptions
- if not hasattr(self, "model_call_details"):
- self.model_call_details = {}
-
- self.model_call_details["log_event_type"] = "failed_api_call"
- self.model_call_details["exception"] = exception
- self.model_call_details["traceback_exception"] = traceback_exception
- self.model_call_details["end_time"] = end_time
- self.model_call_details.setdefault("original_response", None)
- return start_time, end_time
-
- def failure_handler(
- self, exception, traceback_exception, start_time=None, end_time=None
- ):
- print_verbose(
- f"Logging Details LiteLLM-Failure Call: {litellm.failure_callback}"
- )
- try:
- start_time, end_time = self._failure_handler_helper_fn(
- exception=exception,
- traceback_exception=traceback_exception,
- start_time=start_time,
- end_time=end_time,
- )
- callbacks = [] # init this to empty in case it's not created
-
- if self.dynamic_failure_callbacks is not None and isinstance(
- self.dynamic_failure_callbacks, list
- ):
- callbacks = self.dynamic_failure_callbacks
- ## keep the internal functions ##
- for callback in litellm.failure_callback:
- if (
- isinstance(callback, CustomLogger)
- and "_PROXY_" in callback.__class__.__name__
- ):
- callbacks.append(callback)
- else:
- callbacks = litellm.failure_callback
-
- result = None # result sent to all loggers, init this to None in case it's not created
-
- result = redact_message_input_output_from_logging(
- result=result, litellm_logging_obj=self
- )
- for callback in callbacks:
- try:
- if callback == "lite_debugger":
- print_verbose("reaches lite_debugger for logging!")
- print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
- result = {
- "model": self.model,
- "created": time.time(),
- "error": traceback_exception,
- "usage": {
- "prompt_tokens": prompt_token_calculator(
- self.model, messages=self.messages
- ),
- "completion_tokens": 0,
- },
- }
- liteDebuggerClient.log_event(
- model=self.model,
- messages=self.messages,
- end_user=self.model_call_details.get("user", "default"),
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- litellm_call_id=self.litellm_call_id,
- print_verbose=print_verbose,
- call_type=self.call_type,
- stream=self.stream,
- )
- if callback == "lunary":
- print_verbose("reaches lunary for logging error!")
-
- model = self.model
-
- input = self.model_call_details["input"]
-
- _type = (
- "embed"
- if self.call_type == CallTypes.embedding.value
- else "llm"
- )
-
- lunaryLogger.log_event(
- type=_type,
- event="error",
- user_id=self.model_call_details.get("user", "default"),
- model=model,
- input=input,
- error=traceback_exception,
- run_id=self.litellm_call_id,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- if callback == "sentry":
- print_verbose("sending exception to sentry")
- if capture_exception:
- capture_exception(exception)
- else:
- print_verbose(
- f"capture exception not initialized: {capture_exception}"
- )
- if callable(callback): # custom logger functions
- customLogger.log_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- callback_func=callback,
- )
- if (
- isinstance(callback, CustomLogger)
- and self.model_call_details.get("litellm_params", {}).get(
- "acompletion", False
- )
- == False
- and self.model_call_details.get("litellm_params", {}).get(
- "aembedding", False
- )
- == False
- ): # custom logger class
- callback.log_failure_event(
- start_time=start_time,
- end_time=end_time,
- response_obj=result,
- kwargs=self.model_call_details,
- )
- if callback == "langfuse":
- global langFuseLogger
- verbose_logger.debug("reaches langfuse for logging failure")
- kwargs = {}
- for k, v in self.model_call_details.items():
- if (
- k != "original_response"
- ): # copy.deepcopy raises errors as this could be a coroutine
- kwargs[k] = v
- # this only logs streaming once, complete_streaming_response exists i.e when stream ends
- if langFuseLogger is None or (
- (
- self.langfuse_public_key is not None
- and self.langfuse_public_key
- != langFuseLogger.public_key
- )
- and (
- self.langfuse_public_key is not None
- and self.langfuse_public_key
- != langFuseLogger.public_key
- )
- ):
- langFuseLogger = LangFuseLogger(
- langfuse_public_key=self.langfuse_public_key,
- langfuse_secret=self.langfuse_secret,
- )
- langFuseLogger.log_event(
- start_time=start_time,
- end_time=end_time,
- response_obj=None,
- user_id=kwargs.get("user", None),
- print_verbose=print_verbose,
- status_message=str(exception),
- level="ERROR",
- kwargs=self.model_call_details,
- )
- if callback == "traceloop":
- traceloopLogger.log_event(
- start_time=start_time,
- end_time=end_time,
- response_obj=None,
- user_id=kwargs.get("user", None),
- print_verbose=print_verbose,
- status_message=str(exception),
- level="ERROR",
- kwargs=self.model_call_details,
- )
- if callback == "prometheus":
- global prometheusLogger
- verbose_logger.debug("reaches prometheus for failure logging!")
- kwargs = {}
- for k, v in self.model_call_details.items():
- if (
- k != "original_response"
- ): # copy.deepcopy raises errors as this could be a coroutine
- kwargs[k] = v
- kwargs["exception"] = str(exception)
- prometheusLogger.log_event(
- kwargs=kwargs,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- user_id=kwargs.get("user", None),
- print_verbose=print_verbose,
- )
-
- if callback == "logfire":
- global logfireLogger
- verbose_logger.debug("reaches logfire for failure logging!")
- kwargs = {}
- for k, v in self.model_call_details.items():
- if (
- k != "original_response"
- ): # copy.deepcopy raises errors as this could be a coroutine
- kwargs[k] = v
- kwargs["exception"] = exception
-
- logfireLogger.log_event(
- kwargs=kwargs,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- level=LogfireLevel.ERROR.value,
- print_verbose=print_verbose,
- )
- except Exception as e:
- print_verbose(
- f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}"
- )
- print_verbose(
- f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
- )
- if capture_exception: # log this error to sentry for debugging
- capture_exception(e)
- except Exception as e:
- print_verbose(
- f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}"
- )
- pass
-
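For context on the callable branch in the failure handler above: user-supplied functions are invoked through customLogger with the call's details. A minimal sketch, assuming the conventional (kwargs, completion_response, start_time, end_time) signature for callable callbacks:

    import litellm

    # Hypothetical callable failure callback; the four positional arguments
    # correspond to the values forwarded by the handler above.
    def log_failure(kwargs, completion_response, start_time, end_time):
        print("call failed:", kwargs.get("exception"))

    litellm.failure_callback = [log_failure]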
- async def async_failure_handler(
- self, exception, traceback_exception, start_time=None, end_time=None
- ):
- """
- Implementing async callbacks, to handle asyncio event loop issues when custom integrations need to use async functions.
- """
- start_time, end_time = self._failure_handler_helper_fn(
- exception=exception,
- traceback_exception=traceback_exception,
- start_time=start_time,
- end_time=end_time,
- )
- result = None # result sent to all loggers, init this to None in case it's not created
- for callback in litellm._async_failure_callback:
- try:
- if isinstance(callback, CustomLogger): # custom logger class
- await callback.async_log_failure_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- ) # type: ignore
- if callable(callback): # custom logger functions
- await customLogger.async_log_event(
- kwargs=self.model_call_details,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- callback_func=callback,
- )
- except Exception as e:
- print_verbose(
- f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}"
- )
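The async failure path above only dispatches to CustomLogger instances and callables. A sketch of a custom async logger, assuming the CustomLogger base class is importable from litellm.integrations.custom_logger:

    import litellm
    from litellm.integrations.custom_logger import CustomLogger

    class MyAsyncLogger(CustomLogger):
        # Parameter names mirror the keyword arguments used by the handlers above.
        async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
            print("success:", kwargs.get("model"))

        async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
            print("failure:", kwargs.get("exception"))

    litellm.callbacks = [MyAsyncLogger()]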
def exception_logging(
@@ -2848,6 +327,11 @@ def _init_custom_logger_compatible_class(
def function_setup(
original_function: str, rules_obj, start_time, *args, **kwargs
): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
+ ### NOTICES ###
+ if litellm.set_verbose is True:
+ verbose_logger.warning(
+ "`litellm.set_verbose` is deprecated. Please set `os.environ['LITELLM_LOG'] = 'DEBUG'` for debug logs."
+ )
try:
global callback_list, add_breadcrumb, user_logger_fn, Logging
function_id = kwargs["id"] if "id" in kwargs else None
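The notice added above points callers at the LITELLM_LOG environment variable as the replacement for litellm.set_verbose. A minimal sketch, assuming the level is read when litellm is imported:

    import os

    # Set the desired level before importing litellm so the logging handler
    # picks it up (e.g. "DEBUG", "INFO", "ERROR").
    os.environ["LITELLM_LOG"] = "DEBUG"

    import litellm  # noqa: E402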
@@ -2888,7 +372,9 @@ def function_setup(
+ litellm.failure_callback
)
)
- set_callbacks(callback_list=callback_list, function_id=function_id)
+ litellm.litellm_core_utils.litellm_logging.set_callbacks(
+ callback_list=callback_list, function_id=function_id
+ )
## ASYNC CALLBACKS
if len(litellm.input_callback) > 0:
removed_async_items = []
@@ -3033,7 +519,7 @@ def function_setup(
):
messages = kwargs.get("input", "speech")
stream = True if "stream" in kwargs and kwargs["stream"] == True else False
- logging_obj = Logging(
+ logging_obj = litellm.litellm_core_utils.litellm_logging.Logging(
model=model,
messages=messages,
stream=stream,
@@ -3191,7 +677,7 @@ def client(original_function):
)
if previous_models is not None:
if litellm.num_retries_per_request <= len(previous_models):
- raise Exception(f"Max retries per request hit!")
+ raise Exception("Max retries per request hit!")
# [OPTIONAL] CHECK CACHE
print_verbose(
@@ -3451,11 +937,6 @@ def client(original_function):
logging_obj.failure_handler(
e, traceback_exception, start_time, end_time
) # DO NOT MAKE THREADED - router retry fallback relies on this!
- my_thread = threading.Thread(
- target=handle_failure,
- args=(e, traceback_exception, start_time, end_time, args, kwargs),
- ) # don't interrupt execution of main thread
- my_thread.start()
if hasattr(e, "message"):
if (
liteDebuggerClient and liteDebuggerClient.dashboard_url != None
@@ -4323,229 +1804,6 @@ def token_counter(
return num_tokens
-def _cost_per_token_custom_pricing_helper(
- prompt_tokens=0,
- completion_tokens=0,
- response_time_ms=None,
- ### CUSTOM PRICING ###
- custom_cost_per_token: Optional[CostPerToken] = None,
- custom_cost_per_second: Optional[float] = None,
-) -> Optional[Tuple[float, float]]:
- """Internal helper function for calculating cost, if custom pricing given"""
- if custom_cost_per_token is None and custom_cost_per_second is None:
- return None
-
- if custom_cost_per_token is not None:
- input_cost = custom_cost_per_token["input_cost_per_token"] * prompt_tokens
- output_cost = custom_cost_per_token["output_cost_per_token"] * completion_tokens
- return input_cost, output_cost
- elif custom_cost_per_second is not None:
- output_cost = custom_cost_per_second * response_time_ms / 1000 # type: ignore
- return 0, output_cost
-
- return None
-
-
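For reference, the helper above only returns a (prompt_cost, completion_cost) tuple when custom pricing is supplied; with hypothetical numbers the two paths work out as follows:

    # Hypothetical custom pricing, per-token path:
    custom_cost_per_token = {
        "input_cost_per_token": 0.0000004,   # 1000 prompt tokens     -> 0.0004 USD
        "output_cost_per_token": 0.0000016,  # 200 completion tokens  -> 0.00032 USD
    }

    # Hypothetical per-second path:
    custom_cost_per_second = 0.001           # response_time_ms=2500  -> (0, 0.0025)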
-def cost_per_token(
- model: str = "",
- prompt_tokens=0,
- completion_tokens=0,
- response_time_ms=None,
- custom_llm_provider=None,
- region_name=None,
- ### CUSTOM PRICING ###
- custom_cost_per_token: Optional[CostPerToken] = None,
- custom_cost_per_second: Optional[float] = None,
-) -> Tuple[float, float]:
- """
- Calculates the cost per token for a given model, prompt tokens, and completion tokens.
-
- Parameters:
- model (str): The name of the model to use. Default is ""
- prompt_tokens (int): The number of tokens in the prompt.
- completion_tokens (int): The number of tokens in the completion.
- response_time (float): The amount of time, in milliseconds, it took the call to complete.
- custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
- custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
- custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
-
- Returns:
- tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
- """
- if model is None:
- raise Exception("Invalid arg. Model cannot be none.")
- ## CUSTOM PRICING ##
- response_cost = _cost_per_token_custom_pricing_helper(
- prompt_tokens=prompt_tokens,
- completion_tokens=completion_tokens,
- response_time_ms=response_time_ms,
- custom_cost_per_second=custom_cost_per_second,
- custom_cost_per_token=custom_cost_per_token,
- )
- if response_cost is not None:
- return response_cost[0], response_cost[1]
-
- # given
- prompt_tokens_cost_usd_dollar: float = 0
- completion_tokens_cost_usd_dollar: float = 0
- model_cost_ref = litellm.model_cost
- model_with_provider = model
- if custom_llm_provider is not None:
- model_with_provider = custom_llm_provider + "/" + model
- if region_name is not None:
- model_with_provider_and_region = (
- f"{custom_llm_provider}/{region_name}/{model}"
- )
- if (
- model_with_provider_and_region in model_cost_ref
- ): # use region based pricing, if it's available
- model_with_provider = model_with_provider_and_region
-
- model_without_prefix = model
- model_parts = model.split("/")
- if len(model_parts) > 1:
- model_without_prefix = model_parts[1]
- else:
- model_without_prefix = model
- """
- Code block that formats model to lookup in litellm.model_cost
- Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
- Option2. model = "openai/gpt-4" - model = provider/model
- Option3. model = "anthropic.claude-3" - model = model
- """
- if (
- model_with_provider in model_cost_ref
- ): # Option 2. use model with provider, model = "openai/gpt-4"
- model = model_with_provider
- elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4"
- model = model
- elif (
- model_without_prefix in model_cost_ref
- ): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
- model = model_without_prefix
-
- # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
- print_verbose(f"Looking up model={model} in model_cost_map")
- if model in model_cost_ref:
- print_verbose(f"Success: model={model} in model_cost_map")
- print_verbose(
- f"prompt_tokens={prompt_tokens}; completion_tokens={completion_tokens}"
- )
- if (
- model_cost_ref[model].get("input_cost_per_token", None) is not None
- and model_cost_ref[model].get("output_cost_per_token", None) is not None
- ):
- ## COST PER TOKEN ##
- prompt_tokens_cost_usd_dollar = (
- model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
- )
- completion_tokens_cost_usd_dollar = (
- model_cost_ref[model]["output_cost_per_token"] * completion_tokens
- )
- elif (
- model_cost_ref[model].get("output_cost_per_second", None) is not None
- and response_time_ms is not None
- ):
- print_verbose(
- f"For model={model} - output_cost_per_second: {model_cost_ref[model].get('output_cost_per_second')}; response time: {response_time_ms}"
- )
- ## COST PER SECOND ##
- prompt_tokens_cost_usd_dollar = 0
- completion_tokens_cost_usd_dollar = (
- model_cost_ref[model]["output_cost_per_second"]
- * response_time_ms
- / 1000
- )
- elif (
- model_cost_ref[model].get("input_cost_per_second", None) is not None
- and response_time_ms is not None
- ):
- print_verbose(
- f"For model={model} - input_cost_per_second: {model_cost_ref[model].get('input_cost_per_second')}; response time: {response_time_ms}"
- )
- ## COST PER SECOND ##
- prompt_tokens_cost_usd_dollar = (
- model_cost_ref[model]["input_cost_per_second"] * response_time_ms / 1000
- )
- completion_tokens_cost_usd_dollar = 0.0
- print_verbose(
- f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
- )
- return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
- elif "ft:gpt-3.5-turbo" in model:
- print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM")
- # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm
- prompt_tokens_cost_usd_dollar = (
- model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
- )
- completion_tokens_cost_usd_dollar = (
- model_cost_ref["ft:gpt-3.5-turbo"]["output_cost_per_token"]
- * completion_tokens
- )
- return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
- elif "ft:davinci-002" in model:
- print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM")
- # fuzzy match ft:davinci-002:abcd-id-cool-litellm
- prompt_tokens_cost_usd_dollar = (
- model_cost_ref["ft:davinci-002"]["input_cost_per_token"] * prompt_tokens
- )
- completion_tokens_cost_usd_dollar = (
- model_cost_ref["ft:davinci-002"]["output_cost_per_token"]
- * completion_tokens
- )
- return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
- elif "ft:babbage-002" in model:
- print_verbose(f"Cost Tracking: {model} is an OpenAI Fine-Tuned LLM")
- # fuzzy match ft:babbage-002:abcd-id-cool-litellm
- prompt_tokens_cost_usd_dollar = (
- model_cost_ref["ft:babbage-002"]["input_cost_per_token"] * prompt_tokens
- )
- completion_tokens_cost_usd_dollar = (
- model_cost_ref["ft:babbage-002"]["output_cost_per_token"]
- * completion_tokens
- )
- return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
- elif model in litellm.azure_llms:
- verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM")
- model = litellm.azure_llms[model]
- verbose_logger.debug(
- f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}"
- )
- prompt_tokens_cost_usd_dollar = (
- model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
- )
- verbose_logger.debug(
- f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}"
- )
- completion_tokens_cost_usd_dollar = (
- model_cost_ref[model]["output_cost_per_token"] * completion_tokens
- )
- return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
- elif model in litellm.azure_embedding_models:
- verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model")
- model = litellm.azure_embedding_models[model]
- prompt_tokens_cost_usd_dollar = (
- model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
- )
- completion_tokens_cost_usd_dollar = (
- model_cost_ref[model]["output_cost_per_token"] * completion_tokens
- )
- return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
- else:
- # if model is not in model_prices_and_context_window.json, raise an exception to let users know
- error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
- raise litellm.exceptions.NotFoundError( # type: ignore
- message=error_str,
- model=model,
- response=httpx.Response(
- status_code=404,
- content=error_str,
- request=httpx.Request(method="cost_per_token", url="https://github.com/BerriAI/litellm"), # type: ignore
- ),
- llm_provider="",
- )
-
-
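A usage sketch matching the docstring above, assuming cost_per_token remains exposed on the top-level litellm package:

    import litellm

    prompt_cost, completion_cost = litellm.cost_per_token(
        model="gpt-3.5-turbo",
        prompt_tokens=1000,
        completion_tokens=200,
    )
    print(f"prompt=${prompt_cost:.6f} completion=${completion_cost:.6f}")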
def supports_httpx_timeout(custom_llm_provider: str) -> bool:
"""
Helper function to know if a provider implementation supports httpx timeout
@@ -7513,264 +4771,6 @@ def validate_environment(model: Optional[str] = None) -> dict:
return {"keys_in_environment": keys_in_environment, "missing_keys": missing_keys}
-def set_callbacks(callback_list, function_id=None):
- global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, logfireLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger, greenscaleLogger, openMeterLogger
-
- try:
- for callback in callback_list:
- print_verbose(f"init callback list: {callback}")
- if callback == "sentry":
- try:
- import sentry_sdk
- except ImportError:
- print_verbose("Package 'sentry_sdk' is missing. Installing it...")
- subprocess.check_call(
- [sys.executable, "-m", "pip", "install", "sentry_sdk"]
- )
- import sentry_sdk
- sentry_sdk_instance = sentry_sdk
- sentry_trace_rate = (
- os.environ.get("SENTRY_API_TRACE_RATE")
- if "SENTRY_API_TRACE_RATE" in os.environ
- else "1.0"
- )
- sentry_sdk_instance.init(
- dsn=os.environ.get("SENTRY_DSN"),
- traces_sample_rate=float(sentry_trace_rate),
- )
- capture_exception = sentry_sdk_instance.capture_exception
- add_breadcrumb = sentry_sdk_instance.add_breadcrumb
- elif callback == "posthog":
- try:
- from posthog import Posthog
- except ImportError:
- print_verbose("Package 'posthog' is missing. Installing it...")
- subprocess.check_call(
- [sys.executable, "-m", "pip", "install", "posthog"]
- )
- from posthog import Posthog
- posthog = Posthog(
- project_api_key=os.environ.get("POSTHOG_API_KEY"),
- host=os.environ.get("POSTHOG_API_URL"),
- )
- elif callback == "slack":
- try:
- from slack_bolt import App
- except ImportError:
- print_verbose("Package 'slack_bolt' is missing. Installing it...")
- subprocess.check_call(
- [sys.executable, "-m", "pip", "install", "slack_bolt"]
- )
- from slack_bolt import App
- slack_app = App(
- token=os.environ.get("SLACK_API_TOKEN"),
- signing_secret=os.environ.get("SLACK_API_SECRET"),
- )
- alerts_channel = os.environ["SLACK_API_CHANNEL"]
- print_verbose(f"Initialized Slack App: {slack_app}")
- elif callback == "traceloop":
- traceloopLogger = TraceloopLogger()
- elif callback == "athina":
- athinaLogger = AthinaLogger()
- print_verbose("Initialized Athina Logger")
- elif callback == "helicone":
- heliconeLogger = HeliconeLogger()
- elif callback == "lunary":
- lunaryLogger = LunaryLogger()
- elif callback == "promptlayer":
- promptLayerLogger = PromptLayerLogger()
- elif callback == "langfuse":
- langFuseLogger = LangFuseLogger()
- elif callback == "openmeter":
- openMeterLogger = OpenMeterLogger()
- elif callback == "datadog":
- dataDogLogger = DataDogLogger()
- elif callback == "prometheus":
- if prometheusLogger is None:
- prometheusLogger = PrometheusLogger()
- elif callback == "dynamodb":
- dynamoLogger = DyanmoDBLogger()
- elif callback == "s3":
- s3Logger = S3Logger()
- elif callback == "wandb":
- weightsBiasesLogger = WeightsBiasesLogger()
- elif callback == "langsmith":
- langsmithLogger = LangsmithLogger()
- elif callback == "logfire":
- logfireLogger = LogfireLogger()
- elif callback == "aispend":
- aispendLogger = AISpendLogger()
- elif callback == "berrispend":
- berrispendLogger = BerriSpendLogger()
- elif callback == "supabase":
- print_verbose(f"instantiating supabase")
- supabaseClient = Supabase()
- elif callback == "greenscale":
- greenscaleLogger = GreenscaleLogger()
- print_verbose("Initialized Greenscale Logger")
- elif callback == "lite_debugger":
- print_verbose(f"instantiating lite_debugger")
- if function_id:
- liteDebuggerClient = LiteDebugger(email=function_id)
- elif litellm.token:
- liteDebuggerClient = LiteDebugger(email=litellm.token)
- elif litellm.email:
- liteDebuggerClient = LiteDebugger(email=litellm.email)
- else:
- liteDebuggerClient = LiteDebugger(email=str(uuid.uuid4()))
- elif callable(callback):
- customLogger = CustomLogger()
- except Exception as e:
- raise e
-
-
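From the caller's side, the string names handled by set_callbacks above map onto the usual list-based configuration; a sketch:

    import litellm

    # String names (e.g. "langfuse", "sentry") are resolved to logger instances;
    # callables are treated as custom logger functions.
    litellm.success_callback = ["langfuse"]
    litellm.failure_callback = ["sentry"]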
-# NOTE: DEPRECATING this in favor of using failure_handler() in Logging:
-def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
- global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger
- try:
- # print_verbose(f"handle_failure args: {args}")
- # print_verbose(f"handle_failure kwargs: {kwargs}")
-
- success_handler = additional_details.pop("success_handler", None)
- failure_handler = additional_details.pop("failure_handler", None)
-
- additional_details["Event_Name"] = additional_details.pop(
- "failed_event_name", "litellm.failed_query"
- )
- print_verbose(f"self.failure_callback: {litellm.failure_callback}")
- for callback in litellm.failure_callback:
- try:
- if callback == "slack":
- slack_msg = ""
- if len(kwargs) > 0:
- for key in kwargs:
- slack_msg += f"{key}: {kwargs[key]}\n"
- if len(args) > 0:
- for i, arg in enumerate(args):
- slack_msg += f"LiteLLM_Args_{str(i)}: {arg}"
- for detail in additional_details:
- slack_msg += f"{detail}: {additional_details[detail]}\n"
- slack_msg += f"Traceback: {traceback_exception}"
- truncated_slack_msg = textwrap.shorten(
- slack_msg, width=512, placeholder="..."
- )
- slack_app.client.chat_postMessage(
- channel=alerts_channel, text=truncated_slack_msg
- )
- elif callback == "sentry":
- capture_exception(exception)
- elif callback == "posthog":
- print_verbose(
- f"inside posthog, additional_details: {len(additional_details.keys())}"
- )
- ph_obj = {}
- if len(kwargs) > 0:
- ph_obj = kwargs
- if len(args) > 0:
- for i, arg in enumerate(args):
- ph_obj["litellm_args_" + str(i)] = arg
- for detail in additional_details:
- ph_obj[detail] = additional_details[detail]
- event_name = additional_details["Event_Name"]
- print_verbose(f"ph_obj: {ph_obj}")
- print_verbose(f"PostHog Event Name: {event_name}")
- if "user_id" in additional_details:
- posthog.capture(
- additional_details["user_id"], event_name, ph_obj
- )
- else: # PostHog calls require a unique id to identify a user - https://posthog.com/docs/libraries/python
- unique_id = str(uuid.uuid4())
- posthog.capture(unique_id, event_name)
- print_verbose(f"successfully logged to PostHog!")
- elif callback == "berrispend":
- print_verbose("reaches berrispend for logging!")
- model = args[0] if len(args) > 0 else kwargs["model"]
- messages = args[1] if len(args) > 1 else kwargs["messages"]
- result = {
- "model": model,
- "created": time.time(),
- "error": traceback_exception,
- "usage": {
- "prompt_tokens": prompt_token_calculator(
- model, messages=messages
- ),
- "completion_tokens": 0,
- },
- }
- berrispendLogger.log_event(
- model=model,
- messages=messages,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- elif callback == "aispend":
- print_verbose("reaches aispend for logging!")
- model = args[0] if len(args) > 0 else kwargs["model"]
- messages = args[1] if len(args) > 1 else kwargs["messages"]
- result = {
- "model": model,
- "created": time.time(),
- "usage": {
- "prompt_tokens": prompt_token_calculator(
- model, messages=messages
- ),
- "completion_tokens": 0,
- },
- }
- aispendLogger.log_event(
- model=model,
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- print_verbose=print_verbose,
- )
- elif callback == "supabase":
- print_verbose("reaches supabase for logging!")
- print_verbose(f"supabaseClient: {supabaseClient}")
- model = args[0] if len(args) > 0 else kwargs["model"]
- messages = args[1] if len(args) > 1 else kwargs["messages"]
- result = {
- "model": model,
- "created": time.time(),
- "error": traceback_exception,
- "usage": {
- "prompt_tokens": prompt_token_calculator(
- model, messages=messages
- ),
- "completion_tokens": 0,
- },
- }
- supabaseClient.log_event(
- model=model,
- messages=messages,
- end_user=kwargs.get("user", "default"),
- response_obj=result,
- start_time=start_time,
- end_time=end_time,
- litellm_call_id=kwargs["litellm_call_id"],
- print_verbose=print_verbose,
- )
- except:
- print_verbose(
- f"Error Occurred while logging failure: {traceback.format_exc()}"
- )
- pass
-
- if failure_handler and callable(failure_handler):
- call_details = {
- "exception": exception,
- "additional_details": additional_details,
- }
- failure_handler(call_details)
- pass
- except Exception as e:
- # LOGGING
- exception_logging(logger_fn=user_logger_fn, exception=e)
- pass
-
-
async def convert_to_streaming_response_async(response_object: Optional[dict] = None):
"""
Asynchronously converts a response object to a streaming response.
diff --git a/poetry.lock b/poetry.lock
index ac946690c..750def101 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2174,7 +2174,6 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
- {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -3198,4 +3197,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
-content-hash = "73054c657782120d170dc168ef07b494a916f1f810ff9c2b0ac878bd857a9dac"
+content-hash = "62156f0fa65f39f36576ef6ed91d773658399757111dd4b0660e1ce2a58ea7b2"