forked from phoenix/litellm-mirror
feat(proxy_server): adds create-proxy feature
parent 3da89a58ae
commit b28c055896
11 changed files with 246 additions and 124 deletions
Binary file not shown.

@@ -13,6 +13,24 @@ class CustomLogger:
    def __init__(self, callback_func):
        # Instance variables
        self.callback_func = callback_func

    def log_input_event(self, model, messages, kwargs, print_verbose):
        try:
            print_verbose(
                f"Custom Logger - Enters logging function for model {kwargs}"
            )
            kwargs["model"] = model
            kwargs["messages"] = messages
            kwargs["log_event_type"] = "pre_api_call"
            self.callback_func(
                kwargs,
            )
            print_verbose(
                f"Custom Logger - model call details: {kwargs}"
            )
        except:
            traceback.print_exc()
            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")

    def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
        # Method definition

@@ -20,6 +38,7 @@ class CustomLogger:
        print_verbose(
            f"Custom Logger - Enters logging function for model {kwargs}"
        )
        kwargs["log_event_type"] = "post_api_call"
        self.callback_func(
            kwargs, # kwargs to func
            response_obj,
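For context, a minimal sketch of how a callable callback gets wired into these events, following the registration pattern used by proxy_server.py later in this commit (litellm.input_callback / litellm.success_callback); the function name my_log_func is illustrative only:

import litellm

def my_log_func(kwargs, completion_response=None, start_time=None, end_time=None):
    # CustomLogger.log_input_event / log_event forward the call details here;
    # "log_event_type" is set to "pre_api_call" or "post_api_call" before the call
    print(kwargs.get("log_event_type"), kwargs.get("model"))

litellm.input_callback = [my_log_func]    # pre-call events
litellm.success_callback = [my_log_func]  # post-call events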
@@ -209,7 +209,7 @@ def init_bedrock_client(
def convert_messages_to_prompt(messages, provider):
    # handle anthropic prompts using anthropic constants
    if provider == "anthropic":
        prompt = ""
        prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
        for message in messages:
            if "role" in message:
                if message["role"] == "user":
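Seeding the prompt with HUMAN_PROMPT before the per-message loop appears to be what produces the doubled "Human:" prefix captured in the api_log.json example added later in this commit; a hedged sketch of the expected result for that single-message example (constant values assumed from the anthropic SDK):

# hypothetical check, mirroring the "input" field recorded in litellm/proxy/api_log.json
messages = [{"role": "user", "content": "what do you know?"}]
expected = "\n\nHuman: \n\nHuman: what do you know?\n\nAssistant: "
# convert_messages_to_prompt(messages, "anthropic") would be expected to return `expected`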
@@ -7,7 +7,6 @@ def default_pt(messages):
    return " ".join(message["content"] for message in messages)

# Llama2 prompt template
llama_2_special_tokens = ["<s>", "</s>"]
def llama_2_chat_pt(messages):
    prompt = custom_prompt(
        role_dict={
85  litellm/proxy/api_log.json  Normal file

@@ -0,0 +1,85 @@
{
  "20231012182157625128": {
    "pre_api_call": {
      "model": "anthropic.claude-v2",
      "messages": [
        {
          "role": "user",
          "content": "what do you know?"
        }
      ],
      "optional_params": {
        "temperature": 0.1,
        "stream": true
      },
      "litellm_params": {
        "return_async": false,
        "api_key": null,
        "force_timeout": 600,
        "logger_fn": null,
        "verbose": false,
        "custom_llm_provider": "bedrock",
        "api_base": null,
        "litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635",
        "model_alias_map": {},
        "completion_call_id": null,
        "metadata": null,
        "stream_response": {}
      },
      "input": "\n\nHuman: \n\nHuman: what do you know?\n\nAssistant: ",
      "api_key": "",
      "additional_args": {
        "complete_input_dict": "{\"prompt\": \"\\n\\nHuman: \\n\\nHuman: what do you know?\\n\\nAssistant: \", \"temperature\": 0.1, \"max_tokens_to_sample\": 256}"
      },
      "log_event_type": "pre_api_call"
    },
    "post_api_call": {
      "model": "anthropic.claude-v2",
      "messages": [
        {
          "role": "user",
          "content": "what do you know?"
        }
      ],
      "optional_params": {
        "temperature": 0.1,
        "stream": true
      },
      "litellm_params": {
        "return_async": false,
        "api_key": null,
        "force_timeout": 600,
        "logger_fn": null,
        "verbose": false,
        "custom_llm_provider": "bedrock",
        "api_base": null,
        "litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635",
        "model_alias_map": {},
        "completion_call_id": null,
        "metadata": null,
        "stream_response": {}
      },
      "input": null,
      "api_key": null,
      "additional_args": {},
      "log_event_type": "post_api_call",
      "original_response": "<class 'generator'>",
      "complete_streaming_response": {
        "id": "chatcmpl-1757e5ea-71f2-44a2-9d8d-1ba8238a7c99",
        "object": "chat.completion.chunk",
        "created": 1697160117,
        "model": "anthropic.claude-v2",
        "choices": [
          {
            "index": 0,
            "message": {
              "role": "assistant",
              "content": " I'm Claude, an AI assistant created by Anthropic. I don't actually have general knowledge about the world. I'm an AI conversational model trained by Anthropic to be helpful, harmless, and honest."
            },
            "finish_reason": "stop_sequence"
          }
        ]
      }
    }
  }
}
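A small sketch of how this log could be read back, assuming the nested layout shown above (timestamp key mapping to pre_api_call / post_api_call entries):

import json

# read the proxy's request log (path as created in this commit)
with open("litellm/proxy/api_log.json") as f:
    log = json.load(f)

for dt_key, entry in log.items():
    model = entry["pre_api_call"]["model"]
    post = entry.get("post_api_call", {})
    reply = post.get("complete_streaming_response", {})
    choices = reply.get("choices", [])
    answer = choices[0]["message"]["content"] if choices else None
    print(dt_key, model, answer)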
@@ -1,11 +0,0 @@
2023-10-11 15:02:23 - Model gpt-4 Cost: $0.00063000
2023-10-11 15:02:57 - Model gpt-4 Cost: $0.00093000
2023-10-11 15:09:10 - Model gpt-4 Cost: $0.00135000
2023-10-11 15:09:50 - Model gpt-4 Cost: $0.01626000
2023-10-11 15:12:57 - Model gpt-4 Cost: $0.01974000
2023-10-11 15:13:35 - Model gpt-4 Cost: $0.02415000
2023-10-11 15:14:04 - Model gpt-4 Cost: $0.03291000
2023-10-11 15:18:16 - Model gpt-4 Cost: $0.03669000
2023-10-11 15:19:12 - Model gpt-4 Cost: $0.04806000
2023-10-11 21:11:06 - Model claude-2 Cost: $0.00041534
2023-10-11 21:15:34 - Model claude-2 Cost: $0.00054606
@@ -1,6 +1,6 @@
import click
import subprocess, traceback
import os, appdirs
import os, sys
import random
from dotenv import load_dotenv

@@ -8,42 +8,33 @@ load_dotenv()
from importlib import resources
import shutil

config_filename = "litellm.secrets.toml"
pkg_config_filename = "template.secrets.toml"
# Using appdirs to determine user-specific config path
config_dir = appdirs.user_config_dir("litellm")
user_config_path = os.path.join(config_dir, config_filename)

def run_ollama_serve():
    command = ['ollama', 'serve']

    with open(os.devnull, 'w') as devnull:
        process = subprocess.Popen(command, stdout=devnull, stderr=devnull)

def open_config():
    # Create the .env file if it doesn't exist
    if not os.path.exists(user_config_path):
        # If user's env doesn't exist, copy the default env from the package
        here = os.path.abspath(os.path.dirname(__file__))
        parent_dir = os.path.dirname(here)
        default_env_path = os.path.join(parent_dir, pkg_config_filename)
        # Ensure the user-specific directory exists
        os.makedirs(config_dir, exist_ok=True)
        # Copying the file using shutil.copy
        try:
            shutil.copy(default_env_path, user_config_path)
        except Exception as e:
            print(f"Failed to copy .template.secrets.toml: {e}")
def clone_subfolder(repo_url, subfolder, destination):

    # Open the .env file in the default editor
    try:
        if os.name == 'nt': # For Windows
            os.startfile(user_config_path)
        elif os.name == 'posix': # For MacOS, Linux, and anything using Bash
            subprocess.call(('open', '-t', user_config_path))
    except:
        pass
    print(f"LiteLLM: Proxy Server Config - {user_config_path}")
    # Clone the full repo
    repo_name = repo_url.split('/')[-1]
    repo_master = os.path.join(destination, "repo_master")
    subprocess.run(['git', 'clone', repo_url, repo_master])

    # Move into the subfolder
    subfolder_path = os.path.join(repo_master, subfolder)

    # Copy subfolder to destination
    for file_name in os.listdir(subfolder_path):
        source = os.path.join(subfolder_path, file_name)
        if os.path.isfile(source):
            shutil.copy(source, destination)
        else:
            dest_path = os.path.join(destination, file_name)
            shutil.copytree(source, dest_path)

    # Remove cloned repo folder
    subprocess.run(['rm', '-rf', os.path.join(destination, "repo_master")])

def is_port_in_use(port):
    import socket
@@ -60,23 +51,31 @@ def is_port_in_use(port):
@click.option('--temperature', default=None, type=float, help='Set temperature for the model')
@click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
@click.option('--drop_params', is_flag=True, help='Drop any unmapped params')
@click.option('--create_proxy', is_flag=True, help='Creates a local OpenAI-compatible server template')
@click.option('--add_function_to_prompt', is_flag=True, help='If function passed but unsupported, pass it as prompt')
@click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.`')
@click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
@click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
@click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
@click.option('--local', is_flag=True, default=False, help='for local debugging')
@click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, max_budget, telemetry, config, test, local, cost):
    if config:
        open_config()
        return
def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, max_budget, telemetry, test, local, cost):
    if local:
        from proxy_server import app, initialize, deploy_proxy, print_cost_logs
        debug = True
    else:
        from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
    try:
        from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
    except ImportError as e:
        from proxy_server import app, initialize, deploy_proxy, print_cost_logs

    if create_proxy == True:
        repo_url = 'https://github.com/BerriAI/litellm'
        subfolder = 'litellm/proxy'
        destination = os.path.join(os.getcwd(), 'litellm-proxy')

        clone_subfolder(repo_url, subfolder, destination)

        return
    if deploy == True:
        print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n")
        print(f"\033[32mLiteLLM: Deploying proxy for model: {model}\033[0m\n")
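In other words, passing --create_proxy is roughly equivalent to the sketch below (values taken from run_server above; git and network access are assumed, and clone_subfolder is the helper defined earlier in this diff):

import os

# what `--create_proxy` boils down to in this commit
repo_url = 'https://github.com/BerriAI/litellm'
subfolder = 'litellm/proxy'
destination = os.path.join(os.getcwd(), 'litellm-proxy')
clone_subfolder(repo_url, subfolder, destination)  # clones the repo, copies litellm/proxy, removes the clone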
@@ -1,9 +1,10 @@
import sys, os, platform
import sys, os, platform, time, copy
import threading
import shutil, random, traceback
sys.path.insert(
    0, os.path.abspath("../..")
) # Adds the parent directory to the system path
# sys.path.insert(
#     0, os.path.abspath("../..")
# ) # Adds the parent directory to the system path - for litellm local dev


try:
    import uvicorn
@@ -76,12 +77,10 @@ user_max_tokens = None
user_temperature = None
user_telemetry = False
user_config = None
config_filename = "litellm.secrets.toml"
pkg_config_filename = "template.secrets.toml"
# Using appdirs to determine user-specific config path
config_dir = appdirs.user_config_dir("litellm")
config_filename = "secrets.toml"
config_dir = os.getcwd()
user_config_path = os.path.join(config_dir, config_filename)

log_file = 'api_log.json'
#### HELPER FUNCTIONS ####
def print_verbose(print_statement):
    global user_debug
@@ -98,15 +97,6 @@ def usage_telemetry(): # helps us know if people are using this feature. Set `li
def load_config():
    try:
        global user_config, user_api_base, user_max_tokens, user_temperature, user_model
        if not os.path.exists(user_config_path):
            # If user's config doesn't exist, copy the default config from the package
            here = os.path.abspath(os.path.dirname(__file__))
            parent_dir = os.path.dirname(here)
            default_config_path = os.path.join(parent_dir, pkg_config_filename)
            # Ensure the user-specific directory exists
            os.makedirs(config_dir, exist_ok=True)
            # Copying the file using shutil.copy
            shutil.copy(default_config_path, user_config_path)
        # As the .env file is typically much simpler in structure, we use load_dotenv here directly
        with open(user_config_path, "rb") as f:
            user_config = tomllib.load(f)
@@ -133,11 +123,8 @@ def load_config():

        ## load model config - to set this run `litellm --config`
        model_config = None
        if user_model == "local":
            model_config = user_config["local_model"]
        elif user_model == "hosted":
            model_config = user_config["hosted_model"]
            litellm.max_budget = model_config.get("max_budget", None) # check if user set a budget for hosted model - e.g. gpt-4
        if user_model in user_config["model"]:
            model_config = user_config["model"][user_model]

        print_verbose(f"user_config: {user_config}")
        print_verbose(f"model_config: {model_config}")
@@ -317,7 +304,55 @@ def track_cost_callback(
    except:
        pass

litellm.success_callback = [track_cost_callback]
def logger(
    kwargs, # kwargs to completion
    completion_response=None, # response from completion
    start_time=None,
    end_time=None # start/end time
):
    log_event_type = kwargs['log_event_type']
    print(f"REACHES LOGGER: {log_event_type}")
    try:
        if log_event_type == 'pre_api_call':
            inference_params = copy.deepcopy(kwargs)
            timestamp = inference_params.pop('start_time')
            dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23]
            log_data = {
                dt_key: {
                    'pre_api_call': inference_params
                }
            }

            try:
                with open(log_file, 'r') as f:
                    existing_data = json.load(f)
            except FileNotFoundError:
                existing_data = {}

            existing_data.update(log_data)

            with open(log_file, 'w') as f:
                json.dump(existing_data, f, indent=2)
        elif log_event_type == 'post_api_call':
            print(f"post api call kwargs: {kwargs}")
            if "stream" not in kwargs["optional_params"] or kwargs["optional_params"]["stream"] is False or kwargs.get("complete_streaming_response", False):
                inference_params = copy.deepcopy(kwargs)
                timestamp = inference_params.pop('start_time')
                dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23]

                with open(log_file, 'r') as f:
                    existing_data = json.load(f)

                existing_data[dt_key]['post_api_call'] = inference_params

                with open(log_file, 'w') as f:
                    json.dump(existing_data, f, indent=2)
    except:
        traceback.print_exc()

litellm.input_callback = [logger]
litellm.success_callback = [logger]
litellm.failure_callback = [logger]

def litellm_completion(data, type):
    try:
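As a side note, the dt_key built above is what produces the timestamp keys seen in api_log.json; a quick sketch of that mapping (the datetime value is chosen to match the example log entry in this commit):

from datetime import datetime

# start_time popped from kwargs is a datetime; the [:23] slice drops nothing here
# because the formatted string is only 20 characters long
ts = datetime(2023, 10, 12, 18, 21, 57, 625128)
dt_key = ts.strftime("%Y%m%d%H%M%S%f")[:23]
print(dt_key)  # 20231012182157625128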
29  litellm/proxy/secrets.toml  Normal file

@@ -0,0 +1,29 @@
[keys]
# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key

[general]
# add_function_to_prompt = True # e.g: Ollama doesn't support functions, so add it to the prompt instead
# drop_params = True # drop any params not supported by the provider (e.g. Ollama)

[model."ollama/llama2"] # run via `litellm --model ollama/llama2`
# max_tokens = "" # set max tokens for the model
# temperature = "" # set temperature for the model
# api_base = "" # set a custom api base for the model

[model."ollama/llama2".prompt_template] # [OPTIONAL] LiteLLM can automatically formats the prompt - docs: https://docs.litellm.ai/docs/completion/prompt_formatting
# MODEL_SYSTEM_MESSAGE_START_TOKEN = "[INST] <<SYS>>\n" # This does not need to be a token, can be any string
# MODEL_SYSTEM_MESSAGE_END_TOKEN = "\n<</SYS>>\n [/INST]\n" # This does not need to be a token, can be any string

# MODEL_USER_MESSAGE_START_TOKEN = "[INST] " # This does not need to be a token, can be any string
# MODEL_USER_MESSAGE_END_TOKEN = " [/INST]\n" # Applies only to user messages. Can be any string.

# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "" # Applies only to assistant messages. Can be any string.
# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "\n" # Applies only to system messages. Can be any string.

# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
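A small sketch of how load_config() in proxy_server.py (see the hunk above) would resolve this per-model table when the proxy is started with --model ollama/llama2; the standalone file path here is illustrative:

import tomllib  # Python 3.11+; older interpreters would need a third-party TOML parser

with open("secrets.toml", "rb") as f:
    user_config = tomllib.load(f)

user_model = "ollama/llama2"
if user_model in user_config["model"]:
    model_config = user_config["model"][user_model]
    # e.g. model_config.get("max_tokens"), model_config.get("prompt_template")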
@@ -1,50 +0,0 @@
[keys]
# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key

[general]
# add_function_to_prompt = True # e.g: Ollama doesn't support functions, so add it to the prompt instead
# drop_params = True # drop any params not supported by the provider (e.g. Ollama)

[local_model] # run via `litellm --model local`
# model_name = "ollama/codellama" # Uncomment to set a local model
# max_tokens = "" # set max tokens for the model
# temperature = "" # set temperature for the model
# api_base = "" # set a custom api base for the model

[local_model.prompt_template] # Set a custom prompt template for your local model - docs: https://docs.litellm.ai/docs/completion/prompt_formatting#format-prompt-yourself
# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string

# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.

# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to system messages. Can be any string.

# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt

[hosted_model] # run via `litellm --model hosted`
# model_name = "gpt-4"
# max_tokens = "" # set max tokens for the model
# temperature = "" # set temperature for the model
# api_base = "" # set a custom api base for the model
# max_budget = 100 # sets a max budget of $100 for your hosted model

[hosted_model.prompt_template] # Set a custom prompt template for your hosted model - docs: https://docs.litellm.ai/docs/completion/prompt_formatting#format-prompt-yourself
# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string

# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.

# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to system messages. Can be any string.

# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
@@ -53,7 +53,6 @@ from .exceptions import (
)
from typing import cast, List, Dict, Union, Optional
from .caching import Cache
from .llms.prompt_templates.factory import llama_2_special_tokens

####### ENVIRONMENT VARIABLES ####################
dotenv.load_dotenv() # Loading env variables using dotenv
@@ -249,6 +248,7 @@ class Logging:
            "messages": self.messages,
            "optional_params": self.optional_params,
            "litellm_params": self.litellm_params,
            "start_time": self.start_time
        }

    def pre_call(self, input, api_key, model=None, additional_args={}):
@@ -323,7 +323,15 @@ class Logging:
                        message=f"Model Call Details pre-call: {self.model_call_details}",
                        level="info",
                    )
                elif callable(callback): # custom logger functions
                    customLogger.log_input_event(
                        model=self.model,
                        messages=self.messages,
                        kwargs=self.model_call_details,
                        print_verbose=print_verbose,
                    )
            except Exception as e:
                traceback.print_exc()
                print_verbose(
                    f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
                )
@@ -416,6 +424,7 @@ class Logging:

        ## BUILD COMPLETE STREAMED RESPONSE
        if self.stream:
            print(f"stream result: {result}")
            if result.choices[0].finish_reason: # if it's the last chunk
                self.streaming_chunks.append(result)
                complete_streaming_response = litellm.stream_chunk_builder(self.streaming_chunks)
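For reference, a hedged sketch of the same chunk-assembly idea from caller code, assuming litellm.stream_chunk_builder accepts a list of collected stream chunks as used above (model access and credentials are assumed and not part of this diff):

import litellm

chunks = []
for chunk in litellm.completion(
    model="anthropic.claude-v2",
    messages=[{"role": "user", "content": "what do you know?"}],
    stream=True,
):
    chunks.append(chunk)

# rebuild one complete response object from the streamed chunks
full_response = litellm.stream_chunk_builder(chunks)
print(full_response)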
@@ -573,6 +582,14 @@ class Logging:
                        capture_exception(exception)
                    else:
                        print_verbose(f"capture exception not initialized: {capture_exception}")
                elif callable(callback): # custom logger functions
                    customLogger.log_event(
                        kwargs=self.model_call_details,
                        response_obj=result,
                        start_time=start_time,
                        end_time=end_time,
                        print_verbose=print_verbose,
                    )
            except Exception as e:
                print_verbose(
                    f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {traceback.format_exc()}"