diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 99e08f1adf..e2888030c2 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py
index 49815e5dc8..cd41f5d712 100644
--- a/litellm/integrations/custom_logger.py
+++ b/litellm/integrations/custom_logger.py
@@ -13,6 +13,24 @@ class CustomLogger:
     def __init__(self, callback_func):
         # Instance variables
         self.callback_func = callback_func
+
+    def log_input_event(self, model, messages, kwargs, print_verbose):
+        try:
+            print_verbose(
+                f"Custom Logger - Enters logging function for model {kwargs}"
+            )
+            kwargs["model"] = model
+            kwargs["messages"] = messages
+            kwargs["log_event_type"] = "pre_api_call"
+            self.callback_func(
+                kwargs,
+            )
+            print_verbose(
+                f"Custom Logger - model call details: {kwargs}"
+            )
+        except:
+            traceback.print_exc()
+            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
 
     def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
         # Method definition
@@ -20,6 +38,7 @@ class CustomLogger:
             print_verbose(
                 f"Custom Logger - Enters logging function for model {kwargs}"
             )
+            kwargs["log_event_type"] = "post_api_call"
             self.callback_func(
                 kwargs,  # kwargs to func
                 response_obj,
diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py
index aa8bb9b945..1fb5e84464 100644
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@@ -209,7 +209,7 @@ def init_bedrock_client(
 def convert_messages_to_prompt(messages, provider):
     # handle anthropic prompts using anthropic constants
     if provider == "anthropic":
-        prompt = ""
+        prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
         for message in messages:
             if "role" in message:
                 if message["role"] == "user":
diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index e26a846fb9..ab7a3560ca 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -7,7 +7,6 @@ def default_pt(messages):
     return " ".join(message["content"] for message in messages)
 
 # Llama2 prompt template
-llama_2_special_tokens = ["<s>", "</s>"]
 def llama_2_chat_pt(messages):
     prompt = custom_prompt(
         role_dict={
diff --git a/litellm/proxy/api_log.json b/litellm/proxy/api_log.json
new file mode 100644
index 0000000000..810c5b826b
--- /dev/null
+++ b/litellm/proxy/api_log.json
@@ -0,0 +1,85 @@
+{
+  "20231012182157625128": {
+    "pre_api_call": {
+      "model": "anthropic.claude-v2",
+      "messages": [
+        {
+          "role": "user",
+          "content": "what do you know?"
+        }
+      ],
+      "optional_params": {
+        "temperature": 0.1,
+        "stream": true
+      },
+      "litellm_params": {
+        "return_async": false,
+        "api_key": null,
+        "force_timeout": 600,
+        "logger_fn": null,
+        "verbose": false,
+        "custom_llm_provider": "bedrock",
+        "api_base": null,
+        "litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635",
+        "model_alias_map": {},
+        "completion_call_id": null,
+        "metadata": null,
+        "stream_response": {}
+      },
+      "input": "\n\nHuman: \n\nHuman: what do you know?\n\nAssistant: ",
+      "api_key": "",
+      "additional_args": {
+        "complete_input_dict": "{\"prompt\": \"\\n\\nHuman: \\n\\nHuman: what do you know?\\n\\nAssistant: \", \"temperature\": 0.1, \"max_tokens_to_sample\": 256}"
+      },
+      "log_event_type": "pre_api_call"
+    },
+    "post_api_call": {
+      "model": "anthropic.claude-v2",
+      "messages": [
+        {
+          "role": "user",
+          "content": "what do you know?"
+        }
+      ],
+      "optional_params": {
+        "temperature": 0.1,
+        "stream": true
+      },
+      "litellm_params": {
+        "return_async": false,
+        "api_key": null,
+        "force_timeout": 600,
+        "logger_fn": null,
+        "verbose": false,
+        "custom_llm_provider": "bedrock",
+        "api_base": null,
+        "litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635",
+        "model_alias_map": {},
+        "completion_call_id": null,
+        "metadata": null,
+        "stream_response": {}
+      },
+      "input": null,
+      "api_key": null,
+      "additional_args": {},
+      "log_event_type": "post_api_call",
+      "original_response": "",
+      "complete_streaming_response": {
+        "id": "chatcmpl-1757e5ea-71f2-44a2-9d8d-1ba8238a7c99",
+        "object": "chat.completion.chunk",
+        "created": 1697160117,
+        "model": "anthropic.claude-v2",
+        "choices": [
+          {
+            "index": 0,
+            "message": {
+              "role": "assistant",
+              "content": " I'm Claude, an AI assistant created by Anthropic. I don't actually have general knowledge about the world. I'm an AI conversational model trained by Anthropic to be helpful, harmless, and honest."
+            },
+            "finish_reason": "stop_sequence"
+          }
+        ]
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/litellm/proxy/cost.log b/litellm/proxy/cost.log
index 365bc3e70e..e69de29bb2 100644
--- a/litellm/proxy/cost.log
+++ b/litellm/proxy/cost.log
@@ -1,11 +0,0 @@
-2023-10-11 15:02:23 - Model gpt-4 Cost: $0.00063000
-2023-10-11 15:02:57 - Model gpt-4 Cost: $0.00093000
-2023-10-11 15:09:10 - Model gpt-4 Cost: $0.00135000
-2023-10-11 15:09:50 - Model gpt-4 Cost: $0.01626000
-2023-10-11 15:12:57 - Model gpt-4 Cost: $0.01974000
-2023-10-11 15:13:35 - Model gpt-4 Cost: $0.02415000
-2023-10-11 15:14:04 - Model gpt-4 Cost: $0.03291000
-2023-10-11 15:18:16 - Model gpt-4 Cost: $0.03669000
-2023-10-11 15:19:12 - Model gpt-4 Cost: $0.04806000
-2023-10-11 21:11:06 - Model claude-2 Cost: $0.00041534
-2023-10-11 21:15:34 - Model claude-2 Cost: $0.00054606
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 644641f978..19b105a1ad 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -1,6 +1,6 @@
 import click
 import subprocess, traceback
-import os, appdirs
+import os, sys
 import random
 from dotenv import load_dotenv
 
@@ -8,42 +8,33 @@ load_dotenv()
 from importlib import resources
 import shutil
 
-config_filename = "litellm.secrets.toml"
-pkg_config_filename = "template.secrets.toml"
-# Using appdirs to determine user-specific config path
-config_dir = appdirs.user_config_dir("litellm")
-user_config_path = os.path.join(config_dir, config_filename)
-
 def run_ollama_serve():
     command = ['ollama', 'serve']
 
     with open(os.devnull, 'w') as devnull:
         process = subprocess.Popen(command, stdout=devnull, stderr=devnull)
 
-def open_config():
-    # Create the .env file if it doesn't exist
-    if not os.path.exists(user_config_path):
-        # If user's env doesn't exist, copy the default env from the package
-        here = os.path.abspath(os.path.dirname(__file__))
-        parent_dir = os.path.dirname(here)
-        default_env_path = os.path.join(parent_dir, pkg_config_filename)
-        # Ensure the user-specific directory exists
-        os.makedirs(config_dir, exist_ok=True)
-        # Copying the file using shutil.copy
-        try:
-            shutil.copy(default_env_path, user_config_path)
-        except Exception as e:
-            print(f"Failed to copy .template.secrets.toml: {e}")
+def clone_subfolder(repo_url, subfolder, destination):
 
-    # Open the .env file in the default editor
-    try:
-        if os.name == 'nt': # For Windows
-            os.startfile(user_config_path)
-        elif os.name == 'posix': # For MacOS, Linux, and anything using Bash
-            subprocess.call(('open', '-t', user_config_path))
-    except:
-        pass
-    print(f"LiteLLM: Proxy Server Config - {user_config_path}")
+    # Clone the full repo
+    repo_name = repo_url.split('/')[-1]
+    repo_master = os.path.join(destination, "repo_master")
+    subprocess.run(['git', 'clone', repo_url, repo_master])
+
+    # Move into the subfolder
+    subfolder_path = os.path.join(repo_master, subfolder)
+
+    # Copy subfolder to destination
+    for file_name in os.listdir(subfolder_path):
+        source = os.path.join(subfolder_path, file_name)
+        if os.path.isfile(source):
+            shutil.copy(source, destination)
+        else:
+            dest_path = os.path.join(destination, file_name)
+            shutil.copytree(source, dest_path)
+
+    # Remove cloned repo folder
+    subprocess.run(['rm', '-rf', os.path.join(destination, "repo_master")])
 
 def is_port_in_use(port):
     import socket
@@ -60,23 +51,31 @@ def is_port_in_use(port):
 @click.option('--temperature', default=None, type=float, help='Set temperature for the model')
 @click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
 @click.option('--drop_params', is_flag=True, help='Drop any unmapped params')
+@click.option('--create_proxy', is_flag=True, help='Creates a local OpenAI-compatible server template')
 @click.option('--add_function_to_prompt', is_flag=True, help='If function passed but unsupported, pass it as prompt')
 @click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.`')
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
-@click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
 @click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
 @click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
-def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, max_budget, telemetry, config, test, local, cost):
-    if config:
-        open_config()
-        return
+def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, max_budget, telemetry, test, local, cost):
     if local:
        from proxy_server import app, initialize, deploy_proxy, print_cost_logs
        debug = True
     else:
-        from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
+        try:
+            from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
+        except ImportError as e:
+            from proxy_server import app, initialize, deploy_proxy, print_cost_logs
+    if create_proxy == True:
+        repo_url = 'https://github.com/BerriAI/litellm'
+        subfolder = 'litellm/proxy'
+        destination = os.path.join(os.getcwd(), 'litellm-proxy')
+
+        clone_subfolder(repo_url, subfolder, destination)
+
+        return
 
     if deploy == True:
         print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n")
         print(f"\033[32mLiteLLM: Deploying proxy for model: {model}\033[0m\n")
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 210493a742..186f462520 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1,9 +1,10 @@
-import sys, os, platform
+import sys, os, platform, time, copy
 import threading
 import shutil, random, traceback
-sys.path.insert(
-    0, os.path.abspath("../..")
-)  # Adds the parent directory to the system path
+# sys.path.insert(
+#     0, os.path.abspath("../..")
+# )  # Adds the parent directory to the system path - for litellm local dev
+
 
 try:
     import uvicorn
@@ -76,12 +77,10 @@ user_max_tokens = None
 user_temperature = None
 user_telemetry = False
 user_config = None
-config_filename = "litellm.secrets.toml"
-pkg_config_filename = "template.secrets.toml"
-# Using appdirs to determine user-specific config path
-config_dir = appdirs.user_config_dir("litellm")
+config_filename = "secrets.toml"
+config_dir = os.getcwd()
 user_config_path = os.path.join(config_dir, config_filename)
-
+log_file = 'api_log.json'
 #### HELPER FUNCTIONS ####
 def print_verbose(print_statement):
     global user_debug
@@ -98,15 +97,6 @@ def usage_telemetry(): # helps us know if people are using this feature. Set `li
 def load_config():
     try:
         global user_config, user_api_base, user_max_tokens, user_temperature, user_model
-        if not os.path.exists(user_config_path):
-            # If user's config doesn't exist, copy the default config from the package
-            here = os.path.abspath(os.path.dirname(__file__))
-            parent_dir = os.path.dirname(here)
-            default_config_path = os.path.join(parent_dir, pkg_config_filename)
-            # Ensure the user-specific directory exists
-            os.makedirs(config_dir, exist_ok=True)
-            # Copying the file using shutil.copy
-            shutil.copy(default_config_path, user_config_path)
         # As the .env file is typically much simpler in structure, we use load_dotenv here directly
         with open(user_config_path, "rb") as f:
             user_config = tomllib.load(f)
@@ -133,11 +123,8 @@ def load_config():
 
         ## load model config - to set this run `litellm --config`
         model_config = None
-        if user_model == "local":
-            model_config = user_config["local_model"]
-        elif user_model == "hosted":
-            model_config = user_config["hosted_model"]
-            litellm.max_budget = model_config.get("max_budget", None) # check if user set a budget for hosted model - e.g. gpt-4
+        if user_model in user_config["model"]:
+            model_config = user_config["model"][user_model]
 
         print_verbose(f"user_config: {user_config}")
         print_verbose(f"model_config: {model_config}")
@@ -317,7 +304,55 @@ def track_cost_callback(
     except:
         pass
 
-litellm.success_callback = [track_cost_callback]
+def logger(
+    kwargs,  # kwargs to completion
+    completion_response=None,  # response from completion
+    start_time=None,
+    end_time=None  # start/end time
+):
+    log_event_type = kwargs['log_event_type']
+    print(f"REACHES LOGGER: {log_event_type}")
+    try:
+        if log_event_type == 'pre_api_call':
+            inference_params = copy.deepcopy(kwargs)
+            timestamp = inference_params.pop('start_time')
+            dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23]
+            log_data = {
+                dt_key: {
+                    'pre_api_call': inference_params
+                }
+            }
+
+            try:
+                with open(log_file, 'r') as f:
+                    existing_data = json.load(f)
+            except FileNotFoundError:
+                existing_data = {}
+
+            existing_data.update(log_data)
+
+            with open(log_file, 'w') as f:
+                json.dump(existing_data, f, indent=2)
+        elif log_event_type == 'post_api_call':
+            print(f"post api call kwargs: {kwargs}")
+            if "stream" not in kwargs["optional_params"] or kwargs["optional_params"]["stream"] is False or kwargs.get("complete_streaming_response", False):
+                inference_params = copy.deepcopy(kwargs)
+                timestamp = inference_params.pop('start_time')
+                dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23]
+
+                with open(log_file, 'r') as f:
+                    existing_data = json.load(f)
+
+                existing_data[dt_key]['post_api_call'] = inference_params
+
+                with open(log_file, 'w') as f:
+                    json.dump(existing_data, f, indent=2)
+    except:
+        traceback.print_exc()
+
+litellm.input_callback = [logger]
+litellm.success_callback = [logger]
+litellm.failure_callback = [logger]
 
 def litellm_completion(data, type):
     try:
diff --git a/litellm/proxy/secrets.toml b/litellm/proxy/secrets.toml
new file mode 100644
index 0000000000..fa04cb5f18
--- /dev/null
+++ b/litellm/proxy/secrets.toml
@@ -0,0 +1,29 @@
+[keys]
+# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
+# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
+# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
+# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
+# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
+# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key
+
+[general]
+# add_function_to_prompt = True # e.g: Ollama doesn't support functions, so add it to the prompt instead
+# drop_params = True # drop any params not supported by the provider (e.g. Ollama)
+
+[model."ollama/llama2"] # run via `litellm --model ollama/llama2`
+# max_tokens = "" # set max tokens for the model
+# temperature = "" # set temperature for the model
+# api_base = "" # set a custom api base for the model
+
+[model."ollama/llama2".prompt_template] # [OPTIONAL] LiteLLM can automatically formats the prompt - docs: https://docs.litellm.ai/docs/completion/prompt_formatting
+# MODEL_SYSTEM_MESSAGE_START_TOKEN = "[INST] <<SYS>>\n" # This does not need to be a token, can be any string
+# MODEL_SYSTEM_MESSAGE_END_TOKEN = "\n<</SYS>>\n [/INST]\n" # This does not need to be a token, can be any string
+
+# MODEL_USER_MESSAGE_START_TOKEN = "[INST] " # This does not need to be a token, can be any string
+# MODEL_USER_MESSAGE_END_TOKEN = " [/INST]\n" # Applies only to user messages. Can be any string.
+
+# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "" # Applies only to assistant messages. Can be any string.
+# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "\n" # Applies only to system messages. Can be any string.
+
+# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
+# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
\ No newline at end of file
diff --git a/litellm/template.secrets.toml b/litellm/template.secrets.toml
deleted file mode 100644
index 11de08fbf0..0000000000
--- a/litellm/template.secrets.toml
+++ /dev/null
@@ -1,50 +0,0 @@
-[keys]
-# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
-# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
-# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
-# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
-# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
-# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key
-
-[general]
-# add_function_to_prompt = True # e.g: Ollama doesn't support functions, so add it to the prompt instead
-# drop_params = True # drop any params not supported by the provider (e.g. Ollama)
-
-[local_model] # run via `litellm --model local`
-# model_name = "ollama/codellama" # Uncomment to set a local model
-# max_tokens = "" # set max tokens for the model
-# temperature = "" # set temperature for the model
-# api_base = "" # set a custom api base for the model
-
-[local_model.prompt_template] # Set a custom prompt template for your local model - docs: https://docs.litellm.ai/docs/completion/prompt_formatting#format-prompt-yourself
-# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
-# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string
-
-# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
-# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.
-
-# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
-# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to system messages. Can be any string.
-
-# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
-# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
-
-[hosted_model] # run via `litellm --model hosted`
-# model_name = "gpt-4"
-# max_tokens = "" # set max tokens for the model
-# temperature = "" # set temperature for the model
-# api_base = "" # set a custom api base for the model
-# max_budget = 100 # sets a max budget of $100 for your hosted model
-
-[hosted_model.prompt_template] # Set a custom prompt template for your hosted model - docs: https://docs.litellm.ai/docs/completion/prompt_formatting#format-prompt-yourself
-# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
-# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string
-
-# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
-# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.
-
-# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
-# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to system messages. Can be any string.
-
-# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
-# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 069cb4d8fa..7d678aeb0a 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -53,7 +53,6 @@ from .exceptions import (
 )
 from typing import cast, List, Dict, Union, Optional
 from .caching import Cache
-from .llms.prompt_templates.factory import llama_2_special_tokens
 
 ####### ENVIRONMENT VARIABLES ####################
 dotenv.load_dotenv()  # Loading env variables using dotenv
@@ -249,6 +248,7 @@ class Logging:
             "messages": self.messages,
             "optional_params": self.optional_params,
             "litellm_params": self.litellm_params,
+            "start_time": self.start_time
         }
 
     def pre_call(self, input, api_key, model=None, additional_args={}):
@@ -323,7 +323,15 @@ class Logging:
                         message=f"Model Call Details pre-call: {self.model_call_details}",
                         level="info",
                     )
+                elif callable(callback):  # custom logger functions
+                    customLogger.log_input_event(
+                        model=self.model,
+                        messages=self.messages,
+                        kwargs=self.model_call_details,
+                        print_verbose=print_verbose,
+                    )
         except Exception as e:
+            traceback.print_exc()
             print_verbose(
                 f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
             )
@@ -416,6 +424,7 @@ class Logging:
 
         ## BUILD COMPLETE STREAMED RESPONSE
         if self.stream:
+            print(f"stream result: {result}")
             if result.choices[0].finish_reason:  # if it's the last chunk
                 self.streaming_chunks.append(result)
                 complete_streaming_response = litellm.stream_chunk_builder(self.streaming_chunks)
@@ -573,6 +582,14 @@ class Logging:
                         capture_exception(exception)
                     else:
                         print_verbose(f"capture exception not initialized: {capture_exception}")
+                elif callable(callback):  # custom logger functions
+                    customLogger.log_event(
+                        kwargs=self.model_call_details,
+                        response_obj=result,
+                        start_time=start_time,
+                        end_time=end_time,
+                        print_verbose=print_verbose,
+                    )
         except Exception as e:
             print_verbose(
                 f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {traceback.format_exc()}"
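
Illustrative usage (a minimal sketch, not part of the diff): the changes above register the proxy's `logger` function as an input/success/failure callback and tag each call's kwargs with a `log_event_type` of "pre_api_call" or "post_api_call". A user-defined callback with the same shape could look like the following; the name `my_custom_logger` and the print statement are hypothetical.

    import litellm

    def my_custom_logger(kwargs, completion_response=None, start_time=None, end_time=None):
        # kwargs carries "log_event_type" ("pre_api_call" or "post_api_call"),
        # plus the model, messages, and litellm_params set in Logging/CustomLogger above
        print(kwargs.get("log_event_type"), kwargs.get("model"))

    litellm.input_callback = [my_custom_logger]    # invoked before the provider call
    litellm.success_callback = [my_custom_logger]  # invoked after a successful call
    litellm.failure_callback = [my_custom_logger]  # invoked when the call errors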