diff --git a/.DS_Store b/.DS_Store index 51ab802dc..639e40acb 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.env.example b/.env.example index 7889a204c..276feaba8 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,4 @@ OPENAI_API_KEY = "" COHERE_API_KEY = "" -OPENROUTER_API_KEY = "" OR_SITE_URL = "" OR_APP_NAME = "LiteLLM Example app" \ No newline at end of file diff --git a/build/lib/litellm/main.py b/build/lib/litellm/main.py index 053d796e1..d4fc60053 100644 --- a/build/lib/litellm/main.py +++ b/build/lib/litellm/main.py @@ -1,7 +1,17 @@ -import os, openai, cohere, dotenv - +import os, openai, cohere, replicate, sys +from typing import Any +from func_timeout import func_set_timeout, FunctionTimedOut +from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT +import json +import traceback +import threading +import dotenv +import traceback +import subprocess +####### ENVIRONMENT VARIABLES ################### # Loading env variables using dotenv dotenv.load_dotenv() +set_verbose = False ####### COMPLETION MODELS ################### open_ai_chat_completion_models = [ @@ -16,16 +26,9 @@ cohere_models = [ 'command-nightly', ] -openrouter_models = [ - 'google/palm-2-codechat-bison', - 'google/palm-2-chat-bison', - 'openai/gpt-3.5-turbo', - 'openai/gpt-3.5-turbo-16k', - 'openai/gpt-4-32k', - 'anthropic/claude-2', - 'anthropic/claude-instant-v1', - 'meta-llama/llama-2-13b-chat', - 'meta-llama/llama-2-70b-chat' +anthropic_models = [ + "claude-2", + "claude-instant-1" ] ####### EMBEDDING MODELS ################### @@ -38,122 +41,389 @@ open_ai_embedding_models = [ ####### COMPLETION ENDPOINTS ################ ############################################# -def completion(model, messages, azure=False): - if azure == True: - # azure configs - openai.api_type = "azure" - openai.api_base = os.environ.get("AZURE_API_BASE") - openai.api_version = os.environ.get("AZURE_API_VERSION") - openai.api_key = os.environ.get("AZURE_API_KEY") - response = openai.ChatCompletion.create( - engine=model, - messages = messages - ) - elif "replicate" in model: - prompt = " ".join([message["content"] for message in messages]) - output = replicate.run( - model, - input={ - "prompt": prompt, - }) - print(f"output: {output}") - response = "" - for item in output: - print(f"item: {item}") - response += item - new_response = { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "content": response, - "role": "assistant" - } - } - ] - } - print(f"new response: {new_response}") - response = new_response - elif model in cohere_models: - cohere_key = os.environ.get("COHERE_API_KEY") - co = cohere.Client(cohere_key) - prompt = " ".join([message["content"] for message in messages]) - response = co.generate( - model=model, - prompt = prompt - ) - new_response = { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "content": response[0], - "role": "assistant" - } - } - ], - } - - response = new_response - - elif model in open_ai_chat_completion_models: - openai.api_type = "openai" - openai.api_base = "https://api.openai.com/v1" - openai.api_version = None - openai.api_key = os.environ.get("OPENAI_API_KEY") - response = openai.ChatCompletion.create( - model=model, +@func_set_timeout(10, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. 
Azure) +def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None): + try: + if azure == True: + # azure configs + openai.api_type = "azure" + openai.api_base = os.environ.get("AZURE_API_BASE") + openai.api_version = os.environ.get("AZURE_API_VERSION") + openai.api_key = os.environ.get("AZURE_API_KEY") + ## LOGGING + logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + ## COMPLETION CALL + response = openai.ChatCompletion.create( + engine=model, messages = messages - ) - elif model in open_ai_text_completion_models: - openai.api_type = "openai" - openai.api_base = "https://api.openai.com/v1" - openai.api_version = None - openai.api_key = os.environ.get("OPENAI_API_KEY") - prompt = " ".join([message["content"] for message in messages]) - response = openai.Completion.create( + ) + elif "replicate" in model: + # replicate defaults to os.environ.get("REPLICATE_API_TOKEN") + # checking in case user set it to REPLICATE_API_KEY instead + if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"): + replicate_api_token = os.environ.get("REPLICATE_API_KEY") + os.environ["REPLICATE_API_TOKEN"] = replicate_api_token + prompt = " ".join([message["content"] for message in messages]) + input = [{"prompt": prompt}] + if max_tokens: + input["max_length"] = max_tokens # for t5 models + input["max_new_tokens"] = max_tokens # for llama2 models + ## LOGGING + logging(model=model, input=input, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) + ## COMPLETION CALL + output = replicate.run( + model, + input=input) + response = "" + for item in output: + response += item + new_response = { + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": response, + "role": "assistant" + } + } + ] + } + response = new_response + elif model in anthropic_models: + #anthropic defaults to os.environ.get("ANTHROPIC_API_KEY") + prompt = f"{HUMAN_PROMPT}" + for message in messages: + if "role" in message: + if message["role"] == "user": + prompt += f"{HUMAN_PROMPT}{message['content']}" + else: + prompt += f"{AI_PROMPT}{message['content']}" + else: + prompt += f"{HUMAN_PROMPT}{message['content']}" + prompt += f"{AI_PROMPT}" + anthropic = Anthropic() + if max_tokens: + max_tokens_to_sample = max_tokens + else: + max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries + ## LOGGING + logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) + ## COMPLETION CALL + completion = anthropic.completions.create( + model=model, + prompt=prompt, + max_tokens_to_sample=max_tokens_to_sample + ) + new_response = { + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": completion.completion, + "role": "assistant" + } + } + ] + } + print(f"new response: {new_response}") + response = new_response + elif model in cohere_models: + cohere_key = os.environ.get("COHERE_API_KEY") + co = cohere.Client(cohere_key) + prompt = " ".join([message["content"] for message in messages]) + ## LOGGING + logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn) + ## COMPLETION CALL + response = co.generate( model=model, prompt = prompt - ) - - elif model in openrouter_models: - openai.api_base = "https://openrouter.ai/api/v1" - openai.api_key = os.environ.get("OPENROUTER_API_KEY") - - prompt = " ".join([message["content"] for message in messages]) - - response = 
openai.ChatCompletion.create( - model=model, - messages=messages, - headers={ - "HTTP-Referer": os.environ.get("OR_SITE_URL"), # To identify your app - "X-Title": os.environ.get("OR_APP_NAME") - }, - ) - reply = response.choices[0].message - return response + ) + new_response = { + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": response[0], + "role": "assistant" + } + } + ], + } + response = new_response + elif model in open_ai_chat_completion_models: + openai.api_type = "openai" + openai.api_base = "https://api.openai.com/v1" + openai.api_version = None + openai.api_key = os.environ.get("OPENAI_API_KEY") + ## LOGGING + logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) + ## COMPLETION CALL + response = openai.ChatCompletion.create( + model=model, + messages = messages + ) + elif model in open_ai_text_completion_models: + openai.api_type = "openai" + openai.api_base = "https://api.openai.com/v1" + openai.api_version = None + openai.api_key = os.environ.get("OPENAI_API_KEY") + prompt = " ".join([message["content"] for message in messages]) + ## LOGGING + logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn) + ## COMPLETION CALL + response = openai.Completion.create( + model=model, + prompt = prompt + ) + else: + logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) + return response + except Exception as e: + logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) + raise e ### EMBEDDING ENDPOINTS #################### -def embedding(model, input=[], azure=False): +@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/ +def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None): + response = None if azure == True: # azure configs openai.api_type = "azure" openai.api_base = os.environ.get("AZURE_API_BASE") openai.api_version = os.environ.get("AZURE_API_VERSION") - openai.api_key = os.environ.get("AZURE_API_KEY") + openai.api_key = os.environ.get("AZURE_API_KEY") + ## LOGGING + logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + ## EMBEDDING CALL response = openai.Embedding.create(input=input, engine=model) + print_verbose(f"response_value: {str(response)[:50]}") elif model in open_ai_embedding_models: openai.api_type = "openai" openai.api_base = "https://api.openai.com/v1" openai.api_version = None openai.api_key = os.environ.get("OPENAI_API_KEY") + ## LOGGING + logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + ## EMBEDDING CALL response = openai.Embedding.create(input=input, model=model) + print_verbose(f"response_value: {str(response)[:50]}") + else: + logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + return response -############################################# -############################################# \ No newline at end of file +### CLIENT CLASS #################### make it easy to push completion/embedding runs to different sources -> sentry/posthog/slack, etc. 
+class litellm_client: + def __init__(self, success_callback=[], failure_callback=[], verbose=False): # Constructor + set_verbose = verbose + self.success_callback = success_callback + self.failure_callback = failure_callback + self.logger_fn = None # if user passes in their own logging function + self.callback_list = list(set(self.success_callback + self.failure_callback)) + self.set_callbacks() + + ## COMPLETION CALL + def completion(self, model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None, additional_details={}) -> Any: + try: + self.logger_fn = logger_fn + response = completion(model=model, messages=messages, max_tokens=max_tokens, forceTimeout=forceTimeout, azure=azure, logger_fn=self.handle_input) + my_thread = threading.Thread(target=self.handle_success, args=(model, messages, additional_details)) # don't interrupt execution of main thread + my_thread.start() + return response + except Exception as e: + args = locals() # get all the param values + self.handle_failure(e, args) + raise e + + ## EMBEDDING CALL + def embedding(self, model, input=[], azure=False, logger_fn=None, forceTimeout=60, additional_details={}) -> Any: + try: + self.logger_fn = logger_fn + response = embedding(model, input, azure=azure, logger_fn=self.handle_input) + my_thread = threading.Thread(target=self.handle_success, args=(model, input, additional_details)) # don't interrupt execution of main thread + my_thread.start() + return response + except Exception as e: + args = locals() # get all the param values + self.handle_failure(e, args) + raise e + + + def set_callbacks(self): #instantiate any external packages + for callback in self.callback_list: # only install what's required + if callback == "sentry": + try: + import sentry_sdk + except ImportError: + print_verbose("Package 'sentry_sdk' is missing. Installing it...") + subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk']) + import sentry_sdk + self.sentry_sdk = sentry_sdk + self.sentry_sdk.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE"))) + self.capture_exception = self.sentry_sdk.capture_exception + self.add_breadcrumb = self.sentry_sdk.add_breadcrumb + elif callback == "posthog": + try: + from posthog import Posthog + except: + print_verbose("Package 'posthog' is missing. Installing it...") + subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog']) + from posthog import Posthog + self.posthog = Posthog( + project_api_key=os.environ.get("POSTHOG_API_KEY"), + host=os.environ.get("POSTHOG_API_URL")) + elif callback == "slack": + try: + from slack_bolt import App + except ImportError: + print_verbose("Package 'slack_bolt' is missing. 
Installing it...") + subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt']) + from slack_bolt import App + self.slack_app = App( + token=os.environ.get("SLACK_API_TOKEN"), + signing_secret=os.environ.get("SLACK_API_SECRET") + ) + self.alerts_channel = os.environ["SLACK_API_CHANNEL"] + + def handle_input(self, model_call_details={}): + if len(model_call_details.keys()) > 0: + model = model_call_details["model"] if "model" in model_call_details else None + if model: + for callback in self.callback_list: + if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration + self.add_breadcrumb( + category=f'{model}', + message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)), + level='info', + ) + if self.logger_fn and callable(self.logger_fn): + self.logger_fn(model_call_details) + pass + + def handle_success(self, model, messages, additional_details): + success_handler = additional_details.pop("success_handler", None) + failure_handler = additional_details.pop("failure_handler", None) + additional_details["litellm_model"] = str(model) + additional_details["litellm_messages"] = str(messages) + for callback in self.success_callback: + try: + if callback == "posthog": + ph_obj = {} + for detail in additional_details: + ph_obj[detail] = additional_details[detail] + event_name = additional_details["successful_event"] if "successful_event" in additional_details else "litellm.succes_query" + if "user_id" in additional_details: + self.posthog.capture(additional_details["user_id"], event_name, ph_obj) + else: + self.posthog.capture(event_name, ph_obj) + pass + elif callback == "slack": + slack_msg = "" + if len(additional_details.keys()) > 0: + for detail in additional_details: + slack_msg += f"{detail}: {additional_details[detail]}\n" + slack_msg += f"Successful call" + self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg) + except: + pass + + if success_handler and callable(success_handler): + call_details = { + "model": model, + "messages": messages, + "additional_details": additional_details + } + success_handler(call_details) + pass + + def handle_failure(self, exception, args): + args.pop("self") + additional_details = args.pop("additional_details", {}) + + success_handler = additional_details.pop("success_handler", None) + failure_handler = additional_details.pop("failure_handler", None) + + for callback in self.failure_callback: + try: + if callback == "slack": + slack_msg = "" + for param in args: + slack_msg += f"{param}: {args[param]}\n" + if len(additional_details.keys()) > 0: + for detail in additional_details: + slack_msg += f"{detail}: {additional_details[detail]}\n" + slack_msg += f"Traceback: {traceback.format_exc()}" + self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg) + elif callback == "sentry": + self.capture_exception(exception) + elif callback == "posthog": + if len(additional_details.keys()) > 0: + ph_obj = {} + for param in args: + ph_obj[param] += args[param] + for detail in additional_details: + ph_obj[detail] = additional_details[detail] + event_name = additional_details["failed_event"] if "failed_event" in additional_details else "litellm.failed_query" + if "user_id" in additional_details: + self.posthog.capture(additional_details["user_id"], event_name, ph_obj) + else: + self.posthog.capture(event_name, ph_obj) + else: + pass + except: + print(f"got an error calling {callback} - {traceback.format_exc()}") + + if failure_handler and 
callable(failure_handler): + call_details = { + "exception": exception, + "additional_details": additional_details + } + failure_handler(call_details) + pass +####### HELPER FUNCTIONS ################ + +#Logging function -> log the exact model details + what's being sent | Non-Blocking +def logging(model, input, azure=False, additional_args={}, logger_fn=None): + try: + model_call_details = {} + model_call_details["model"] = model + model_call_details["input"] = input + model_call_details["azure"] = azure + model_call_details["additional_args"] = additional_args + if logger_fn and callable(logger_fn): + try: + # log additional call details -> api key, etc. + if azure == True or model in open_ai_chat_completion_models or model in open_ai_chat_completion_models or model in open_ai_embedding_models: + model_call_details["api_type"] = openai.api_type + model_call_details["api_base"] = openai.api_base + model_call_details["api_version"] = openai.api_version + model_call_details["api_key"] = openai.api_key + elif "replicate" in model: + model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN") + elif model in anthropic_models: + model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY") + elif model in cohere_models: + model_call_details["api_key"] = os.environ.get("COHERE_API_KEY") + + logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object + except: + print_verbose(f"Basic model call details: {model_call_details}") + print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}") + pass + else: + print_verbose(f"Basic model call details: {model_call_details}") + pass + except: + pass + +## Set verbose to true -> ```litellm.verbose = True``` +def print_verbose(print_statement): + if set_verbose: + print(f"LiteLLM: {print_statement}") + print("Get help - https://discord.com/invite/wuPM9dRgDw") \ No newline at end of file diff --git a/completion_test.py b/completion_test.py index a657e42a4..c9e74932c 100644 --- a/completion_test.py +++ b/completion_test.py @@ -26,9 +26,4 @@ print(response) # cohere call response = completion("command-nightly", messages) print("\nCohere call") -print(response) - -# openrouter call -response = completion("google/palm-2-codechat-bison", messages) -print("\OpenRouter call") print(response) \ No newline at end of file diff --git a/dist/litellm-0.1.0-py3-none-any.whl b/dist/litellm-0.1.0-py3-none-any.whl deleted file mode 100644 index 9bc973ed7..000000000 Binary files a/dist/litellm-0.1.0-py3-none-any.whl and /dev/null differ diff --git a/dist/litellm-0.1.0.tar.gz b/dist/litellm-0.1.0.tar.gz deleted file mode 100644 index f0c18b9b7..000000000 Binary files a/dist/litellm-0.1.0.tar.gz and /dev/null differ diff --git a/dist/litellm-0.1.1-py3-none-any.whl b/dist/litellm-0.1.1-py3-none-any.whl deleted file mode 100644 index 43a15ee40..000000000 Binary files a/dist/litellm-0.1.1-py3-none-any.whl and /dev/null differ diff --git a/dist/litellm-0.1.1.tar.gz b/dist/litellm-0.1.1.tar.gz deleted file mode 100644 index 26e4c3b2c..000000000 Binary files a/dist/litellm-0.1.1.tar.gz and /dev/null differ diff --git a/dist/litellm-0.1.2-py3-none-any.whl b/dist/litellm-0.1.2-py3-none-any.whl new file mode 100644 index 000000000..07cfc4417 Binary files /dev/null and b/dist/litellm-0.1.2-py3-none-any.whl differ diff --git a/dist/litellm-0.1.2.tar.gz b/dist/litellm-0.1.2.tar.gz new file mode 100644 index 000000000..853c7db39 Binary files /dev/null and b/dist/litellm-0.1.2.tar.gz 
differ diff --git a/litellm.egg-info/PKG-INFO b/litellm.egg-info/PKG-INFO index cf19261b2..e8f0962bc 100644 --- a/litellm.egg-info/PKG-INFO +++ b/litellm.egg-info/PKG-INFO @@ -1,12 +1,6 @@ Metadata-Version: 2.1 Name: litellm -Version: 0.1.1 +Version: 0.1.2 Summary: Library to easily interface with LLM API providers -Home-page: UNKNOWN -Author: Ishaan Jaffer -License: UNKNOWN -Platform: UNKNOWN +Author: BerriAI License-File: LICENSE - -UNKNOWN - diff --git a/litellm/.DS_Store b/litellm/.DS_Store new file mode 100644 index 000000000..b9f40dc63 Binary files /dev/null and b/litellm/.DS_Store differ diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc index 042ecd40d..80675f031 100644 Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ diff --git a/litellm/main.py b/litellm/main.py index 14de0b02e..d4fc60053 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -1,7 +1,17 @@ -import os, openai, cohere, dotenv - +import os, openai, cohere, replicate, sys +from typing import Any +from func_timeout import func_set_timeout, FunctionTimedOut +from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT +import json +import traceback +import threading +import dotenv +import traceback +import subprocess +####### ENVIRONMENT VARIABLES ################### # Loading env variables using dotenv dotenv.load_dotenv() +set_verbose = False ####### COMPLETION MODELS ################### open_ai_chat_completion_models = [ @@ -16,16 +26,9 @@ cohere_models = [ 'command-nightly', ] -openrouter_models = [ - 'google/palm-2-codechat-bison', - 'google/palm-2-chat-bison', - 'openai/gpt-3.5-turbo', - 'openai/gpt-3.5-turbo-16k', - 'openai/gpt-4-32k', - 'anthropic/claude-2', - 'anthropic/claude-instant-v1', - 'meta-llama/llama-2-13b-chat', - 'meta-llama/llama-2-70b-chat' +anthropic_models = [ + "claude-2", + "claude-instant-1" ] ####### EMBEDDING MODELS ################### @@ -38,123 +41,389 @@ open_ai_embedding_models = [ ####### COMPLETION ENDPOINTS ################ ############################################# -def completion(model, messages, azure=False): - if azure == True: - # azure configs - openai.api_type = "azure" - openai.api_base = os.environ.get("AZURE_API_BASE") - openai.api_version = os.environ.get("AZURE_API_VERSION") - openai.api_key = os.environ.get("AZURE_API_KEY") - response = openai.ChatCompletion.create( - engine=model, - messages = messages - ) - elif "replicate" in model: - prompt = " ".join([message["content"] for message in messages]) - output = replicate.run( - model, - input={ - "prompt": prompt, - }) - print(f"output: {output}") - response = "" - for item in output: - print(f"item: {item}") - response += item - new_response = { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "content": response, - "role": "assistant" - } - } - ] - } - print(f"new response: {new_response}") - response = new_response - elif model in cohere_models: - cohere_key = os.environ.get("COHERE_API_KEY") - co = cohere.Client(cohere_key) - prompt = " ".join([message["content"] for message in messages]) - response = co.generate( - model=model, - prompt = prompt - ) - new_response = { - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "message": { - "content": response[0], - "role": "assistant" - } - } - ], - } - - response = new_response - - elif model in open_ai_chat_completion_models: - openai.api_type = "openai" - openai.api_base = "https://api.openai.com/v1" - openai.api_version = None 
- openai.api_key = os.environ.get("OPENAI_API_KEY") - response = openai.ChatCompletion.create( - model=model, +@func_set_timeout(10, allowOverride=True) ## https://pypi.org/project/func-timeout/ - timeouts, in case calls hang (e.g. Azure) +def completion(model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None): + try: + if azure == True: + # azure configs + openai.api_type = "azure" + openai.api_base = os.environ.get("AZURE_API_BASE") + openai.api_version = os.environ.get("AZURE_API_VERSION") + openai.api_key = os.environ.get("AZURE_API_KEY") + ## LOGGING + logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + ## COMPLETION CALL + response = openai.ChatCompletion.create( + engine=model, messages = messages - ) - elif model in open_ai_text_completion_models: - openai.api_type = "openai" - openai.api_base = "https://api.openai.com/v1" - openai.api_version = None - openai.api_key = os.environ.get("OPENAI_API_KEY") - prompt = " ".join([message["content"] for message in messages]) - response = openai.Completion.create( + ) + elif "replicate" in model: + # replicate defaults to os.environ.get("REPLICATE_API_TOKEN") + # checking in case user set it to REPLICATE_API_KEY instead + if not os.environ.get("REPLICATE_API_TOKEN") and os.environ.get("REPLICATE_API_KEY"): + replicate_api_token = os.environ.get("REPLICATE_API_KEY") + os.environ["REPLICATE_API_TOKEN"] = replicate_api_token + prompt = " ".join([message["content"] for message in messages]) + input = [{"prompt": prompt}] + if max_tokens: + input["max_length"] = max_tokens # for t5 models + input["max_new_tokens"] = max_tokens # for llama2 models + ## LOGGING + logging(model=model, input=input, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) + ## COMPLETION CALL + output = replicate.run( + model, + input=input) + response = "" + for item in output: + response += item + new_response = { + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": response, + "role": "assistant" + } + } + ] + } + response = new_response + elif model in anthropic_models: + #anthropic defaults to os.environ.get("ANTHROPIC_API_KEY") + prompt = f"{HUMAN_PROMPT}" + for message in messages: + if "role" in message: + if message["role"] == "user": + prompt += f"{HUMAN_PROMPT}{message['content']}" + else: + prompt += f"{AI_PROMPT}{message['content']}" + else: + prompt += f"{HUMAN_PROMPT}{message['content']}" + prompt += f"{AI_PROMPT}" + anthropic = Anthropic() + if max_tokens: + max_tokens_to_sample = max_tokens + else: + max_tokens_to_sample = 300 # default in Anthropic docs https://docs.anthropic.com/claude/reference/client-libraries + ## LOGGING + logging(model=model, input=prompt, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) + ## COMPLETION CALL + completion = anthropic.completions.create( + model=model, + prompt=prompt, + max_tokens_to_sample=max_tokens_to_sample + ) + new_response = { + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": completion.completion, + "role": "assistant" + } + } + ] + } + print(f"new response: {new_response}") + response = new_response + elif model in cohere_models: + cohere_key = os.environ.get("COHERE_API_KEY") + co = cohere.Client(cohere_key) + prompt = " ".join([message["content"] for message in messages]) + ## LOGGING + logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn) + ## COMPLETION CALL + response = co.generate( model=model, prompt = prompt - ) - - 
elif model in openrouter_models: - openai.api_base = "https://openrouter.ai/api/v1" - openai.api_key = os.environ.get("OPENROUTER_API_KEY") - - prompt = " ".join([message["content"] for message in messages]) - - response = openai.ChatCompletion.create( - model=model, - messages=messages, - headers={ - "HTTP-Referer": os.environ.get("OR_SITE_URL"), # To identify your app - "X-Title": os.environ.get("OR_APP_NAME") - }, - ) - reply = response.choices[0].message - return response + ) + new_response = { + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "content": response[0], + "role": "assistant" + } + } + ], + } + response = new_response + elif model in open_ai_chat_completion_models: + openai.api_type = "openai" + openai.api_base = "https://api.openai.com/v1" + openai.api_version = None + openai.api_key = os.environ.get("OPENAI_API_KEY") + ## LOGGING + logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) + ## COMPLETION CALL + response = openai.ChatCompletion.create( + model=model, + messages = messages + ) + elif model in open_ai_text_completion_models: + openai.api_type = "openai" + openai.api_base = "https://api.openai.com/v1" + openai.api_version = None + openai.api_key = os.environ.get("OPENAI_API_KEY") + prompt = " ".join([message["content"] for message in messages]) + ## LOGGING + logging(model=model, input=prompt, azure=azure, logger_fn=logger_fn) + ## COMPLETION CALL + response = openai.Completion.create( + model=model, + prompt = prompt + ) + else: + logging(model=model, input=messages, azure=azure, logger_fn=logger_fn) + return response + except Exception as e: + logging(model=model, input=messages, azure=azure, additional_args={"max_tokens": max_tokens}, logger_fn=logger_fn) + raise e ### EMBEDDING ENDPOINTS #################### -def embedding(model, input=[], azure=False): +@func_set_timeout(60, allowOverride=True) ## https://pypi.org/project/func-timeout/ +def embedding(model, input=[], azure=False, forceTimeout=60, logger_fn=None): + response = None if azure == True: # azure configs openai.api_type = "azure" openai.api_base = os.environ.get("AZURE_API_BASE") openai.api_version = os.environ.get("AZURE_API_VERSION") - openai.api_key = os.environ.get("AZURE_API_KEY") + openai.api_key = os.environ.get("AZURE_API_KEY") + ## LOGGING + logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + ## EMBEDDING CALL response = openai.Embedding.create(input=input, engine=model) + print_verbose(f"response_value: {str(response)[:50]}") elif model in open_ai_embedding_models: openai.api_type = "openai" openai.api_base = "https://api.openai.com/v1" openai.api_version = None openai.api_key = os.environ.get("OPENAI_API_KEY") + ## LOGGING + logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + ## EMBEDDING CALL response = openai.Embedding.create(input=input, model=model) + print_verbose(f"response_value: {str(response)[:50]}") + else: + logging(model=model, input=input, azure=azure, logger_fn=logger_fn) + return response -############################################# -############################################# +### CLIENT CLASS #################### make it easy to push completion/embedding runs to different sources -> sentry/posthog/slack, etc. 
+class litellm_client: + def __init__(self, success_callback=[], failure_callback=[], verbose=False): # Constructor + set_verbose = verbose + self.success_callback = success_callback + self.failure_callback = failure_callback + self.logger_fn = None # if user passes in their own logging function + self.callback_list = list(set(self.success_callback + self.failure_callback)) + self.set_callbacks() + + ## COMPLETION CALL + def completion(self, model, messages, max_tokens=None, forceTimeout=10, azure=False, logger_fn=None, additional_details={}) -> Any: + try: + self.logger_fn = logger_fn + response = completion(model=model, messages=messages, max_tokens=max_tokens, forceTimeout=forceTimeout, azure=azure, logger_fn=self.handle_input) + my_thread = threading.Thread(target=self.handle_success, args=(model, messages, additional_details)) # don't interrupt execution of main thread + my_thread.start() + return response + except Exception as e: + args = locals() # get all the param values + self.handle_failure(e, args) + raise e + ## EMBEDDING CALL + def embedding(self, model, input=[], azure=False, logger_fn=None, forceTimeout=60, additional_details={}) -> Any: + try: + self.logger_fn = logger_fn + response = embedding(model, input, azure=azure, logger_fn=self.handle_input) + my_thread = threading.Thread(target=self.handle_success, args=(model, input, additional_details)) # don't interrupt execution of main thread + my_thread.start() + return response + except Exception as e: + args = locals() # get all the param values + self.handle_failure(e, args) + raise e + + + def set_callbacks(self): #instantiate any external packages + for callback in self.callback_list: # only install what's required + if callback == "sentry": + try: + import sentry_sdk + except ImportError: + print_verbose("Package 'sentry_sdk' is missing. Installing it...") + subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'sentry_sdk']) + import sentry_sdk + self.sentry_sdk = sentry_sdk + self.sentry_sdk.init(dsn=os.environ.get("SENTRY_API_URL"), traces_sample_rate=float(os.environ.get("SENTRY_API_TRACE_RATE"))) + self.capture_exception = self.sentry_sdk.capture_exception + self.add_breadcrumb = self.sentry_sdk.add_breadcrumb + elif callback == "posthog": + try: + from posthog import Posthog + except: + print_verbose("Package 'posthog' is missing. Installing it...") + subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'posthog']) + from posthog import Posthog + self.posthog = Posthog( + project_api_key=os.environ.get("POSTHOG_API_KEY"), + host=os.environ.get("POSTHOG_API_URL")) + elif callback == "slack": + try: + from slack_bolt import App + except ImportError: + print_verbose("Package 'slack_bolt' is missing. 
Installing it...") + subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'slack_bolt']) + from slack_bolt import App + self.slack_app = App( + token=os.environ.get("SLACK_API_TOKEN"), + signing_secret=os.environ.get("SLACK_API_SECRET") + ) + self.alerts_channel = os.environ["SLACK_API_CHANNEL"] + + def handle_input(self, model_call_details={}): + if len(model_call_details.keys()) > 0: + model = model_call_details["model"] if "model" in model_call_details else None + if model: + for callback in self.callback_list: + if callback == "sentry": # add a sentry breadcrumb if user passed in sentry integration + self.add_breadcrumb( + category=f'{model}', + message='Trying request model {} input {}'.format(model, json.dumps(model_call_details)), + level='info', + ) + if self.logger_fn and callable(self.logger_fn): + self.logger_fn(model_call_details) + pass + + def handle_success(self, model, messages, additional_details): + success_handler = additional_details.pop("success_handler", None) + failure_handler = additional_details.pop("failure_handler", None) + additional_details["litellm_model"] = str(model) + additional_details["litellm_messages"] = str(messages) + for callback in self.success_callback: + try: + if callback == "posthog": + ph_obj = {} + for detail in additional_details: + ph_obj[detail] = additional_details[detail] + event_name = additional_details["successful_event"] if "successful_event" in additional_details else "litellm.succes_query" + if "user_id" in additional_details: + self.posthog.capture(additional_details["user_id"], event_name, ph_obj) + else: + self.posthog.capture(event_name, ph_obj) + pass + elif callback == "slack": + slack_msg = "" + if len(additional_details.keys()) > 0: + for detail in additional_details: + slack_msg += f"{detail}: {additional_details[detail]}\n" + slack_msg += f"Successful call" + self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg) + except: + pass + + if success_handler and callable(success_handler): + call_details = { + "model": model, + "messages": messages, + "additional_details": additional_details + } + success_handler(call_details) + pass + + def handle_failure(self, exception, args): + args.pop("self") + additional_details = args.pop("additional_details", {}) + + success_handler = additional_details.pop("success_handler", None) + failure_handler = additional_details.pop("failure_handler", None) + + for callback in self.failure_callback: + try: + if callback == "slack": + slack_msg = "" + for param in args: + slack_msg += f"{param}: {args[param]}\n" + if len(additional_details.keys()) > 0: + for detail in additional_details: + slack_msg += f"{detail}: {additional_details[detail]}\n" + slack_msg += f"Traceback: {traceback.format_exc()}" + self.slack_app.client.chat_postMessage(channel=self.alerts_channel, text=slack_msg) + elif callback == "sentry": + self.capture_exception(exception) + elif callback == "posthog": + if len(additional_details.keys()) > 0: + ph_obj = {} + for param in args: + ph_obj[param] += args[param] + for detail in additional_details: + ph_obj[detail] = additional_details[detail] + event_name = additional_details["failed_event"] if "failed_event" in additional_details else "litellm.failed_query" + if "user_id" in additional_details: + self.posthog.capture(additional_details["user_id"], event_name, ph_obj) + else: + self.posthog.capture(event_name, ph_obj) + else: + pass + except: + print(f"got an error calling {callback} - {traceback.format_exc()}") + + if failure_handler and 
callable(failure_handler): + call_details = { + "exception": exception, + "additional_details": additional_details + } + failure_handler(call_details) + pass +####### HELPER FUNCTIONS ################ + +#Logging function -> log the exact model details + what's being sent | Non-Blocking +def logging(model, input, azure=False, additional_args={}, logger_fn=None): + try: + model_call_details = {} + model_call_details["model"] = model + model_call_details["input"] = input + model_call_details["azure"] = azure + model_call_details["additional_args"] = additional_args + if logger_fn and callable(logger_fn): + try: + # log additional call details -> api key, etc. + if azure == True or model in open_ai_chat_completion_models or model in open_ai_chat_completion_models or model in open_ai_embedding_models: + model_call_details["api_type"] = openai.api_type + model_call_details["api_base"] = openai.api_base + model_call_details["api_version"] = openai.api_version + model_call_details["api_key"] = openai.api_key + elif "replicate" in model: + model_call_details["api_key"] = os.environ.get("REPLICATE_API_TOKEN") + elif model in anthropic_models: + model_call_details["api_key"] = os.environ.get("ANTHROPIC_API_KEY") + elif model in cohere_models: + model_call_details["api_key"] = os.environ.get("COHERE_API_KEY") + + logger_fn(model_call_details) # Expectation: any logger function passed in by the user should accept a dict object + except: + print_verbose(f"Basic model call details: {model_call_details}") + print_verbose(f"[Non-Blocking] Exception occurred while logging {traceback.format_exc()}") + pass + else: + print_verbose(f"Basic model call details: {model_call_details}") + pass + except: + pass + +## Set verbose to true -> ```litellm.verbose = True``` +def print_verbose(print_statement): + if set_verbose: + print(f"LiteLLM: {print_statement}") + print("Get help - https://discord.com/invite/wuPM9dRgDw") \ No newline at end of file diff --git a/litellm/tests/test_bad_params.py b/litellm/tests/test_bad_params.py new file mode 100644 index 000000000..2b2e4bbcf --- /dev/null +++ b/litellm/tests/test_bad_params.py @@ -0,0 +1,20 @@ +import sys, os +import traceback +sys.path.append('..') # Adds the parent directory to the system path +import main +from main import litellm_client +client = litellm_client(success_callback=["posthog"], failure_callback=["slack", "sentry", "posthog"], verbose=True) +completion = client.completion +embedding = client.embedding + +main.set_verbose = True + +user_message = "Hello, how are you?" +messages = [{ "content": user_message,"role": "user"}] +model_val = None +# test on empty +try: + response = completion(model=model_val, messages=messages) +except: + print(f"error occurred: {traceback.format_exc()}") + pass diff --git a/litellm/tests/test_client.py b/litellm/tests/test_client.py new file mode 100644 index 000000000..06850ea18 --- /dev/null +++ b/litellm/tests/test_client.py @@ -0,0 +1,59 @@ +import sys, os +import traceback +sys.path.append('..') # Adds the parent directory to the system path +import main +from main import litellm_client +client = litellm_client(success_callback=["posthog"], failure_callback=["slack", "sentry", "posthog"], verbose=True) +completion = client.completion +embedding = client.embedding + +main.set_verbose = True + +def logger_fn(model_call_object: dict): + print(f"model call details: {model_call_object}") + +user_message = "Hello, how are you?" 
+messages = [{ "content": user_message,"role": "user"}] + +# test on openai completion call +try: + response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn) +except: + print(f"error occurred: {traceback.format_exc()}") + pass + + +# test on openai completion call +try: + response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn) +except: + print(f"error occurred: {traceback.format_exc()}") + pass + +# test on non-openai completion call +try: + response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn) +except: + print(f"error occurred: {traceback.format_exc()}") + pass + +# test on openai embedding call +try: + response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn) + print(f"response: {str(response)[:50]}") +except: + traceback.print_exc() + +# test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model +try: + response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn) + print(f"response: {str(response)[:50]}") +except: + traceback.print_exc() + +# test on good azure openai embedding call +try: + response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn) + print(f"response: {str(response)[:50]}") +except: + traceback.print_exc() diff --git a/litellm/tests/test_logging.py b/litellm/tests/test_logging.py new file mode 100644 index 000000000..95a75de47 --- /dev/null +++ b/litellm/tests/test_logging.py @@ -0,0 +1,48 @@ +import sys, os +import traceback +sys.path.append('..') # Adds the parent directory to the system path +import main +from main import completion, embedding + +main.verbose = True ## Replace to: ```litellm.verbose = True``` when using pypi package + +def logger_fn(model_call_object: dict): + print(f"model call details: {model_call_object}") + +user_message = "Hello, how are you?" +messages = [{ "content": user_message,"role": "user"}] + +# test on openai completion call +try: + response = completion(model="gpt-3.5-turbo", messages=messages) +except: + print(f"error occurred: {traceback.format_exc()}") + pass + +# test on non-openai completion call +try: + response = completion(model="claude-instant-1", messages=messages, logger_fn=logger_fn) +except: + print(f"error occurred: {traceback.format_exc()}") + pass + +# test on openai embedding call +try: + response = embedding(model='text-embedding-ada-002', input=[user_message], logger_fn=logger_fn) + print(f"response: {str(response)[:50]}") +except: + traceback.print_exc() + +# test on bad azure openai embedding call -> missing azure flag and this isn't an embedding model +try: + response = embedding(model='chatgpt-test', input=[user_message], logger_fn=logger_fn) + print(f"response: {str(response)[:50]}") +except: + traceback.print_exc() + +# test on good azure openai embedding call +try: + response = embedding(model='azure-embedding-model', input=[user_message], azure=True, logger_fn=logger_fn) + print(f"response: {str(response)[:50]}") +except: + traceback.print_exc() diff --git a/setup.py b/setup.py index 2288a1f82..a143fbe5b 100644 --- a/setup.py +++ b/setup.py @@ -2,9 +2,9 @@ from setuptools import setup, find_packages setup( name='litellm', - version='0.1.202', + version='0.1.2', description='Library to easily interface with LLM API providers', - author='Ishaan Jaffer', + author='BerriAI', packages=[ 'litellm' ],