formatting fixes

2023-08-21 17:09:53 -07:00 · 2023-08-21 17:09:53 -07:00 · ccf875f84b
commit ccf875f84b
parent 601bc7ecbd
9 changed files with 299 additions and 104 deletions
--- a/litellm/init.py
+++ b/litellm/init.py
@ -1,5 +1,6 @@
 import threading
 from typing import Callable, List, Optional
 input_callback: List[str] = []
 success_callback: List[str] = []
 failure_callback: List[str] = []
@ -21,6 +22,7 @@ vertex_location: Optional[str] = None
 togetherai_api_key: Optional[str] = None
 caching = False
 caching_with_models = False  # if you want the caching key to be model + prompt
 debugger = False
 model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
@ -156,7 +158,7 @@ replicate_models = [
    "a16z-infra/llama-2-7b-chat:7b0bfc9aff140d5b75bacbed23e91fd3c34b01a1e958d32132de6e0a19796e2c",
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
-    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad"
+    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
 ]  # placeholder, to make sure we accept any replicate model in our model_list
 openrouter_models = [
@ -196,14 +198,10 @@ ai21_models = ["j2-ultra", "j2-mid", "j2-light"]
 together_ai_models = [
    "togethercomputer/llama-2-70b-chat",
    "togethercomputer/Llama-2-7B-32K-Instruct",
-    "togethercomputer/llama-2-7b"
+    "togethercomputer/llama-2-7b",
 ]
-baseten_models = [
+baseten_models = ["qvv0xeq", "q841o8w", "31dxrj3"]  # FALCON 7B  # WizardLM  # Mosaic ML
    "qvv0xeq", # FALCON 7B
    "q841o8w", # WizardLM
    "31dxrj3" # Mosaic ML
 ]
 model_list = (
    open_ai_chat_completion_models
@ -231,12 +229,11 @@ provider_list = [
    "openrouter",
    "vertex_ai",
    "ai21",
-    "baseten"
+    "baseten",
 ]
 models_by_provider = {
-    "openai": open_ai_chat_completion_models
+    "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
    + open_ai_text_completion_models,
    "cohere": cohere_models,
    "anthropic": anthropic_models,
    "replicate": replicate_models,
@ -263,8 +260,11 @@ from .utils import (
    completion_cost,
    get_litellm_params,
    Logging,
 <<<<<<< HEAD
    acreate,
    get_model_list
 =======
 >>>>>>> 878f1a6 (formatting fixes)
 )
 from .main import *  # type: ignore
 from .integrations import *
--- a/litellm/integrations/litedebugger.py
+++ b/litellm/integrations/litedebugger.py
@ -1,5 +1,6 @@
 import requests, traceback, json, os
 class LiteDebugger:
    user_email = None
    dashboard_url = None
@ -14,12 +15,17 @@ class LiteDebugger:
            self.dashboard_url = 'https://admin.litellm.ai/' + self.user_email
            print(f"Here's your free Dashboard 👉 {self.dashboard_url}")
            if self.user_email == None:
-                raise Exception("[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_EMAIL. Set it in your environment. Eg.: os.environ['LITELLM_EMAIL']= <your_email>")
+                raise Exception(
                    "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_EMAIL. Set it in your environment. Eg.: os.environ['LITELLM_EMAIL']= <your_email>"
                )
        except Exception as e:
-            raise ValueError("[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_EMAIL. Set it in your environment. Eg.: os.environ['LITELLM_EMAIL']= <your_email>")
+            raise ValueError(
                "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_EMAIL. Set it in your environment. Eg.: os.environ['LITELLM_EMAIL']= <your_email>"
            )
-    
+    def input_log_event(
-    def input_log_event(self, model, messages, end_user, litellm_call_id, print_verbose):
+        self, model, messages, end_user, litellm_call_id, print_verbose
    ):
        try:
            print_verbose(
                f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
@ -30,22 +36,31 @@ class LiteDebugger:
                "end_user": end_user,
                "status": "initiated",
                "litellm_call_id": litellm_call_id,
-                "user_email": self.user_email
+                "user_email": self.user_email,
            }
-            response = requests.post(url=self.api_url, headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj))
+            response = requests.post(
                url=self.api_url,
                headers={"content-type": "application/json"},
                data=json.dumps(litellm_data_obj),
            )
            print_verbose(f"LiteDebugger: api response - {response.text}")
        except:
-            print_verbose(f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}")
+            print_verbose(
                f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
            )
            pass
-    def log_event(self, model,
+    def log_event(
        self,
        model,
        messages,
        end_user,
        response_obj,
        start_time,
        end_time,
        litellm_call_id,
-        print_verbose,):
+        print_verbose,
    ):
        try:
            print_verbose(
                f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
@ -62,12 +77,16 @@ class LiteDebugger:
                    "end_user": end_user,
                    "litellm_call_id": litellm_call_id,
                    "status": "success",
-                    "user_email": self.user_email
+                    "user_email": self.user_email,
                }
                print_verbose(
                    f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
                )
-                response = requests.post(url=self.api_url, headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj))
+                response = requests.post(
                    url=self.api_url,
                    headers={"content-type": "application/json"},
                    data=json.dumps(litellm_data_obj),
                )
            elif "error" in response_obj:
                if "Unable to map your input to a model." in response_obj["error"]:
                    total_cost = 0
@ -80,13 +99,19 @@ class LiteDebugger:
                    "end_user": end_user,
                    "litellm_call_id": litellm_call_id,
                    "status": "failure",
-                    "user_email": self.user_email
+                    "user_email": self.user_email,
                }
                print_verbose(
                    f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
                )
-                response = requests.post(url=self.api_url, headers={"content-type": "application/json"}, data=json.dumps(litellm_data_obj))
+                response = requests.post(
                    url=self.api_url,
                    headers={"content-type": "application/json"},
                    data=json.dumps(litellm_data_obj),
                )
                print_verbose(f"LiteDebugger: api response - {response.text}")
        except:
-            print_verbose(f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}")
+            print_verbose(
                f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
            )
            pass
--- a/litellm/integrations/supabase.py
+++ b/litellm/integrations/supabase.py
@ -144,7 +144,9 @@ class Supabase:
            )
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
-    def input_log_event(self, model, messages, end_user, litellm_call_id, print_verbose):
+    def input_log_event(
        self, model, messages, end_user, litellm_call_id, print_verbose
    ):
        try:
            print_verbose(
                f"Supabase Logging - Enters input logging function for model {model}"
@ -154,7 +156,7 @@ class Supabase:
                "messages": messages,
                "end_user": end_user,
                "status": "initiated",
-                "litellm_call_id": litellm_call_id
+                "litellm_call_id": litellm_call_id,
            }
            data, count = (
                self.supabase_client.table(self.supabase_table_name)
@ -200,7 +202,7 @@ class Supabase:
                    "response": response_obj["choices"][0]["message"]["content"],
                    "end_user": end_user,
                    "litellm_call_id": litellm_call_id,
-                    "status": "success"
+                    "status": "success",
                }
                print_verbose(
                    f"Supabase Logging - final data object: {supabase_data_obj}"
@ -221,7 +223,7 @@ class Supabase:
                    "error": response_obj["error"],
                    "end_user": end_user,
                    "litellm_call_id": litellm_call_id,
-                    "status": "failure"
+                    "status": "failure",
                }
                print_verbose(
                    f"Supabase Logging - final data object: {supabase_data_obj}"
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@ -21,7 +21,9 @@ class AnthropicError(Exception):
 class AnthropicLLM:
-    def __init__(self, encoding, default_max_tokens_to_sample, logging_obj, api_key=None):
+    def __init__(
        self, encoding, default_max_tokens_to_sample, logging_obj, api_key=None
    ):
        self.encoding = encoding
        self.default_max_tokens_to_sample = default_max_tokens_to_sample
        self.completion_url = "https://api.anthropic.com/v1/complete"
@ -84,7 +86,11 @@ class AnthropicLLM:
        }
        ## LOGGING
-        self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
+        self.logging_obj.pre_call(
            input=prompt,
            api_key=self.api_key,
            additional_args={"complete_input_dict": data},
        )
        ## COMPLETION CALL
        response = requests.post(
            self.completion_url, headers=self.headers, data=json.dumps(data)
@ -93,7 +99,12 @@ class AnthropicLLM:
            return response.iter_lines()
        else:
            ## LOGGING
-            self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
+            self.logging_obj.post_call(
                input=prompt,
                api_key=self.api_key,
                original_response=response.text,
                additional_args={"complete_input_dict": data},
            )
            print_verbose(f"raw model_response: {response.text}")
            ## RESPONSE OBJECT
            completion_response = response.json()
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@ -72,12 +72,13 @@ class HuggingfaceRestAPILLM:
        if "max_tokens" in optional_params:
            value = optional_params.pop("max_tokens")
            optional_params["max_new_tokens"] = value
-        data = {
+        data = {"inputs": prompt, "parameters": optional_params}
            "inputs": prompt,
            "parameters": optional_params
        }
        ## LOGGING
-        self.logging_obj.pre_call(input=prompt, api_key=self.api_key, additional_args={"complete_input_dict": data})
+        self.logging_obj.pre_call(
            input=prompt,
            api_key=self.api_key,
            additional_args={"complete_input_dict": data},
        )
        ## COMPLETION CALL
        response = requests.post(
            completion_url, headers=self.headers, data=json.dumps(data)
@ -86,7 +87,12 @@ class HuggingfaceRestAPILLM:
            return response.iter_lines()
        else:
            ## LOGGING
-            self.logging_obj.post_call(input=prompt, api_key=self.api_key, original_response=response.text, additional_args={"complete_input_dict": data})
+            self.logging_obj.post_call(
                input=prompt,
                api_key=self.api_key,
                original_response=response.text,
                additional_args={"complete_input_dict": data},
            )
            ## RESPONSE OBJECT
            completion_response = response.json()
            print_verbose(f"response: {completion_response}")
--- a/litellm/main.py
+++ b/litellm/main.py
@ -10,7 +10,7 @@ from litellm import (  # type: ignore
    timeout,
    get_optional_params,
    get_litellm_params,
-    Logging
+    Logging,
 )
 from litellm.utils import (
    get_secret,
@ -96,10 +96,14 @@ def completion(
        model_response = ModelResponse()
        if azure:  # this flag is deprecated, remove once notebooks are also updated.
            custom_llm_provider = "azure"
-        elif model.split("/", 1)[0] in litellm.provider_list: # allow custom provider to be passed in via the model name "azure/chatgpt-test"
+        elif (
            model.split("/", 1)[0] in litellm.provider_list
        ):  # allow custom provider to be passed in via the model name "azure/chatgpt-test"
            custom_llm_provider = model.split("/", 1)[0]
            model = model.split("/", 1)[1]
-            if "replicate" == custom_llm_provider and "/" not in model: # handle the "replicate/llama2..." edge-case
+            if (
                "replicate" == custom_llm_provider and "/" not in model
            ):  # handle the "replicate/llama2..." edge-case
                model = custom_llm_provider + "/" + model
        # check if user passed in any of the OpenAI optional params
        optional_params = get_optional_params(
@ -130,9 +134,14 @@ def completion(
            verbose=verbose,
            custom_llm_provider=custom_llm_provider,
            custom_api_base=custom_api_base,
-            litellm_call_id=litellm_call_id
+            litellm_call_id=litellm_call_id,
        )
        logging = Logging(
            model=model,
            messages=messages,
            optional_params=optional_params,
            litellm_params=litellm_params,
        )
        logging = Logging(model=model, messages=messages, optional_params=optional_params, litellm_params=litellm_params)
        if custom_llm_provider == "azure":
            # azure configs
            openai.api_type = "azure"
@ -153,7 +162,15 @@ def completion(
            # set key
            openai.api_key = api_key
            ## LOGGING
-            logging.pre_call(input=messages, api_key=openai.api_key, additional_args={"litellm.headers": litellm.headers, "api_version": openai.api_version, "api_base": openai.api_base})
+            logging.pre_call(
                input=messages,
                api_key=openai.api_key,
                additional_args={
                    "headers": litellm.headers,
                    "api_version": openai.api_version,
                    "api_base": openai.api_base,
                },
            )
            ## COMPLETION CALL
            if litellm.headers:
                response = openai.ChatCompletion.create(
@ -168,7 +185,16 @@ def completion(
                )
            ## LOGGING
-            logging.post_call(input=messages, api_key=openai.api_key, original_response=response, additional_args={"headers": litellm.headers, "api_version": openai.api_version, "api_base": openai.api_base})
+            logging.post_call(
                input=messages,
                api_key=openai.api_key,
                original_response=response,
                additional_args={
                    "headers": litellm.headers,
                    "api_version": openai.api_version,
                    "api_base": openai.api_base,
                },
            )
        elif (
            model in litellm.open_ai_chat_completion_models
            or custom_llm_provider == "custom_openai"
@ -193,7 +219,11 @@ def completion(
            openai.api_key = api_key
            ## LOGGING
-            logging.pre_call(input=messages, api_key=api_key, additional_args={"headers": litellm.headers, "api_base": api_base})
+            logging.pre_call(
                input=messages,
                api_key=api_key,
                additional_args={"headers": litellm.headers, "api_base": api_base},
            )
            ## COMPLETION CALL
            if litellm.headers:
                response = openai.ChatCompletion.create(
@ -207,7 +237,12 @@ def completion(
                    model=model, messages=messages, **optional_params
                )
            ## LOGGING
-            logging.post_call(input=messages, api_key=api_key, original_response=response, additional_args={"headers": litellm.headers})
+            logging.post_call(
                input=messages,
                api_key=api_key,
                original_response=response,
                additional_args={"headers": litellm.headers},
            )
        elif model in litellm.open_ai_text_completion_models:
            openai.api_type = "openai"
            openai.api_base = (
@ -228,7 +263,16 @@ def completion(
                openai.organization = litellm.organization
            prompt = " ".join([message["content"] for message in messages])
            ## LOGGING
-            logging.pre_call(input=prompt, api_key=api_key, additional_args={"openai_organization": litellm.organization, "headers": litellm.headers, "api_base": openai.api_base, "api_type": openai.api_type})
+            logging.pre_call(
                input=prompt,
                api_key=api_key,
                additional_args={
                    "openai_organization": litellm.organization,
                    "headers": litellm.headers,
                    "api_base": openai.api_base,
                    "api_type": openai.api_type,
                },
            )
            ## COMPLETION CALL
            if litellm.headers:
                response = openai.Completion.create(
@ -239,7 +283,17 @@ def completion(
            else:
                response = openai.Completion.create(model=model, prompt=prompt)
            ## LOGGING
-            logging.post_call(input=prompt, api_key=api_key, original_response=response, additional_args={"openai_organization": litellm.organization, "headers": litellm.headers, "api_base": openai.api_base, "api_type": openai.api_type})
+            logging.post_call(
                input=prompt,
                api_key=api_key,
                original_response=response,
                additional_args={
                    "openai_organization": litellm.organization,
                    "headers": litellm.headers,
                    "api_base": openai.api_base,
                    "api_type": openai.api_type,
                },
            )
            ## RESPONSE OBJECT
            completion_response = response["choices"][0]["text"]
            model_response["choices"][0]["message"]["content"] = completion_response
@ -270,7 +324,14 @@ def completion(
                input["max_length"] = max_tokens  # for t5 models
                input["max_new_tokens"] = max_tokens  # for llama2 models
            ## LOGGING
-            logging.pre_call(input=prompt, api_key=replicate_key, additional_args={"complete_input_dict": input, "max_tokens": max_tokens})
+            logging.pre_call(
                input=prompt,
                api_key=replicate_key,
                additional_args={
                    "complete_input_dict": input,
                    "max_tokens": max_tokens,
                },
            )
            ## COMPLETION CALL
            output = replicate.run(model, input=input)
            if "stream" in optional_params and optional_params["stream"] == True:
@ -283,7 +344,15 @@ def completion(
                response += item
            completion_response = response
            ## LOGGING
-            logging.post_call(input=prompt, api_key=replicate_key, original_response=completion_response, additional_args={"complete_input_dict": input, "max_tokens": max_tokens})
+            logging.post_call(
                input=prompt,
                api_key=replicate_key,
                original_response=completion_response,
                additional_args={
                    "complete_input_dict": input,
                    "max_tokens": max_tokens,
                },
            )
            ## USAGE
            prompt_tokens = len(encoding.encode(prompt))
            completion_tokens = len(encoding.encode(completion_response))
@ -305,7 +374,7 @@ def completion(
                encoding=encoding,
                default_max_tokens_to_sample=litellm.max_tokens,
                api_key=anthropic_key,
-                logging_obj = logging # model call logging done inside the class as we make need to modify I/O to fit anthropic's requirements
+                logging_obj=logging,  # model call logging done inside the class as we make need to modify I/O to fit anthropic's requirements
            )
            model_response = anthropic_client.completion(
                model=model,
@ -369,7 +438,9 @@ def completion(
                    **optional_params,
                )
            ## LOGGING
-            logging.post_call(input=messages, api_key=openai.api_key, original_response=response)
+            logging.post_call(
                input=messages, api_key=openai.api_key, original_response=response
            )
        elif model in litellm.cohere_models:
            # import cohere/if it fails then pip install cohere
            install_and_import("cohere")
@ -392,7 +463,9 @@ def completion(
                response = CustomStreamWrapper(response, model)
                return response
            ## LOGGING
-            logging.post_call(input=prompt, api_key=cohere_key, original_response=response)
+            logging.post_call(
                input=prompt, api_key=cohere_key, original_response=response
            )
            ## USAGE
            completion_response = response[0].text
            prompt_tokens = len(encoding.encode(prompt))
@ -475,7 +548,9 @@ def completion(
                headers=headers,
            )
            ## LOGGING
-            logging.post_call(input=prompt, api_key=TOGETHER_AI_TOKEN, original_response=res.text)
+            logging.post_call(
                input=prompt, api_key=TOGETHER_AI_TOKEN, original_response=res.text
            )
            # make this safe for reading, if output does not exist raise an error
            json_response = res.json()
            if "output" not in json_response:
@ -516,7 +591,9 @@ def completion(
            completion_response = chat.send_message(prompt, **optional_params)
            ## LOGGING
-            logging.post_call(input=prompt, api_key=None, original_response=completion_response)
+            logging.post_call(
                input=prompt, api_key=None, original_response=completion_response
            )
            ## RESPONSE OBJECT
            model_response["choices"][0]["message"]["content"] = completion_response
@ -541,7 +618,9 @@ def completion(
            completion_response = vertex_model.predict(prompt, **optional_params)
            ## LOGGING
-            logging.post_call(input=prompt, api_key=None, original_response=completion_response)
+            logging.post_call(
                input=prompt, api_key=None, original_response=completion_response
            )
            ## RESPONSE OBJECT
            model_response["choices"][0]["message"]["content"] = completion_response
            model_response["created"] = time.time()
@ -564,7 +643,11 @@ def completion(
            completion_response = ai21_response["completions"][0]["data"]["text"]
            ## LOGGING
-            logging.post_call(input=prompt, api_key=ai21.api_key, original_response=completion_response)
+            logging.post_call(
                input=prompt,
                api_key=ai21.api_key,
                original_response=completion_response,
            )
            ## RESPONSE OBJECT
            model_response["choices"][0]["message"]["content"] = completion_response
@ -578,7 +661,9 @@ def completion(
            prompt = " ".join([message["content"] for message in messages])
            ## LOGGING
-            logging.pre_call(input=prompt, api_key=None, additional_args={"endpoint": endpoint})
+            logging.pre_call(
                input=prompt, api_key=None, additional_args={"endpoint": endpoint}
            )
            generator = get_ollama_response_stream(endpoint, model, prompt)
            # assume all responses are streamed
@ -605,7 +690,11 @@ def completion(
                    completion_response = completion_response["generated_text"]
            ## LOGGING
-            logging.post_call(input=prompt, api_key=base_ten_key, original_response=completion_response)
+            logging.post_call(
                input=prompt,
                api_key=base_ten_key,
                original_response=completion_response,
            )
            ## RESPONSE OBJECT
            model_response["choices"][0]["message"]["content"] = completion_response
@ -622,13 +711,22 @@ def completion(
            prompt = " ".join([message["content"] for message in messages])
            ## LOGGING
-            logging.pre_call(input=prompt, api_key=None, additional_args={"url": url, "max_new_tokens": 100})
+            logging.pre_call(
                input=prompt,
                api_key=None,
                additional_args={"url": url, "max_new_tokens": 100},
            )
            response = requests.post(
                url, data={"inputs": prompt, "max_new_tokens": 100, "model": model}
            )
            ## LOGGING
-            logging.post_call(input=prompt, api_key=None, original_response=response.text, additional_args={"url": url, "max_new_tokens": 100})
+            logging.post_call(
                input=prompt,
                api_key=None,
                original_response=response.text,
                additional_args={"url": url, "max_new_tokens": 100},
            )
            completion_response = response.json()["outputs"]
@ -676,10 +774,22 @@ def batch_completion(*args, **kwargs):
@timeout(  # type: ignore
    60
 )  ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
-def embedding(model, input=[], azure=False, force_timeout=60, litellm_call_id=None, logger_fn=None):
+def embedding(
    model, input=[], azure=False, force_timeout=60, litellm_call_id=None, logger_fn=None
 ):
    try:
        response = None
-        logging = Logging(model=model, messages=input, optional_params={}, litellm_params={"azure": azure, "force_timeout": force_timeout, "logger_fn": logger_fn, "litellm_call_id": litellm_call_id})
+        logging = Logging(
            model=model,
            messages=input,
            optional_params={},
            litellm_params={
                "azure": azure,
                "force_timeout": force_timeout,
                "logger_fn": logger_fn,
                "litellm_call_id": litellm_call_id,
            },
        )
        if azure == True:
            # azure configs
            openai.api_type = "azure"
@ -687,7 +797,15 @@ def embedding(model, input=[], azure=False, force_timeout=60, litellm_call_id=No
            openai.api_version = get_secret("AZURE_API_VERSION")
            openai.api_key = get_secret("AZURE_API_KEY")
            ## LOGGING
-            logging.pre_call(input=input, api_key=openai.api_key, additional_args={"api_type": openai.api_type, "api_base": openai.api_base, "api_version": openai.api_version})
+            logging.pre_call(
                input=input,
                api_key=openai.api_key,
                additional_args={
                    "api_type": openai.api_type,
                    "api_base": openai.api_base,
                    "api_version": openai.api_version,
                },
            )
            ## EMBEDDING CALL
            response = openai.Embedding.create(input=input, engine=model)
            print_verbose(f"response_value: {str(response)[:50]}")
@ -697,7 +815,15 @@ def embedding(model, input=[], azure=False, force_timeout=60, litellm_call_id=No
            openai.api_version = None
            openai.api_key = get_secret("OPENAI_API_KEY")
            ## LOGGING
-            logging.pre_call(input=input, api_key=openai.api_key, additional_args={"api_type": openai.api_type, "api_base": openai.api_base, "api_version": openai.api_version})
+            logging.pre_call(
                input=input,
                api_key=openai.api_key,
                additional_args={
                    "api_type": openai.api_type,
                    "api_base": openai.api_base,
                    "api_version": openai.api_version,
                },
            )
            ## EMBEDDING CALL
            response = openai.Embedding.create(input=input, model=model)
            print_verbose(f"response_value: {str(response)[:50]}")
@ -710,7 +836,11 @@ def embedding(model, input=[], azure=False, force_timeout=60, litellm_call_id=No
        ## LOGGING
        logging.post_call(input=input, api_key=openai.api_key, original_response=e)
        ## Map to OpenAI Exception
-        raise exception_type(model=model, original_exception=e, custom_llm_provider="azure" if azure==True else None)
+        raise exception_type(
            model=model,
            original_exception=e,
            custom_llm_provider="azure" if azure == True else None,
        )
 ####### HELPER FUNCTIONS ################
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@ -34,7 +34,6 @@ def test_caching():
        pytest.fail(f"Error occurred: {e}")
 def test_caching_with_models():
    litellm.caching_with_models = True
    response2 = completion(model="gpt-3.5-turbo", messages=messages)
@ -47,6 +46,3 @@ def test_caching_with_models():
        print(f"response2: {response2}")
        print(f"response3: {response3}")
        pytest.fail(f"Error occurred: {e}")
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@ -28,15 +28,19 @@ def logger_fn(user_model_dict):
 def test_completion_custom_provider_model_name():
    try:
        response = completion(
-            model="together_ai/togethercomputer/llama-2-70b-chat", messages=messages, logger_fn=logger_fn
+            model="together_ai/togethercomputer/llama-2-70b-chat",
            messages=messages,
            logger_fn=logger_fn,
        )
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
 test_completion_custom_provider_model_name()
 def test_completion_claude():
    try:
        response = completion(
@ -89,7 +93,10 @@ def test_completion_claude_stream():
 def test_completion_cohere():
    try:
        response = completion(
-            model="command-nightly", messages=messages, max_tokens=100, logit_bias={40: 10}
+            model="command-nightly",
            messages=messages,
            max_tokens=100,
            logit_bias={40: 10},
        )
        # Add any assertions here to check the response
        print(response)
@ -103,6 +110,7 @@ def test_completion_cohere():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
 def test_completion_cohere_stream():
    try:
        messages = [
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -139,6 +139,7 @@ def install_and_import(package: str):
 # Logging function -> log the exact model details + what's being sent | Non-Blocking
 class Logging:
    global supabaseClient, liteDebuggerClient
    def __init__(self, model, messages, optional_params, litellm_params):
        self.model = model
        self.messages = messages
@ -201,7 +202,9 @@ class Logging:
                            print_verbose=print_verbose,
                        )
                except Exception as e:
-                    print_verbose(f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}")
+                    print_verbose(
                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
                    )
                    print_verbose(
                        f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
                    )
@ -244,6 +247,7 @@ class Logging:
            )
            pass
 def exception_logging(
    additional_args={},
    logger_fn=None,
@ -278,6 +282,7 @@ def exception_logging(
 # make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking
 def client(original_function):
    global liteDebuggerClient
    def function_setup(
        *args, **kwargs
    ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
@ -288,10 +293,16 @@ def client(original_function):
                litellm.success_callback.append("lite_debugger")
                litellm.failure_callback.append("lite_debugger")
            if (
-                len(litellm.input_callback) > 0 or len(litellm.success_callback) > 0 or len(litellm.failure_callback) > 0
+                len(litellm.input_callback) > 0
                or len(litellm.success_callback) > 0
                or len(litellm.failure_callback) > 0
            ) and len(callback_list) == 0:
                callback_list = list(
-                    set(litellm.input_callback + litellm.success_callback + litellm.failure_callback)
+                    set(
                        litellm.input_callback
                        + litellm.success_callback
                        + litellm.failure_callback
                    )
                )
                set_callbacks(
                    callback_list=callback_list,
@ -413,7 +424,9 @@ def client(original_function):
            )  # don't interrupt execution of main thread
            my_thread.start()
            if hasattr(e, "message"):
-                if liteDebuggerClient and liteDebuggerClient.dashboard_url != None: # make it easy to get to the debugger logs if you've initialized it
+                if (
                    liteDebuggerClient and liteDebuggerClient.dashboard_url != None
                ):  # make it easy to get to the debugger logs if you've initialized it
                    e.message += f"\n Check the log in your dashboard - {liteDebuggerClient.dashboard_url}"
            raise e
@ -497,7 +510,7 @@ def get_litellm_params(
        "verbose": verbose,
        "custom_llm_provider": custom_llm_provider,
        "custom_api_base": custom_api_base,
-        "litellm_call_id": litellm_call_id
+        "litellm_call_id": litellm_call_id,
    }
    return litellm_params
@ -1053,7 +1066,10 @@ def prompt_token_calculator(model, messages):
 def valid_model(model):
    try:
        # for a given model name, check if the user has the right permissions to access the model
-        if model in litellm.open_ai_chat_completion_models or model in litellm.open_ai_text_completion_models:
+        if (
            model in litellm.open_ai_chat_completion_models
            or model in litellm.open_ai_text_completion_models
        ):
            openai.Model.retrieve(model)
        else:
            messages = [{"role": "user", "content": "Hello World"}]
@ -1061,6 +1077,7 @@ def valid_model(model):
    except:
        raise InvalidRequestError(message="", model=model, llm_provider="")
 # integration helper function
 def modify_integration(integration_name, integration_params):
    global supabaseClient