fixes to litedebugger

2023-09-01 13:22:16 -07:00 · 2023-09-01 13:22:16 -07:00 · 0ff4515f20
commit 0ff4515f20
parent f30cb56b8d
8 changed files with 323 additions and 169 deletions
--- a/litellm/__pycache__/main.cpython-311.pyc
+++ b/litellm/__pycache__/main.cpython-311.pyc
--- a/litellm/__pycache__/utils.cpython-311.pyc
+++ b/litellm/__pycache__/utils.cpython-311.pyc
--- a/litellm/integrations/litedebugger.py
+++ b/litellm/integrations/litedebugger.py
@@ -6,7 +6,8 @@ class LiteDebugger:
    dashboard_url = None

    def __init__(self, email=None):
-        self.api_url = "https://api.litellm.ai/debugger"
+        # self.api_url = "https://api.litellm.ai/debugger"
+        self.api_url = "http://0.0.0.0:4000/debugger"
        self.validate_environment(email)
        pass

@@ -40,6 +41,7 @@ class LiteDebugger:
        litellm_params,
        optional_params,
    ):
+        print_verbose(f"LiteDebugger: Pre-API Call Logging")
        try:
            print_verbose(
                f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
@@ -100,6 +102,7 @@ class LiteDebugger:
            pass

    def post_call_log_event(self, original_response, litellm_call_id, print_verbose, call_type, stream):
+        print_verbose(f"LiteDebugger: Post-API Call Logging")
        try:
            if call_type == "embedding":
                litellm_data_obj = {
@@ -122,6 +125,7 @@ class LiteDebugger:
                    "litellm_call_id": litellm_call_id,
                    "user_email": self.user_email,
                }
+            print_verbose(f"litedebugger post-call data object - {litellm_data_obj}")
            response = requests.post(
                url=self.api_url,
                headers={"content-type": "application/json"},
@@ -144,9 +148,10 @@ class LiteDebugger:
        call_type, 
        stream = False
    ):
+        print_verbose(f"LiteDebugger: Success/Failure Call Logging")
        try:
            print_verbose(
-                f"LiteLLMDebugger: Logging - Enters handler logging function for function {call_type} and stream set to {stream} with response object {response_obj}"
+                f"LiteLLMDebugger: Success/Failure Logging - Enters handler logging function for function {call_type} and stream set to {stream} with response object {response_obj}"
            )
            total_cost = 0  # [TODO] implement cost tracking
            response_time = (end_time - start_time).total_seconds()
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -94,6 +94,7 @@ def completion(
    custom_api_base=None,
    litellm_call_id=None,
    litellm_logging_obj=None,
+    use_client=False,
    id=None, # this is an optional param to tag individual completion calls 
    # model specific optional params
    # used by text-bison only
@@ -245,6 +246,7 @@ def completion(
                additional_args={"headers": litellm.headers, "api_base": api_base},
            )
            ## COMPLETION CALL
+            try:
                if litellm.headers:
                    response = openai.ChatCompletion.create(
                        model=model,
@@ -256,6 +258,16 @@ def completion(
                    response = openai.ChatCompletion.create(
                        model=model, messages=messages, **optional_params
                    )
+            except Exception as e:
+                ## LOGGING - log the original exception returned
+                logging.post_call(
+                    input=messages,
+                    api_key=api_key,
+                    original_response=str(e),
+                    additional_args={"headers": litellm.headers},
+                )
+                raise e
+            
            if "stream" in optional_params and optional_params["stream"] == True:
                response = CustomStreamWrapper(response, model, logging_obj=logging)
                return response
@@ -817,6 +829,12 @@ def embedding(
        logging.post_call(input=input, api_key=openai.api_key, original_response=response)
        return response
    except Exception as e:
+        ## LOGGING
+        logging.post_call(
+            input=input,
+            api_key=openai.api_key,
+            original_response=str(e),
+        )
        ## Map to OpenAI Exception
        raise exception_type(
            model=model,
--- a/litellm/tests/test_litedebugger_integration.py
+++ b/litellm/tests/test_litedebugger_integration.py
@@ -1,36 +1,106 @@
 #### What this tests ####
 #    This tests if logging to the litedebugger integration actually works
-# pytest mistakes intentional bad calls as failed tests -> [TODO] fix this
-import sys, os
-import traceback
+
+# Test Scenarios (test across normal completion, streaming)
+## 1: Pre-API-Call
+## 2: Post-API-Call
+## 3: On LiteLLM Call success
+## 4: On LiteLLM Call failure
+
+
+import sys, os, io
+import traceback, logging
 import pytest
+import dotenv
+dotenv.load_dotenv()

-sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
+# Create logger
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+# Create a stream handler
+stream_handler = logging.StreamHandler(sys.stdout)
+logger.addHandler(stream_handler)
+
+# Create a function to log information
+def logger_fn(message):
+    logger.info(message)
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
 import litellm
-from litellm import embedding, completion
-
+from litellm import completion_with_split_tests
+from openai.error import AuthenticationError
 litellm.set_verbose = True

+score = 0
+split_per_model = {
+	"gpt-4": 0.7, 
+	"claude-instant-1.2": 0.3
+}

-# Test 1: On completion call - without setting client to true -> ensure no logs are created
-response = completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
-# print(f"response: {response}")
-
-
-litellm.use_client = True

 user_message = "Hello, how are you?"
 messages = [{"content": user_message, "role": "user"}]

+# Test 1: On completion call - without setting client to true -> ensure litedebugger is not initialized
+try:
+    # Redirect stdout
+    old_stdout = sys.stdout
+    sys.stdout = new_stdout = io.StringIO()

-# Test 2: On completion call
-response = completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
-# print(f"response: {response}")
+    response = completion_with_split_tests(models=split_per_model, messages=messages)

-# Test 3: On embedding call
-response = embedding(model="text-embedding-ada-002", input=["sample text"])
-# print(f"response: {response}")
+    # Restore stdout
+    sys.stdout = old_stdout
+    output = new_stdout.getvalue().strip()

-# Test 4: On streaming completion call
-response = completion(model="replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}], stream=True)
-print(f"response: {response}")
+    if "LiteLLMDebugger" in output:
+        raise Exception("LiteLLM Debugger should not be called!")
+    score += 1
+except Exception as e:
+    pytest.fail(f"Error occurred: {e}")
+
+
+# Test 2: On normal completion call - setting client to true
+try:
+    # Redirect stdout
+    old_stdout = sys.stdout
+    sys.stdout = new_stdout = io.StringIO()
+
+    response = completion_with_split_tests(models=split_per_model, messages=messages, use_client=True, id="6d383c99-488d-481d-aa1b-1f94935cec44")
+
+    # Restore stdout
+    sys.stdout = old_stdout
+    output = new_stdout.getvalue().strip()
+
+    if "LiteDebugger: Pre-API Call Logging" not in output:
+        raise Exception("LiteLLMDebugger: pre-api call not logged!")
+    if "LiteDebugger: Post-API Call Logging" not in output:
+        raise Exception("LiteLLMDebugger: post-api call not logged!")
+    if "LiteDebugger: Success/Failure Call Logging" not in output:
+        raise Exception("LiteLLMDebugger: success/failure call not logged!")
+except:
+    pass
+
+# Test 3: On streaming completion call - setting client to true
+try:
+    # Redirect stdout
+    old_stdout = sys.stdout
+    sys.stdout = new_stdout = io.StringIO()
+
+    response = completion_with_split_tests(models=split_per_model, messages=messages, stream=True, use_client=True, id="6d383c99-488d-481d-aa1b-1f94935cec44")
+
+    # Restore stdout
+    sys.stdout = old_stdout
+    output = new_stdout.getvalue().strip()
+
+    if "LiteDebugger: Pre-API Call Logging" not in output:
+        raise Exception("LiteLLMDebugger: pre-api call not logged!")
+    if "LiteDebugger: Post-API Call Logging" not in output:
+        raise Exception("LiteLLMDebugger: post-api call not logged!")
+    if "LiteDebugger: Success/Failure Call Logging" not in output:
+        raise Exception("LiteLLMDebugger: success/failure call not logged!")
+except:
+    pass
--- a/litellm/tests/test_logging.py
+++ b/litellm/tests/test_logging.py
@@ -157,27 +157,141 @@ except Exception as e:
 ## 2. On LiteLLM Call failure
 ## TEST BAD KEY

+# normal completion 
+## test on openai completion call
+try:
    temporary_oai_key = os.environ["OPENAI_API_KEY"]
    os.environ["OPENAI_API_KEY"] = "bad-key"

    temporary_anthropic_key = os.environ["ANTHROPIC_API_KEY"]
    os.environ["ANTHROPIC_API_KEY"] = "bad-key"

-# normal completion 
-## test on openai completion call
-try:
+
    # Redirect stdout
    old_stdout = sys.stdout
    sys.stdout = new_stdout = io.StringIO()
    
+    try:
        response = completion(model="gpt-3.5-turbo", messages=messages)
+    except AuthenticationError:
+        print(f"raised auth error")
+        pass
+    # Restore stdout
+    sys.stdout = old_stdout
+    output = new_stdout.getvalue().strip()
+
+    print(output)
+
+    if "Logging Details Pre-API Call" not in output:
+        raise Exception("Required log message not found!")
+    elif "Logging Details Post-API Call" not in output: 
+        raise Exception("Required log message not found!")
+    elif "Logging Details LiteLLM-Failure Call" not in output:
+        raise Exception("Required log message not found!")
+
+    os.environ["OPENAI_API_KEY"] = temporary_oai_key
+    os.environ["ANTHROPIC_API_KEY"] = temporary_anthropic_key
+    
+    score += 1
+except Exception as e:
+    print(f"exception type: {type(e).__name__}")
+    pytest.fail(f"Error occurred: {e}")
+    pass
+
+## test on non-openai completion call
+try:
+    temporary_oai_key = os.environ["OPENAI_API_KEY"]
+    os.environ["OPENAI_API_KEY"] = "bad-key"
+
+    temporary_anthropic_key = os.environ["ANTHROPIC_API_KEY"]
+    os.environ["ANTHROPIC_API_KEY"] = "bad-key"
+    # Redirect stdout
+    old_stdout = sys.stdout
+    sys.stdout = new_stdout = io.StringIO()
+
+    try:
+        response = completion(model="claude-instant-1", messages=messages)
+    except AuthenticationError:
+        pass
+    
+    # Restore stdout
+    sys.stdout = old_stdout
+    output = new_stdout.getvalue().strip()
+
+    print(output)
+    
+    if "Logging Details Pre-API Call" not in output:
+        raise Exception("Required log message not found!")
+    elif "Logging Details Post-API Call" not in output:
+        raise Exception("Required log message not found!")
+    elif "Logging Details LiteLLM-Failure Call" not in output:
+        raise Exception("Required log message not found!")
+    os.environ["OPENAI_API_KEY"] = temporary_oai_key
+    os.environ["ANTHROPIC_API_KEY"] = temporary_anthropic_key
+    score += 1
+except Exception as e:
+    print(f"exception type: {type(e).__name__}")
+    pytest.fail(f"Error occurred: {e}")
+
+
+# streaming completion
+## test on openai completion call
+try:
+    temporary_oai_key = os.environ["OPENAI_API_KEY"]
+    os.environ["OPENAI_API_KEY"] = "bad-key"
+
+    temporary_anthropic_key = os.environ["ANTHROPIC_API_KEY"]
+    os.environ["ANTHROPIC_API_KEY"] = "bad-key"
+    # Redirect stdout
+    old_stdout = sys.stdout
+    sys.stdout = new_stdout = io.StringIO()
+
+    try:
+        response = completion(model="gpt-3.5-turbo", messages=messages)
+    except AuthenticationError:
+        pass
+
+    # Restore stdout
+    sys.stdout = old_stdout
+    output = new_stdout.getvalue().strip()
+
+    print(output)
+
+    if "Logging Details Pre-API Call" not in output:
+        raise Exception("Required log message not found!")
+    elif "Logging Details Post-API Call" not in output:
+        raise Exception("Required log message not found!")
+    elif "Logging Details LiteLLM-Failure Call" not in output:
+        raise Exception("Required log message not found!")
+    
+    os.environ["OPENAI_API_KEY"] = temporary_oai_key
+    os.environ["ANTHROPIC_API_KEY"] = temporary_anthropic_key
+    score += 1
+except Exception as e:
+    print(f"exception type: {type(e).__name__}")
+    pytest.fail(f"Error occurred: {e}")
+
+## test on non-openai completion call
+try:
+    temporary_oai_key = os.environ["OPENAI_API_KEY"]
+    os.environ["OPENAI_API_KEY"] = "bad-key"
+
+    temporary_anthropic_key = os.environ["ANTHROPIC_API_KEY"]
+    os.environ["ANTHROPIC_API_KEY"] = "bad-key"
+    # Redirect stdout
+    old_stdout = sys.stdout
+    sys.stdout = new_stdout = io.StringIO()
+
+    try:
+        response = completion(model="claude-instant-1", messages=messages)
+    except AuthenticationError:
+        pass
    
    # Restore stdout
    sys.stdout = old_stdout
    output = new_stdout.getvalue().strip()

    print(output)
-    print(response)

    if "Logging Details Pre-API Call" not in output:
        raise Exception("Required log message not found!")
@@ -188,92 +302,31 @@ try:
    score += 1
 except Exception as e:
    print(f"exception type: {type(e).__name__}")
-    if not isinstance(e, AuthenticationError):
-        pytest.fail(f"Error occurred: {e}")
-
-## test on non-openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
-
-    response = completion(model="claude-instant-1", messages=messages)
-    
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
-
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Failure Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    if not isinstance(e, AuthenticationError):
-        pytest.fail(f"Error occurred: {e}")
-
-# streaming completion
-## test on openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
-
-    response = completion(model="gpt-3.5-turbo", messages=messages)
-
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
-
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Failure Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    if not isinstance(e, AuthenticationError):
-        pytest.fail(f"Error occurred: {e}")
-
-## test on non-openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
-
-    response = completion(model="claude-instant-1", messages=messages)
-    
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
-
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Failure Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    if not isinstance(e, AuthenticationError):
    pytest.fail(f"Error occurred: {e}")

 # embedding

 try:
+    temporary_oai_key = os.environ["OPENAI_API_KEY"]
+    os.environ["OPENAI_API_KEY"] = "bad-key"
+
+    temporary_anthropic_key = os.environ["ANTHROPIC_API_KEY"]
+    os.environ["ANTHROPIC_API_KEY"] = "bad-key"
    # Redirect stdout
    old_stdout = sys.stdout
    sys.stdout = new_stdout = io.StringIO()

+    try:
        response = embedding(model="text-embedding-ada-002", input=["good morning from litellm"])
+    except AuthenticationError:
+        pass

    # Restore stdout
    sys.stdout = old_stdout
    output = new_stdout.getvalue().strip()

+    print(output)
+
    if "Logging Details Pre-API Call" not in output:
        raise Exception("Required log message not found!")
    elif "Logging Details Post-API Call" not in output:
@@ -281,8 +334,5 @@ try:
    elif "Logging Details LiteLLM-Failure Call" not in output:
        raise Exception("Required log message not found!")
 except Exception as e:
-    if not isinstance(e, AuthenticationError):
+    print(f"exception type: {type(e).__name__}")
    pytest.fail(f"Error occurred: {e}")
-
-os.environ["OPENAI_API_KEY"] = temporary_oai_key
-os.environ["ANTHROPIC_API_KEY"] = temporary_anthropic_key
--- a/litellm/tests/test_model_alias_map.py
+++ b/litellm/tests/test_model_alias_map.py
@@ -12,13 +12,12 @@ from litellm import embedding, completion

 litellm.set_verbose = True

-# Test: Check if the alias created via LiteDebugger is mapped correctly
-{
-    "top_p": 0.75,
-    "prompt": "What's the meaning of life?",
-    "num_beams": 4,
-    "temperature": 0.1,
+model_alias_map = {
+    "llama2": "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf"
 }
+
+litellm.model_alias_map = model_alias_map
+
 print(
    completion(
        "llama2",
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -157,7 +157,7 @@ class CallTypes(Enum):
 class Logging:
    global supabaseClient, liteDebuggerClient

-    def __init__(self, model, messages, stream, call_type, litellm_call_id, completion_call_id):
+    def __init__(self, model, messages, stream, call_type, litellm_call_id, function_id):
        if call_type not in [item.value for item in CallTypes]:
            allowed_values = ", ".join([item.value for item in CallTypes])
            raise ValueError(f"Invalid call_type {call_type}. Allowed values: {allowed_values}")
@@ -166,7 +166,7 @@ class Logging:
        self.stream = stream
        self.call_type = call_type
        self.litellm_call_id = litellm_call_id
-        self.completion_call_id = completion_call_id
+        self.function_id = function_id
    
    def update_environment_variables(self, optional_params, litellm_params):
        self.optional_params = optional_params
@@ -235,7 +235,7 @@ class Logging:
                            litellm_params=self.model_call_details["litellm_params"],
                            optional_params=self.model_call_details["optional_params"],
                            print_verbose=print_verbose,
-                            call_type=self.call_type, 
+                            call_type=self.call_type
                        )
                except Exception as e:
                    print_verbose(
@@ -289,7 +289,7 @@ class Logging:
                            litellm_call_id=self.litellm_params["litellm_call_id"],
                            print_verbose=print_verbose,
                            call_type = self.call_type, 
-                            stream = self.stream
+                            stream = self.stream,
                        )
                    if callback == "cache":
                        try:
@@ -349,11 +349,11 @@ class Logging:
                                litellm_call_id=self.litellm_call_id,
                                print_verbose=print_verbose,
                                call_type = self.call_type, 
-                                stream = self.stream
+                                stream = self.stream,
                            )
                except Exception as e:
                    print_verbose(
-                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while post-call logging with integrations {traceback.format_exc()}"
+                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging with integrations {traceback.format_exc()}"
                    )
                    print_verbose(
                        f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
@@ -372,6 +372,7 @@ class Logging:
            )
        try:
            for callback in litellm.failure_callback:
+                try:
                    if callback == "lite_debugger":
                            print_verbose("reaches lite_debugger for logging!")
                            print_verbose(f"liteDebuggerClient: {liteDebuggerClient}")
@@ -396,10 +397,21 @@ class Logging:
                                litellm_call_id=self.litellm_call_id,
                                print_verbose=print_verbose,
                                call_type = self.call_type, 
-                            stream = self.stream
+                                stream = self.stream,
                            )
-            pass
+                except Exception as e:
+                    print_verbose(
+                        f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {traceback.format_exc()}"
+                    )
+                    print_verbose(
+                        f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
+                    )
+                    if capture_exception:  # log this error to sentry for debugging
+                        capture_exception(e)
        except:
+            print_verbose(
+                f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging {traceback.format_exc()}"
+            )
            pass


@@ -443,7 +455,9 @@ def client(original_function):
    ):  # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc.
        try:
            global callback_list, add_breadcrumb, user_logger_fn, Logging
+            function_id = kwargs["id"] if "id" in kwargs else None
            if "use_client" in kwargs and kwargs["use_client"] == True: 
+                print_verbose(f"litedebugger initialized")
                litellm.input_callback.append("lite_debugger")
                litellm.success_callback.append("lite_debugger")
                litellm.failure_callback.append("lite_debugger")
@@ -461,6 +475,7 @@ def client(original_function):
                )
                set_callbacks(
                    callback_list=callback_list,
+                    function_id=function_id
                )
            if add_breadcrumb:
                add_breadcrumb(
@@ -480,8 +495,7 @@ def client(original_function):
            elif call_type == CallTypes.embedding.value:
                messages = args[1] if len(args) > 1 else kwargs["input"]
            stream = True if "stream" in kwargs and kwargs["stream"] == True else False
-            completion_call_id = kwargs["id"] if "id" in kwargs else None
-            logging_obj = Logging(model=model, messages=messages, stream=stream, litellm_call_id=kwargs["litellm_call_id"], completion_call_id=completion_call_id, call_type=call_type)
+            logging_obj = Logging(model=model, messages=messages, stream=stream, litellm_call_id=kwargs["litellm_call_id"], function_id=function_id, call_type=call_type)
            return logging_obj
        except:  # DO NOT BLOCK running the function because of this
            print_verbose(f"[Non-Blocking] {traceback.format_exc()}; args - {args}; kwargs - {kwargs}")
@@ -833,7 +847,7 @@ def load_test_model(
        }


-def set_callbacks(callback_list):
+def set_callbacks(callback_list, function_id=None):
    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger
    try:
        for callback in callback_list:
@@ -902,14 +916,12 @@ def set_callbacks(callback_list):
            elif callback == "berrispend":
                berrispendLogger = BerriSpendLogger()
            elif callback == "supabase":
-                print(f"instantiating supabase")
+                print_verbose(f"instantiating supabase")
                supabaseClient = Supabase()
            elif callback == "lite_debugger":
                print_verbose(f"instantiating lite_debugger")
-                if litellm.token:
-                    liteDebuggerClient = LiteDebugger(email=litellm.token)
-                else:
-                    liteDebuggerClient = LiteDebugger(email=litellm.email)
+                if function_id:
+                    liteDebuggerClient = LiteDebugger(email=function_id)
    except Exception as e:
        raise e

@@ -1944,7 +1956,7 @@ def completion_with_split_tests(models={}, messages=[], use_client=False, **kwar
    except:
        traceback.print_exc()
        raise ValueError("""models does not follow the required format - {'model_name': 'split_percentage'}, e.g. {'gpt-4': 0.7, 'huggingface/wizard-coder': 0.3}""")
-    return litellm.completion(model=selected_llm, messages=messages, **kwargs)
+    return litellm.completion(model=selected_llm, messages=messages, use_client=use_client, **kwargs)

 def completion_with_fallbacks(**kwargs):
    response = None