diff --git a/.circleci/config.yml b/.circleci/config.yml
index 25451f47b..5afd0c5d1 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -163,5 +163,4 @@ workflows:
       filters:
         branches:
           only:
-            - main
-            - /litellm_.*/
\ No newline at end of file
+            - main
\ No newline at end of file
diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index 98cc97d53..8a387e8a9 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -248,7 +248,7 @@ class AzureChatCompletion(BaseLLM):
             else:
                 azure_client = client
             response = azure_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=messages,
@@ -261,7 +261,7 @@
                 },
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
             )
         except AzureOpenAIError as e:
@@ -323,7 +323,7 @@
                 **data, timeout=timeout
             )
             return convert_to_model_response_object(
-                response_object=json.loads(response.model_dump_json()),
+                response_object=response.model_dump(),
                 model_response_object=model_response,
             )
         except AzureOpenAIError as e:
@@ -465,7 +465,7 @@
             else:
                 openai_aclient = client
             response = await openai_aclient.embeddings.create(**data, timeout=timeout)
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=input,
@@ -474,7 +474,7 @@
                 original_response=stringified_response,
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
                 response_type="embedding",
             )
@@ -564,7 +564,7 @@
                 original_response=response,
             )

-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding")  # type: ignore
         except AzureOpenAIError as e:
             exception_mapping_worked = True
             raise e
@@ -599,7 +599,7 @@
             else:
                 openai_aclient = client
             response = await openai_aclient.images.generate(**data, timeout=timeout)
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=input,
@@ -608,7 +608,7 @@
                 original_response=stringified_response,
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
                 response_type="image_generation",
             )
@@ -697,7 +697,7 @@
                 original_response=response,
             )
             # return response
-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="image_generation")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="image_generation")  # type: ignore
         except AzureOpenAIError as e:
             exception_mapping_worked = True
             raise e
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 0299c502c..91a79fa57 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -280,18 +280,6 @@ class OpenAIChatCompletion(BaseLLM):
                         max_retries=max_retries,
                     )
                 else:
-                    ## LOGGING
-                    logging_obj.pre_call(
-                        input=messages,
-                        api_key=api_key,
-                        additional_args={
-                            "headers": headers,
-                            "api_base": api_base,
-                            "acompletion": acompletion,
-                            "complete_input_dict": data,
-                        },
-                    )
-
                     if not isinstance(max_retries, int):
                         raise OpenAIError(
                             status_code=422, message="max retries must be an int"
@@ -306,8 +294,21 @@ class OpenAIChatCompletion(BaseLLM):
                     )
                 else:
                     openai_client = client
+
+                ## LOGGING
+                logging_obj.pre_call(
+                    input=messages,
+                    api_key=openai_client.api_key,
+                    additional_args={
+                        "headers": headers,
+                        "api_base": openai_client._base_url._uri_reference,
+                        "acompletion": acompletion,
+                        "complete_input_dict": data,
+                    },
+                )
+
                 response = openai_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
-                stringified_response = response.model_dump_json()
+                stringified_response = response.model_dump()
                 logging_obj.post_call(
                     input=messages,
                     api_key=api_key,
@@ -315,7 +316,7 @@
                     additional_args={"complete_input_dict": data},
                 )
                 return convert_to_model_response_object(
-                    response_object=json.loads(stringified_response),
+                    response_object=stringified_response,
                     model_response_object=model_response,
                 )
         except Exception as e:
@@ -386,7 +387,7 @@
             response = await openai_aclient.chat.completions.create(
                 **data, timeout=timeout
             )
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             logging_obj.post_call(
                 input=data["messages"],
                 api_key=api_key,
@@ -394,7 +395,7 @@
                 additional_args={"complete_input_dict": data},
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
             )
         except Exception as e:
@@ -527,7 +528,7 @@
             else:
                 openai_aclient = client
             response = await openai_aclient.embeddings.create(**data, timeout=timeout)  # type: ignore
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=input,
@@ -535,7 +536,7 @@
                 additional_args={"complete_input_dict": data},
                 original_response=stringified_response,
             )
-            return convert_to_model_response_object(response_object=json.loads(stringified_response), model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="embedding")  # type: ignore
         except Exception as e:
             ## LOGGING
             logging_obj.post_call(
@@ -597,7 +598,7 @@
                 original_response=response,
             )

-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding")  # type: ignore
         except OpenAIError as e:
             exception_mapping_worked = True
             raise e
@@ -634,7 +635,7 @@
             else:
                 openai_aclient = client
             response = await openai_aclient.images.generate(**data, timeout=timeout)  # type: ignore
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=prompt,
@@ -642,7 +643,7 @@
                 additional_args={"complete_input_dict": data},
                 original_response=stringified_response,
             )
-            return convert_to_model_response_object(response_object=json.loads(stringified_response), model_response_object=model_response, response_type="image_generation")  # type: ignore
+            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="image_generation")  # type: ignore
         except Exception as e:
             ## LOGGING
             logging_obj.post_call(
@@ -710,7 +711,7 @@
                 original_response=response,
             )
             # return response
-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="image_generation")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="image_generation")  # type: ignore
         except OpenAIError as e:
             exception_mapping_worked = True
             raise e
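The pattern replaced throughout this patch is the same in every hunk: `json.loads(response.model_dump_json())` serializes the Pydantic response model to a JSON string and immediately parses it back into a dict, while `response.model_dump()` produces the dict directly and skips the round-trip. A minimal sketch of that equivalence, using a hypothetical stand-in model (`ExampleResponse` is illustrative, not an actual SDK response type):

```python
import json

from pydantic import BaseModel


class ExampleResponse(BaseModel):
    # Stand-in for an OpenAI/Azure SDK response object, which is a Pydantic model.
    id: str
    choices: list[str]


response = ExampleResponse(id="chatcmpl-123", choices=["hello"])

# Old pattern: dump to a JSON string, then parse it back into a dict.
old_way = json.loads(response.model_dump_json())

# New pattern: dump straight to a dict, no serialize/parse round-trip.
new_way = response.model_dump()

assert old_way == new_way  # {'id': 'chatcmpl-123', 'choices': ['hello']}
```

One caveat worth noting: the two forms can differ for fields that are not JSON-native (e.g. `model_dump()` keeps a `datetime` as a Python object where the JSON round-trip would coerce it to a string); the chat, embedding, and image payloads touched here are JSON-native, so the resulting dicts match.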