diff --git a/.circleci/config.yml b/.circleci/config.yml
index 25451f47b..5afd0c5d1 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -163,5 +163,4 @@ workflows:
       filters:
         branches:
           only:
-            - main
-            - /litellm_.*/
\ No newline at end of file
+            - main
\ No newline at end of file
diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index 98cc97d53..8a387e8a9 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -248,7 +248,7 @@ class AzureChatCompletion(BaseLLM):
             else:
                 azure_client = client
             response = azure_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=messages,
@@ -261,7 +261,7 @@
                 },
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
             )
         except AzureOpenAIError as e:
@@ -323,7 +323,7 @@
                 **data, timeout=timeout
             )
             return convert_to_model_response_object(
-                response_object=json.loads(response.model_dump_json()),
+                response_object=response.model_dump(),
                 model_response_object=model_response,
             )
         except AzureOpenAIError as e:
@@ -465,7 +465,7 @@
             else:
                 openai_aclient = client
             response = await openai_aclient.embeddings.create(**data, timeout=timeout)
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=input,
@@ -474,7 +474,7 @@
                 original_response=stringified_response,
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
                 response_type="embedding",
             )
@@ -564,7 +564,7 @@
                 original_response=response,
             )

-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding")  # type: ignore
         except AzureOpenAIError as e:
             exception_mapping_worked = True
             raise e
@@ -599,7 +599,7 @@
             else:
                 openai_aclient = client
             response = await openai_aclient.images.generate(**data, timeout=timeout)
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=input,
@@ -608,7 +608,7 @@
                 original_response=stringified_response,
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
                 response_type="image_generation",
             )
@@ -697,7 +697,7 @@
                 original_response=response,
             )
             # return response
-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="image_generation")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="image_generation")  # type: ignore
         except AzureOpenAIError as e:
             exception_mapping_worked = True
             raise e
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 0299c502c..91a79fa57 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -280,18 +280,6 @@ class OpenAIChatCompletion(BaseLLM):
                         max_retries=max_retries,
                     )
                 else:
-                    ## LOGGING
-                    logging_obj.pre_call(
-                        input=messages,
-                        api_key=api_key,
-                        additional_args={
-                            "headers": headers,
-                            "api_base": api_base,
-                            "acompletion": acompletion,
-                            "complete_input_dict": data,
-                        },
-                    )
-
                     if not isinstance(max_retries, int):
                         raise OpenAIError(
                             status_code=422, message="max retries must be an int"
@@ -306,8 +294,21 @@ class OpenAIChatCompletion(BaseLLM):
                     )
                 else:
                     openai_client = client
+
+                ## LOGGING
+                logging_obj.pre_call(
+                    input=messages,
+                    api_key=openai_client.api_key,
+                    additional_args={
+                        "headers": headers,
+                        "api_base": openai_client._base_url._uri_reference,
+                        "acompletion": acompletion,
+                        "complete_input_dict": data,
+                    },
+                )
+
                 response = openai_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
-                stringified_response = response.model_dump_json()
+                stringified_response = response.model_dump()
                 logging_obj.post_call(
                     input=messages,
                     api_key=api_key,
@@ -315,7 +316,7 @@
                     additional_args={"complete_input_dict": data},
                 )
                 return convert_to_model_response_object(
-                    response_object=json.loads(stringified_response),
+                    response_object=stringified_response,
                     model_response_object=model_response,
                 )
         except Exception as e:
@@ -386,7 +387,7 @@
             response = await openai_aclient.chat.completions.create(
                 **data, timeout=timeout
             )
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             logging_obj.post_call(
                 input=data["messages"],
                 api_key=api_key,
@@ -394,7 +395,7 @@
                 additional_args={"complete_input_dict": data},
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
             )
         except Exception as e:
@@ -527,7 +528,7 @@
             else:
                 openai_aclient = client
             response = await openai_aclient.embeddings.create(**data, timeout=timeout)  # type: ignore
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=input,
@@ -535,7 +536,7 @@
                 additional_args={"complete_input_dict": data},
                 original_response=stringified_response,
             )
-            return convert_to_model_response_object(response_object=json.loads(stringified_response), model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="embedding")  # type: ignore
         except Exception as e:
             ## LOGGING
             logging_obj.post_call(
@@ -597,7 +598,7 @@
                 original_response=response,
             )

-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding")  # type: ignore
         except OpenAIError as e:
             exception_mapping_worked = True
             raise e
@@ -634,7 +635,7 @@
             else:
                 openai_aclient = client
             response = await openai_aclient.images.generate(**data, timeout=timeout)  # type: ignore
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=prompt,
@@ -642,7 +643,7 @@
                 additional_args={"complete_input_dict": data},
                 original_response=stringified_response,
             )
-            return convert_to_model_response_object(response_object=json.loads(stringified_response), model_response_object=model_response, response_type="image_generation")  # type: ignore
+            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="image_generation")  # type: ignore
         except Exception as e:
             ## LOGGING
             logging_obj.post_call(
@@ -710,7 +711,7 @@
                 original_response=response,
             )
             # return response
-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="image_generation")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="image_generation")  # type: ignore
         except OpenAIError as e:
             exception_mapping_worked = True
             raise e
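The pattern replaced throughout this patch is the same in every hunk: `json.loads(response.model_dump_json())` serializes the Pydantic response model to a JSON string and immediately parses it back into a dict, while `response.model_dump()` produces the dict directly and skips the round-trip. A minimal sketch of that equivalence, using a hypothetical stand-in model (`ExampleResponse` is illustrative, not an actual SDK response type):

```python
import json

from pydantic import BaseModel


class ExampleResponse(BaseModel):
    # Stand-in for an OpenAI/Azure SDK response object, which is a Pydantic model.
    id: str
    choices: list[str]


response = ExampleResponse(id="chatcmpl-123", choices=["hello"])

# Old pattern: dump to a JSON string, then parse it back into a dict.
old_way = json.loads(response.model_dump_json())

# New pattern: dump straight to a dict, no serialize/parse round-trip.
new_way = response.model_dump()

assert old_way == new_way  # {'id': 'chatcmpl-123', 'choices': ['hello']}
```

One caveat worth noting: the two forms can differ for fields that are not JSON-native (e.g. `model_dump()` keeps a `datetime` as a Python object where the JSON round-trip would coerce it to a string); the chat, embedding, and image payloads touched here are JSON-native, so the resulting dicts match.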