perf(azure+openai-files): use model_dump instead of json.loads + model_dump_json

Krrish Dholakia 2024-01-06 15:50:05 +05:30
parent 13e8535b14
commit 9a4a96f46e
3 changed files with 33 additions and 33 deletions
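The change itself is mechanical: the OpenAI SDK response objects are pydantic models, and response.model_dump() returns a plain dict directly, while the old json.loads(response.model_dump_json()) first serialized the model to a JSON string and then parsed that string back into a dict. Below is a minimal sketch of the two patterns, using a hypothetical pydantic model in place of the SDK response types (this is not litellm code, and it assumes pydantic v2, which provides model_dump and model_dump_json):

import json
from pydantic import BaseModel

class ExampleResponse(BaseModel):  # stand-in for an SDK response object
    id: str
    model: str

resp = ExampleResponse(id="chatcmpl-123", model="gpt-3.5-turbo")

# Before: dump to a JSON string, then parse it back into a dict.
before = json.loads(resp.model_dump_json())

# After: dump straight to a dict, skipping the encode/decode round trip.
after = resp.model_dump()

assert before == after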

@@ -164,4 +164,3 @@ workflows:
           branches:
             only:
               - main
-              - /litellm_.*/

@@ -248,7 +248,7 @@ class AzureChatCompletion(BaseLLM):
             else:
                 azure_client = client
             response = azure_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=messages,
@@ -261,7 +261,7 @@ class AzureChatCompletion(BaseLLM):
                 },
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
             )
         except AzureOpenAIError as e:
@@ -323,7 +323,7 @@ class AzureChatCompletion(BaseLLM):
                 **data, timeout=timeout
             )
             return convert_to_model_response_object(
-                response_object=json.loads(response.model_dump_json()),
+                response_object=response.model_dump(),
                 model_response_object=model_response,
             )
         except AzureOpenAIError as e:
@@ -465,7 +465,7 @@ class AzureChatCompletion(BaseLLM):
             else:
                 openai_aclient = client
             response = await openai_aclient.embeddings.create(**data, timeout=timeout)
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=input,
@@ -474,7 +474,7 @@ class AzureChatCompletion(BaseLLM):
                 original_response=stringified_response,
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
                 response_type="embedding",
             )
@@ -564,7 +564,7 @@ class AzureChatCompletion(BaseLLM):
                 original_response=response,
             )
-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding")  # type: ignore
         except AzureOpenAIError as e:
             exception_mapping_worked = True
             raise e
@@ -599,7 +599,7 @@ class AzureChatCompletion(BaseLLM):
             else:
                 openai_aclient = client
             response = await openai_aclient.images.generate(**data, timeout=timeout)
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=input,
@@ -608,7 +608,7 @@ class AzureChatCompletion(BaseLLM):
                 original_response=stringified_response,
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
                 response_type="image_generation",
             )
@@ -697,7 +697,7 @@ class AzureChatCompletion(BaseLLM):
                 original_response=response,
             )
             # return response
-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="image_generation")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="image_generation")  # type: ignore
         except AzureOpenAIError as e:
             exception_mapping_worked = True
             raise e

@@ -280,18 +280,6 @@ class OpenAIChatCompletion(BaseLLM):
                     max_retries=max_retries,
                 )
             else:
-                ## LOGGING
-                logging_obj.pre_call(
-                    input=messages,
-                    api_key=api_key,
-                    additional_args={
-                        "headers": headers,
-                        "api_base": api_base,
-                        "acompletion": acompletion,
-                        "complete_input_dict": data,
-                    },
-                )
                 if not isinstance(max_retries, int):
                     raise OpenAIError(
                         status_code=422, message="max retries must be an int"
@@ -306,8 +294,21 @@ class OpenAIChatCompletion(BaseLLM):
                     )
                 else:
                     openai_client = client
+                ## LOGGING
+                logging_obj.pre_call(
+                    input=messages,
+                    api_key=openai_client.api_key,
+                    additional_args={
+                        "headers": headers,
+                        "api_base": openai_client._base_url._uri_reference,
+                        "acompletion": acompletion,
+                        "complete_input_dict": data,
+                    },
+                )
                 response = openai_client.chat.completions.create(**data, timeout=timeout)  # type: ignore
-                stringified_response = response.model_dump_json()
+                stringified_response = response.model_dump()
                 logging_obj.post_call(
                     input=messages,
                     api_key=api_key,
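The hunks above also move the pre-call logging so it runs after the client has been resolved: the logged api_key and api_base are now read from openai_client itself (including a caller-supplied client) instead of from the raw function arguments. A rough sketch of that ordering follows; it assumes the openai v1 client, uses the public base_url attribute rather than the private _base_url._uri_reference seen in the diff, and is not litellm's actual logging code:

from openai import OpenAI

def log_pre_call(openai_client: OpenAI, data: dict) -> None:
    # Values read off the resolved client reflect what the request will really
    # use, whether the client was built here or passed in by the caller.
    print("api_key:", openai_client.api_key)
    print("api_base:", str(openai_client.base_url))
    print("complete_input_dict:", data)

def complete(data: dict, api_key: str, client: OpenAI | None = None):
    openai_client = client if client is not None else OpenAI(api_key=api_key)
    log_pre_call(openai_client, data)  # log only after the client is known
    return openai_client.chat.completions.create(**data)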
@@ -315,7 +316,7 @@ class OpenAIChatCompletion(BaseLLM):
                     additional_args={"complete_input_dict": data},
                 )
                 return convert_to_model_response_object(
-                    response_object=json.loads(stringified_response),
+                    response_object=stringified_response,
                     model_response_object=model_response,
                 )
         except Exception as e:
@@ -386,7 +387,7 @@ class OpenAIChatCompletion(BaseLLM):
             response = await openai_aclient.chat.completions.create(
                 **data, timeout=timeout
             )
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             logging_obj.post_call(
                 input=data["messages"],
                 api_key=api_key,
@@ -394,7 +395,7 @@ class OpenAIChatCompletion(BaseLLM):
                 additional_args={"complete_input_dict": data},
             )
             return convert_to_model_response_object(
-                response_object=json.loads(stringified_response),
+                response_object=stringified_response,
                 model_response_object=model_response,
             )
         except Exception as e:
@@ -527,7 +528,7 @@ class OpenAIChatCompletion(BaseLLM):
             else:
                 openai_aclient = client
             response = await openai_aclient.embeddings.create(**data, timeout=timeout)  # type: ignore
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=input,
@@ -535,7 +536,7 @@ class OpenAIChatCompletion(BaseLLM):
                 additional_args={"complete_input_dict": data},
                 original_response=stringified_response,
             )
-            return convert_to_model_response_object(response_object=json.loads(stringified_response), model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="embedding")  # type: ignore
         except Exception as e:
             ## LOGGING
             logging_obj.post_call(
@@ -597,7 +598,7 @@ class OpenAIChatCompletion(BaseLLM):
                 original_response=response,
             )
-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding")  # type: ignore
         except OpenAIError as e:
             exception_mapping_worked = True
             raise e
@@ -634,7 +635,7 @@ class OpenAIChatCompletion(BaseLLM):
             else:
                 openai_aclient = client
             response = await openai_aclient.images.generate(**data, timeout=timeout)  # type: ignore
-            stringified_response = response.model_dump_json()
+            stringified_response = response.model_dump()
             ## LOGGING
             logging_obj.post_call(
                 input=prompt,
@@ -642,7 +643,7 @@ class OpenAIChatCompletion(BaseLLM):
                 additional_args={"complete_input_dict": data},
                 original_response=stringified_response,
             )
-            return convert_to_model_response_object(response_object=json.loads(stringified_response), model_response_object=model_response, response_type="image_generation")  # type: ignore
+            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="image_generation")  # type: ignore
         except Exception as e:
             ## LOGGING
             logging_obj.post_call(
@@ -710,7 +711,7 @@ class OpenAIChatCompletion(BaseLLM):
                 original_response=response,
             )
             # return response
-            return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="image_generation")  # type: ignore
+            return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="image_generation")  # type: ignore
         except OpenAIError as e:
             exception_mapping_worked = True
             raise e