perf(azure+openai-files): use model_dump instead of json.loads + model_dump_json

This commit is contained in:
Krrish Dholakia 2024-01-06 15:50:05 +05:30
parent 13e8535b14
commit 9a4a96f46e
3 changed files with 33 additions and 33 deletions

View file

@ -163,5 +163,4 @@ workflows:
filters:
branches:
only:
- main
- /litellm_.*/
- main

View file

@ -248,7 +248,7 @@ class AzureChatCompletion(BaseLLM):
else:
azure_client = client
response = azure_client.chat.completions.create(**data, timeout=timeout) # type: ignore
stringified_response = response.model_dump_json()
stringified_response = response.model_dump()
## LOGGING
logging_obj.post_call(
input=messages,
@ -261,7 +261,7 @@ class AzureChatCompletion(BaseLLM):
},
)
return convert_to_model_response_object(
response_object=json.loads(stringified_response),
response_object=stringified_response,
model_response_object=model_response,
)
except AzureOpenAIError as e:
@ -323,7 +323,7 @@ class AzureChatCompletion(BaseLLM):
**data, timeout=timeout
)
return convert_to_model_response_object(
response_object=json.loads(response.model_dump_json()),
response_object=response.model_dump(),
model_response_object=model_response,
)
except AzureOpenAIError as e:
@ -465,7 +465,7 @@ class AzureChatCompletion(BaseLLM):
else:
openai_aclient = client
response = await openai_aclient.embeddings.create(**data, timeout=timeout)
stringified_response = response.model_dump_json()
stringified_response = response.model_dump()
## LOGGING
logging_obj.post_call(
input=input,
@ -474,7 +474,7 @@ class AzureChatCompletion(BaseLLM):
original_response=stringified_response,
)
return convert_to_model_response_object(
response_object=json.loads(stringified_response),
response_object=stringified_response,
model_response_object=model_response,
response_type="embedding",
)
@ -564,7 +564,7 @@ class AzureChatCompletion(BaseLLM):
original_response=response,
)
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding") # type: ignore
return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding") # type: ignore
except AzureOpenAIError as e:
exception_mapping_worked = True
raise e
@ -599,7 +599,7 @@ class AzureChatCompletion(BaseLLM):
else:
openai_aclient = client
response = await openai_aclient.images.generate(**data, timeout=timeout)
stringified_response = response.model_dump_json()
stringified_response = response.model_dump()
## LOGGING
logging_obj.post_call(
input=input,
@ -608,7 +608,7 @@ class AzureChatCompletion(BaseLLM):
original_response=stringified_response,
)
return convert_to_model_response_object(
response_object=json.loads(stringified_response),
response_object=stringified_response,
model_response_object=model_response,
response_type="image_generation",
)
@ -697,7 +697,7 @@ class AzureChatCompletion(BaseLLM):
original_response=response,
)
# return response
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="image_generation") # type: ignore
return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="image_generation") # type: ignore
except AzureOpenAIError as e:
exception_mapping_worked = True
raise e

View file

@ -280,18 +280,6 @@ class OpenAIChatCompletion(BaseLLM):
max_retries=max_retries,
)
else:
## LOGGING
logging_obj.pre_call(
input=messages,
api_key=api_key,
additional_args={
"headers": headers,
"api_base": api_base,
"acompletion": acompletion,
"complete_input_dict": data,
},
)
if not isinstance(max_retries, int):
raise OpenAIError(
status_code=422, message="max retries must be an int"
@ -306,8 +294,21 @@ class OpenAIChatCompletion(BaseLLM):
)
else:
openai_client = client
## LOGGING
logging_obj.pre_call(
input=messages,
api_key=openai_client.api_key,
additional_args={
"headers": headers,
"api_base": openai_client._base_url._uri_reference,
"acompletion": acompletion,
"complete_input_dict": data,
},
)
response = openai_client.chat.completions.create(**data, timeout=timeout) # type: ignore
stringified_response = response.model_dump_json()
stringified_response = response.model_dump()
logging_obj.post_call(
input=messages,
api_key=api_key,
@ -315,7 +316,7 @@ class OpenAIChatCompletion(BaseLLM):
additional_args={"complete_input_dict": data},
)
return convert_to_model_response_object(
response_object=json.loads(stringified_response),
response_object=stringified_response,
model_response_object=model_response,
)
except Exception as e:
@ -386,7 +387,7 @@ class OpenAIChatCompletion(BaseLLM):
response = await openai_aclient.chat.completions.create(
**data, timeout=timeout
)
stringified_response = response.model_dump_json()
stringified_response = response.model_dump()
logging_obj.post_call(
input=data["messages"],
api_key=api_key,
@ -394,7 +395,7 @@ class OpenAIChatCompletion(BaseLLM):
additional_args={"complete_input_dict": data},
)
return convert_to_model_response_object(
response_object=json.loads(stringified_response),
response_object=stringified_response,
model_response_object=model_response,
)
except Exception as e:
@ -527,7 +528,7 @@ class OpenAIChatCompletion(BaseLLM):
else:
openai_aclient = client
response = await openai_aclient.embeddings.create(**data, timeout=timeout) # type: ignore
stringified_response = response.model_dump_json()
stringified_response = response.model_dump()
## LOGGING
logging_obj.post_call(
input=input,
@ -535,7 +536,7 @@ class OpenAIChatCompletion(BaseLLM):
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
return convert_to_model_response_object(response_object=json.loads(stringified_response), model_response_object=model_response, response_type="embedding") # type: ignore
return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="embedding") # type: ignore
except Exception as e:
## LOGGING
logging_obj.post_call(
@ -597,7 +598,7 @@ class OpenAIChatCompletion(BaseLLM):
original_response=response,
)
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding") # type: ignore
return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding") # type: ignore
except OpenAIError as e:
exception_mapping_worked = True
raise e
@ -634,7 +635,7 @@ class OpenAIChatCompletion(BaseLLM):
else:
openai_aclient = client
response = await openai_aclient.images.generate(**data, timeout=timeout) # type: ignore
stringified_response = response.model_dump_json()
stringified_response = response.model_dump()
## LOGGING
logging_obj.post_call(
input=prompt,
@ -642,7 +643,7 @@ class OpenAIChatCompletion(BaseLLM):
additional_args={"complete_input_dict": data},
original_response=stringified_response,
)
return convert_to_model_response_object(response_object=json.loads(stringified_response), model_response_object=model_response, response_type="image_generation") # type: ignore
return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="image_generation") # type: ignore
except Exception as e:
## LOGGING
logging_obj.post_call(
@ -710,7 +711,7 @@ class OpenAIChatCompletion(BaseLLM):
original_response=response,
)
# return response
return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="image_generation") # type: ignore
return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="image_generation") # type: ignore
except OpenAIError as e:
exception_mapping_worked = True
raise e