diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index f1a882bb2e..f07dc60cf2 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -1130,7 +1130,8 @@ class AzureChatCompletion(BaseLLM):
         else:
             azure_client = client
         ## COMPLETION CALL
-        response = azure_client.embeddings.with_raw_response.create(**data, timeout=timeout)  # type: ignore
+        raw_response = azure_client.embeddings.with_raw_response.create(**data, timeout=timeout)  # type: ignore
+        response = raw_response.parse()
         ## LOGGING
         logging_obj.post_call(
             input=input,
diff --git a/litellm/llms/openai.py b/litellm/llms/openai.py
index 5c48eef4dd..d62e89f8af 100644
--- a/litellm/llms/openai.py
+++ b/litellm/llms/openai.py
@@ -2126,7 +2126,8 @@ class OpenAITextCompletion(BaseLLM):
             openai_client = client
 
         try:
-            response = openai_client.completions.with_raw_response.create(**data)
+            raw_response = openai_client.completions.with_raw_response.create(**data)
+            response = raw_response.parse()
         except Exception as e:
             status_code = getattr(e, "status_code", 500)
             error_headers = getattr(e, "headers", None)
@@ -2170,8 +2171,8 @@ class OpenAITextCompletion(BaseLLM):
         else:
             openai_client = client
 
-        response = await openai_client.completions.with_raw_response.create(**data)
-
+        raw_response = await openai_client.completions.with_raw_response.create(**data)
+        response = raw_response.parse()
         streamwrapper = CustomStreamWrapper(
             completion_stream=response,
             model=model,
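
Note on the pattern this diff fixes: in the OpenAI Python SDK (v1.x), `with_raw_response.create(...)` returns a wrapper around the HTTP response rather than the parsed model object, so the result must be unwrapped with `.parse()` before downstream code (logging, `CustomStreamWrapper`) can use it. Below is a minimal standalone sketch of that pattern, assuming the v1.x SDK; the model name and the rate-limit header key are illustrative, not taken from the diff:

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# .with_raw_response exposes HTTP metadata (e.g. rate-limit headers)
# that a plain .create() call would discard.
raw_response = client.completions.with_raw_response.create(
    model="gpt-3.5-turbo-instruct",  # illustrative model choice
    prompt="Say hello",
)

# Headers are available on the raw wrapper...
print(raw_response.headers.get("x-ratelimit-remaining-requests"))

# ...while .parse() recovers the typed Completion object the rest of the
# code expects, which is why the diff assigns the parsed result back to
# `response` instead of passing the wrapper along.
response = raw_response.parse()
print(response.choices[0].text)
```

In the streaming hunk, the same unwrapping applies: calling `.parse()` on a raw response from a `stream=True` request should yield the SDK's stream iterator, which is why the parsed result can be handed directly to `CustomStreamWrapper` as `completion_stream`.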