From 16224f8db6f61ceb8760b378e332ddebd9b9a533 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Tue, 11 Mar 2025 21:22:13 -0700
Subject: [PATCH] fix(o_series_handler.py): handle async calls

---
 litellm/llms/azure/chat/o_series_handler.py  | 1 +
 tests/llm_translation/base_llm_unit_tests.py | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/litellm/llms/azure/chat/o_series_handler.py b/litellm/llms/azure/chat/o_series_handler.py
index 4464432faf..2f3e9e6399 100644
--- a/litellm/llms/azure/chat/o_series_handler.py
+++ b/litellm/llms/azure/chat/o_series_handler.py
@@ -45,6 +45,7 @@ class AzureOpenAIO1ChatCompletion(BaseAzureLLM, OpenAIChatCompletion):
             api_base=api_base,
             api_version=api_version,
             client=client,
+            _is_async=acompletion,
         )
         return super().completion(
             model_response=model_response,
diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py
index f91ef0eae9..32f631daad 100644
--- a/tests/llm_translation/base_llm_unit_tests.py
+++ b/tests/llm_translation/base_llm_unit_tests.py
@@ -868,10 +868,13 @@ class BaseLLMChatTest(ABC):
         except Exception as e:
             pytest.fail(f"Error occurred: {e}")
 
+    @pytest.mark.flaky(retries=3, delay=1)
     @pytest.mark.asyncio
     async def test_completion_cost(self):
         from litellm import completion_cost
 
+        litellm._turn_on_debug()
+
         os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
         litellm.model_cost = litellm.get_model_cost_map(url="")