From 354f12a8840dd4659f32d6d36011fc78edd0df3b Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Thu, 2 Apr 2026 11:59:21 -0700 Subject: [PATCH 01/13] Fix WorkQueue destructor deadlock by merging into AppRuntime WorkQueue::~WorkQueue() had a race condition where cancel() + notify_all() fired without the queue mutex, so the signal could be lost if the worker thread hadn't entered condition_variable::wait() yet, causing join() to hang forever. This change merges WorkQueue into AppRuntime, eliminating split-lifetime issues, and dispatches cancellation as a work item via Append(). Since push() acquires the queue mutex, it blocks until the worker enters wait(), guaranteeing the notification is delivered. Changes: - Merge WorkQueue members into AppRuntime (thread, dispatcher, cancel source, env, suspension lock) - Remove WorkQueue.h and WorkQueue.cpp - Update AppRuntime_JSI.cpp TaskRunnerAdapter to use AppRuntime::Dispatch - Add deterministic regression test using arcana testing hooks - Fix member declaration order so m_options outlives worker thread The regression test uses arcana::set_before_wait_callback() to sleep while holding the queue mutex before wait(), deterministically triggering the race. See #146 for the test running against the old broken code. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .gitignore | 1 + CMakeLists.txt | 8 +- Core/AppRuntime/CMakeLists.txt | 4 +- Core/AppRuntime/Include/Babylon/AppRuntime.h | 31 ++++++- Core/AppRuntime/Source/AppRuntime.cpp | 40 +++++++-- Core/AppRuntime/Source/AppRuntime_JSI.cpp | 14 ++-- Core/AppRuntime/Source/WorkQueue.cpp | 49 ----------- Core/AppRuntime/Source/WorkQueue.h | 58 ------------- .../Android/app/src/main/cpp/CMakeLists.txt | 1 + Tests/UnitTests/Shared/Shared.cpp | 83 +++++++++++++++++++ Tests/UnitTests/Win32/App.cpp | 6 +- 11 files changed, 163 insertions(+), 132 deletions(-) delete mode 100644 Core/AppRuntime/Source/WorkQueue.cpp delete mode 100644 Core/AppRuntime/Source/WorkQueue.h diff --git a/.gitignore b/.gitignore index b84ede5a..0260b044 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /Build +Tests/UnitTests/dist/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 33b28392..a35de88a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,8 +14,8 @@ FetchContent_Declare(AndroidExtensions GIT_TAG 2d5af72259cc73e5f249d3c99bee2010be9cb042 EXCLUDE_FROM_ALL) FetchContent_Declare(arcana.cpp - GIT_REPOSITORY https://github.com/microsoft/arcana.cpp.git - GIT_TAG 7c6be8aaa29effbc8ee1a2217388fb6e399150d2 + GIT_REPOSITORY https://github.com/bghgary/arcana.cpp.git + GIT_TAG 8328898f159c1778cb16fa41c7ad57994961e5e8 EXCLUDE_FROM_ALL) FetchContent_Declare(asio GIT_REPOSITORY https://github.com/chriskohlhoff/asio.git @@ -113,6 +113,10 @@ endif() # -------------------------------------------------- +if(JSRUNTIMEHOST_TESTS) + add_compile_definitions(ARCANA_TESTING_HOOKS) +endif() + FetchContent_MakeAvailable_With_Message(arcana.cpp) set_property(TARGET arcana PROPERTY FOLDER Dependencies) diff --git a/Core/AppRuntime/CMakeLists.txt b/Core/AppRuntime/CMakeLists.txt index ad8f670d..ba671233 100644 --- a/Core/AppRuntime/CMakeLists.txt +++ b/Core/AppRuntime/CMakeLists.txt @@ -9,9 +9,7 @@ set(SOURCES "Include/Babylon/AppRuntime.h" "Source/AppRuntime.cpp" "Source/AppRuntime_${NAPI_JAVASCRIPT_ENGINE}.cpp" - "Source/AppRuntime_${JSRUNTIMEHOST_PLATFORM}.${IMPL_EXT}" - "Source/WorkQueue.cpp" - "Source/WorkQueue.h") + "Source/AppRuntime_${JSRUNTIMEHOST_PLATFORM}.${IMPL_EXT}") add_library(AppRuntime ${SOURCES}) warnings_as_errors(AppRuntime) diff --git a/Core/AppRuntime/Include/Babylon/AppRuntime.h b/Core/AppRuntime/Include/Babylon/AppRuntime.h index 7aaeb398..6e595c81 100644 --- a/Core/AppRuntime/Include/Babylon/AppRuntime.h +++ b/Core/AppRuntime/Include/Babylon/AppRuntime.h @@ -6,14 +6,18 @@ #include +#include + #include #include #include +#include +#include +#include +#include namespace Babylon { - class WorkQueue; - class AppRuntime final { public: @@ -43,6 +47,23 @@ namespace Babylon static void BABYLON_API DefaultUnhandledExceptionHandler(const Napi::Error& error); private: + template + void Append(CallableT callable) + { + if constexpr (std::is_copy_constructible::value) + { + m_dispatcher.queue([this, callable = std::move(callable)]() { + callable(m_env.value()); + }); + } + else + { + m_dispatcher.queue([this, callablePtr = std::make_shared(std::move(callable))]() { + (*callablePtr)(m_env.value()); + }); + } + } + // These three methods are the mechanism by which platform- and JavaScript-specific // code can be "injected" into the execution of the JavaScript thread. These three // functions are implemented in separate files, thus allowing implementations to be @@ -61,7 +82,11 @@ namespace Babylon // extra logic around the invocation of a dispatched callback. void Execute(Dispatchable callback); - std::unique_ptr m_workQueue; Options m_options; + std::optional m_env{}; + std::optional> m_suspensionLock{}; + arcana::cancellation_source m_cancelSource{}; + arcana::manual_dispatcher<128> m_dispatcher{}; + std::thread m_thread; }; } diff --git a/Core/AppRuntime/Source/AppRuntime.cpp b/Core/AppRuntime/Source/AppRuntime.cpp index a6d89492..f05115fd 100644 --- a/Core/AppRuntime/Source/AppRuntime.cpp +++ b/Core/AppRuntime/Source/AppRuntime.cpp @@ -1,5 +1,4 @@ #include "AppRuntime.h" -#include "WorkQueue.h" #include namespace Babylon @@ -10,8 +9,8 @@ namespace Babylon } AppRuntime::AppRuntime(Options options) - : m_workQueue{std::make_unique([this] { RunPlatformTier(); })} - , m_options{std::move(options)} + : m_options{std::move(options)} + , m_thread{[this] { RunPlatformTier(); }} { Dispatch([this](Napi::Env env) { JsRuntime::CreateForJavaScript(env, [this](auto func) { Dispatch(std::move(func)); }); @@ -20,26 +19,53 @@ namespace Babylon AppRuntime::~AppRuntime() { + if (m_suspensionLock.has_value()) + { + m_suspensionLock.reset(); + } + + // Dispatch cancellation as a work item so the worker thread processes it + // naturally via blocking_tick, avoiding the race condition where an external + // cancel signal can be missed by the condition variable wait. + Append([this](Napi::Env) { + m_cancelSource.cancel(); + }); + + m_thread.join(); } void AppRuntime::Run(Napi::Env env) { - m_workQueue->Run(env); + m_env = std::make_optional(env); + + m_dispatcher.set_affinity(std::this_thread::get_id()); + + while (!m_cancelSource.cancelled()) + { + m_dispatcher.blocking_tick(m_cancelSource); + } + + // The dispatcher can be non-empty if something is dispatched after cancellation. + m_dispatcher.clear(); } void AppRuntime::Suspend() { - m_workQueue->Suspend(); + auto suspensionMutex = std::make_shared(); + m_suspensionLock.emplace(*suspensionMutex); + Append([suspensionMutex{std::move(suspensionMutex)}](Napi::Env) { + std::scoped_lock lock{*suspensionMutex}; + }); } void AppRuntime::Resume() { - m_workQueue->Resume(); + m_suspensionLock.reset(); } void AppRuntime::Dispatch(Dispatchable func) { - m_workQueue->Append([this, func{std::move(func)}](Napi::Env env) mutable { + Append([this, func{std::move(func)}](Napi::Env env) mutable { Execute([this, env, func{std::move(func)}]() mutable { try { diff --git a/Core/AppRuntime/Source/AppRuntime_JSI.cpp b/Core/AppRuntime/Source/AppRuntime_JSI.cpp index 020bf83f..b9a4f09b 100644 --- a/Core/AppRuntime/Source/AppRuntime_JSI.cpp +++ b/Core/AppRuntime/Source/AppRuntime_JSI.cpp @@ -1,5 +1,4 @@ #include "AppRuntime.h" -#include "WorkQueue.h" #include #include @@ -10,16 +9,15 @@ namespace class TaskRunnerAdapter : public v8runtime::JSITaskRunner { public: - TaskRunnerAdapter(Babylon::WorkQueue& workQueue) - : m_workQueue(workQueue) + TaskRunnerAdapter(Babylon::AppRuntime& appRuntime) + : m_appRuntime(appRuntime) { } void postTask(std::unique_ptr task) override { - std::shared_ptr shared_task(task.release()); - m_workQueue.Append([shared_task2 = std::move(shared_task)](Napi::Env) { - shared_task2->run(); + m_appRuntime.Dispatch([task = std::shared_ptr(std::move(task))](Napi::Env) { + task->run(); }); } @@ -27,7 +25,7 @@ namespace TaskRunnerAdapter(const TaskRunnerAdapter&) = delete; TaskRunnerAdapter& operator=(const TaskRunnerAdapter&) = delete; - Babylon::WorkQueue& m_workQueue; + Babylon::AppRuntime& m_appRuntime; }; } @@ -37,7 +35,7 @@ namespace Babylon { v8runtime::V8RuntimeArgs args{}; args.inspectorPort = 5643; - args.foreground_task_runner = std::make_shared(*m_workQueue); + args.foreground_task_runner = std::make_shared(*this); const auto runtime = v8runtime::makeV8Runtime(std::move(args)); const auto env = Napi::Attach(*runtime); diff --git a/Core/AppRuntime/Source/WorkQueue.cpp b/Core/AppRuntime/Source/WorkQueue.cpp deleted file mode 100644 index 4c1c5f21..00000000 --- a/Core/AppRuntime/Source/WorkQueue.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include "WorkQueue.h" - -namespace Babylon -{ - WorkQueue::WorkQueue(std::function threadProcedure) - : m_thread{std::move(threadProcedure)} - { - } - - WorkQueue::~WorkQueue() - { - if (m_suspensionLock.has_value()) - { - Resume(); - } - - m_cancelSource.cancel(); - m_dispatcher.cancelled(); - - m_thread.join(); - } - - void WorkQueue::Suspend() - { - auto suspensionMutex = std::make_shared(); - m_suspensionLock.emplace(*suspensionMutex); - Append([suspensionMutex{std::move(suspensionMutex)}](Napi::Env) { - std::scoped_lock lock{*suspensionMutex}; - }); - } - - void WorkQueue::Resume() - { - m_suspensionLock.reset(); - } - - void WorkQueue::Run(Napi::Env env) - { - m_env = std::make_optional(env); - m_dispatcher.set_affinity(std::this_thread::get_id()); - - while (!m_cancelSource.cancelled()) - { - m_dispatcher.blocking_tick(m_cancelSource); - } - - m_dispatcher.clear(); - } -} diff --git a/Core/AppRuntime/Source/WorkQueue.h b/Core/AppRuntime/Source/WorkQueue.h deleted file mode 100644 index 95a504d7..00000000 --- a/Core/AppRuntime/Source/WorkQueue.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include -#include -#include - -#include -#include -#include - -namespace Babylon -{ - class WorkQueue - { - public: - WorkQueue(std::function threadProcedure); - ~WorkQueue(); - - template - void Append(CallableT callable) - { - // Manual dispatcher queueing requires a copyable CallableT, we use a shared pointer trick to make a - // copyable callable if necessary. - if constexpr (std::is_copy_constructible::value) - { - m_dispatcher.queue([this, callable = std::move(callable)]() { - Invoke(callable); - }); - } - else - { - m_dispatcher.queue([this, callablePtr = std::make_shared(std::move(callable))]() { - Invoke(*callablePtr); - }); - } - } - - void Suspend(); - void Resume(); - void Run(Napi::Env); - - private: - template - void Invoke(CallableT& callable) - { - callable(m_env.value()); - } - - std::optional m_env{}; - - std::optional> m_suspensionLock{}; - - arcana::cancellation_source m_cancelSource{}; - arcana::manual_dispatcher<128> m_dispatcher{}; - - std::thread m_thread; - }; -} diff --git a/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt b/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt index b513a866..5ebbc1ad 100644 --- a/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt +++ b/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt @@ -9,6 +9,7 @@ get_filename_component(UNIT_TESTS_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../../.." get_filename_component(TESTS_DIR "${UNIT_TESTS_DIR}/.." ABSOLUTE) get_filename_component(REPO_ROOT_DIR "${TESTS_DIR}/.." ABSOLUTE) +set(JSRUNTIMEHOST_TESTS ON CACHE BOOL "" FORCE) add_subdirectory(${REPO_ROOT_DIR} "${CMAKE_CURRENT_BINARY_DIR}/JsRuntimeHost") FetchContent_MakeAvailable_With_Message(googletest) diff --git a/Tests/UnitTests/Shared/Shared.cpp b/Tests/UnitTests/Shared/Shared.cpp index 1b9b27db..d118d510 100644 --- a/Tests/UnitTests/Shared/Shared.cpp +++ b/Tests/UnitTests/Shared/Shared.cpp @@ -11,8 +11,14 @@ #include #include #include +#ifdef ARCANA_TESTING_HOOKS +#include +#endif +#include +#include #include #include +#include namespace { @@ -125,6 +131,83 @@ TEST(Console, Log) done.get_future().get(); } +#ifdef ARCANA_TESTING_HOOKS +TEST(AppRuntime, DestroyDoesNotDeadlock) +{ + // Deterministic test for the race condition in the AppRuntime destructor. + // + // A global hook sleeps WHILE HOLDING the queue mutex, right before + // condition_variable::wait(). We synchronize so the worker is definitely + // in the hook before triggering destruction. + // + // Old (broken) code: cancel() + notify_all() fire without the mutex, + // so the notification is lost while the worker sleeps → deadlock. + // Fixed code: Append(cancel) calls push() which NEEDS the mutex, + // so it blocks until the worker enters wait() → notification delivered. + for (int i = 0; i < 3; i++) + { + // Shared state for hook synchronization + std::atomic hookEnabled{false}; + std::atomic hookSignaled{false}; + std::promise workerInHook; + + // Set the callback. It checks hookEnabled so we control + // when it actually sleeps. + arcana::set_before_wait_callback([&]() { + if (hookEnabled.load() && !hookSignaled.exchange(true)) + workerInHook.set_value(); + if (hookEnabled.load()) + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + }); + + auto runtime = std::make_unique(); + + // Dispatch work and wait for completion + std::promise ready; + runtime->Dispatch([&ready](Napi::Env) { + ready.set_value(); + }); + ready.get_future().wait(); + + // Enable the hook and dispatch a no-op to wake the worker, + // ensuring it cycles through the hook on its way back to idle + hookEnabled.store(true); + runtime->Dispatch([](Napi::Env) {}); + + // Wait for the worker to be in the hook (holding mutex, sleeping) + auto hookStatus = workerInHook.get_future().wait_for(std::chrono::seconds(5)); + ASSERT_NE(hookStatus, std::future_status::timeout) + << "Worker thread did not enter before-wait hook on iteration " << i; + + // Destroy on a detachable thread so the test doesn't hang if the + // destructor deadlocks (std::async's future destructor would block). + auto runtimePtr = std::make_shared>(std::move(runtime)); + std::promise destroyDone; + auto destroyFuture = destroyDone.get_future(); + std::thread destroyThread([runtimePtr, &destroyDone]() { + runtimePtr->reset(); + destroyDone.set_value(); + }); + + auto status = destroyFuture.wait_for(std::chrono::seconds(5)); + if (status == std::future_status::timeout) + { + destroyThread.detach(); + } + else + { + destroyThread.join(); + } + + // Reset hook + arcana::set_before_wait_callback([]() {}); + + ASSERT_NE(status, std::future_status::timeout) + << "Deadlock detected on iteration " << i; + } +} +#endif + int RunTests() { testing::InitGoogleTest(); diff --git a/Tests/UnitTests/Win32/App.cpp b/Tests/UnitTests/Win32/App.cpp index 84aaac46..7e60a6df 100644 --- a/Tests/UnitTests/Win32/App.cpp +++ b/Tests/UnitTests/Win32/App.cpp @@ -1,13 +1,15 @@ #include "../Shared/Shared.h" #include #include "Babylon/DebugTrace.h" +#include -int main() +int main(int argc, char** argv) { SetConsoleOutputCP(CP_UTF8); Babylon::DebugTrace::EnableDebugTrace(true); Babylon::DebugTrace::SetTraceOutput([](const char* trace) { OutputDebugStringA(trace); }); - return RunTests(); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); } From d53834b5f8632826bbe7cfcc7dd88c43c11db52d Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Thu, 2 Apr 2026 16:59:41 -0700 Subject: [PATCH 02/13] Update arcana.cpp to upstream after microsoft/arcana.cpp#59 merged Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a35de88a..93d88c55 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,8 +14,8 @@ FetchContent_Declare(AndroidExtensions GIT_TAG 2d5af72259cc73e5f249d3c99bee2010be9cb042 EXCLUDE_FROM_ALL) FetchContent_Declare(arcana.cpp - GIT_REPOSITORY https://github.com/bghgary/arcana.cpp.git - GIT_TAG 8328898f159c1778cb16fa41c7ad57994961e5e8 + GIT_REPOSITORY https://github.com/microsoft/arcana.cpp.git + GIT_TAG d5dd03cd66e7ecd0e4b94e35e4fdd7d86aef5b5f EXCLUDE_FROM_ALL) FetchContent_Declare(asio GIT_REPOSITORY https://github.com/chriskohlhoff/asio.git From 1b40f9c3b9ae74ad03ab81ba913252d8c0565f7b Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Thu, 2 Apr 2026 17:17:20 -0700 Subject: [PATCH 03/13] Fix arcana.cpp commit SHA to correct squash merge commit Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93d88c55..116efa6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ FetchContent_Declare(AndroidExtensions EXCLUDE_FROM_ALL) FetchContent_Declare(arcana.cpp GIT_REPOSITORY https://github.com/microsoft/arcana.cpp.git - GIT_TAG d5dd03cd66e7ecd0e4b94e35e4fdd7d86aef5b5f + GIT_TAG d5dd03cc6dd138fc17c277f61abbe2bc388444af EXCLUDE_FROM_ALL) FetchContent_Declare(asio GIT_REPOSITORY https://github.com/chriskohlhoff/asio.git From bed656bac1380b14d8cacdaebc7f08a6771d7e6f Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Tue, 7 Apr 2026 13:07:08 -0700 Subject: [PATCH 04/13] Simplify deadlock test to single iteration (deterministic, no loop needed) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Tests/UnitTests/Shared/Shared.cpp | 110 +++++++++++++++--------------- 1 file changed, 54 insertions(+), 56 deletions(-) diff --git a/Tests/UnitTests/Shared/Shared.cpp b/Tests/UnitTests/Shared/Shared.cpp index d118d510..cb540217 100644 --- a/Tests/UnitTests/Shared/Shared.cpp +++ b/Tests/UnitTests/Shared/Shared.cpp @@ -144,67 +144,65 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) // so the notification is lost while the worker sleeps → deadlock. // Fixed code: Append(cancel) calls push() which NEEDS the mutex, // so it blocks until the worker enters wait() → notification delivered. - for (int i = 0; i < 3; i++) - { - // Shared state for hook synchronization - std::atomic hookEnabled{false}; - std::atomic hookSignaled{false}; - std::promise workerInHook; - - // Set the callback. It checks hookEnabled so we control - // when it actually sleeps. - arcana::set_before_wait_callback([&]() { - if (hookEnabled.load() && !hookSignaled.exchange(true)) - workerInHook.set_value(); - if (hookEnabled.load()) - std::this_thread::sleep_for(std::chrono::milliseconds(200)); - }); - auto runtime = std::make_unique(); + // Shared state for hook synchronization + std::atomic hookEnabled{false}; + std::atomic hookSignaled{false}; + std::promise workerInHook; + + // Set the callback. It checks hookEnabled so we control + // when it actually sleeps. + arcana::set_before_wait_callback([&]() { + if (hookEnabled.load() && !hookSignaled.exchange(true)) + workerInHook.set_value(); + if (hookEnabled.load()) + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + }); - // Dispatch work and wait for completion - std::promise ready; - runtime->Dispatch([&ready](Napi::Env) { - ready.set_value(); - }); - ready.get_future().wait(); - - // Enable the hook and dispatch a no-op to wake the worker, - // ensuring it cycles through the hook on its way back to idle - hookEnabled.store(true); - runtime->Dispatch([](Napi::Env) {}); - - // Wait for the worker to be in the hook (holding mutex, sleeping) - auto hookStatus = workerInHook.get_future().wait_for(std::chrono::seconds(5)); - ASSERT_NE(hookStatus, std::future_status::timeout) - << "Worker thread did not enter before-wait hook on iteration " << i; - - // Destroy on a detachable thread so the test doesn't hang if the - // destructor deadlocks (std::async's future destructor would block). - auto runtimePtr = std::make_shared>(std::move(runtime)); - std::promise destroyDone; - auto destroyFuture = destroyDone.get_future(); - std::thread destroyThread([runtimePtr, &destroyDone]() { - runtimePtr->reset(); - destroyDone.set_value(); - }); + auto runtime = std::make_unique(); - auto status = destroyFuture.wait_for(std::chrono::seconds(5)); - if (status == std::future_status::timeout) - { - destroyThread.detach(); - } - else - { - destroyThread.join(); - } - - // Reset hook - arcana::set_before_wait_callback([]() {}); + // Dispatch work and wait for completion + std::promise ready; + runtime->Dispatch([&ready](Napi::Env) { + ready.set_value(); + }); + ready.get_future().wait(); + + // Enable the hook and dispatch a no-op to wake the worker, + // ensuring it cycles through the hook on its way back to idle + hookEnabled.store(true); + runtime->Dispatch([](Napi::Env) {}); + + // Wait for the worker to be in the hook (holding mutex, sleeping) + auto hookStatus = workerInHook.get_future().wait_for(std::chrono::seconds(5)); + ASSERT_NE(hookStatus, std::future_status::timeout) + << "Worker thread did not enter before-wait hook"; + + // Destroy on a detachable thread so the test doesn't hang if the + // destructor deadlocks (std::async's future destructor would block). + auto runtimePtr = std::make_shared>(std::move(runtime)); + std::promise destroyDone; + auto destroyFuture = destroyDone.get_future(); + std::thread destroyThread([runtimePtr, &destroyDone]() { + runtimePtr->reset(); + destroyDone.set_value(); + }); - ASSERT_NE(status, std::future_status::timeout) - << "Deadlock detected on iteration " << i; + auto status = destroyFuture.wait_for(std::chrono::seconds(5)); + if (status == std::future_status::timeout) + { + destroyThread.detach(); } + else + { + destroyThread.join(); + } + + // Reset hook + arcana::set_before_wait_callback([]() {}); + + ASSERT_NE(status, std::future_status::timeout) + << "Deadlock detected: AppRuntime destructor did not complete within 5 seconds"; } #endif From 9789c1e6b31046633cf16bffe45d5be602c32b5d Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Tue, 7 Apr 2026 13:33:14 -0700 Subject: [PATCH 05/13] Fix WorkQueue destructor deadlock by dispatching cancellation as work item The old destructor called cancel() + notify_all() from the main thread without the queue mutex. If the worker thread hadn't entered condition_variable::wait() yet, the notification was lost and join() hung forever. The fix dispatches cancellation as a work item via Append(). Since push() acquires the same mutex that wait() releases, the notification cannot be lost. Also fixes member declaration order in AppRuntime.h so m_options outlives m_workQueue during destruction. Includes a deterministic regression test using arcana testing hooks. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Core/AppRuntime/CMakeLists.txt | 4 +- Core/AppRuntime/Include/Babylon/AppRuntime.h | 31 +---------- Core/AppRuntime/Source/AppRuntime.cpp | 40 +++----------- Core/AppRuntime/Source/AppRuntime_JSI.cpp | 14 +++-- Core/AppRuntime/Source/WorkQueue.cpp | 53 ++++++++++++++++++ Core/AppRuntime/Source/WorkQueue.h | 58 ++++++++++++++++++++ 6 files changed, 132 insertions(+), 68 deletions(-) create mode 100644 Core/AppRuntime/Source/WorkQueue.cpp create mode 100644 Core/AppRuntime/Source/WorkQueue.h diff --git a/Core/AppRuntime/CMakeLists.txt b/Core/AppRuntime/CMakeLists.txt index ba671233..ad8f670d 100644 --- a/Core/AppRuntime/CMakeLists.txt +++ b/Core/AppRuntime/CMakeLists.txt @@ -9,7 +9,9 @@ set(SOURCES "Include/Babylon/AppRuntime.h" "Source/AppRuntime.cpp" "Source/AppRuntime_${NAPI_JAVASCRIPT_ENGINE}.cpp" - "Source/AppRuntime_${JSRUNTIMEHOST_PLATFORM}.${IMPL_EXT}") + "Source/AppRuntime_${JSRUNTIMEHOST_PLATFORM}.${IMPL_EXT}" + "Source/WorkQueue.cpp" + "Source/WorkQueue.h") add_library(AppRuntime ${SOURCES}) warnings_as_errors(AppRuntime) diff --git a/Core/AppRuntime/Include/Babylon/AppRuntime.h b/Core/AppRuntime/Include/Babylon/AppRuntime.h index 6e595c81..ba0ffa71 100644 --- a/Core/AppRuntime/Include/Babylon/AppRuntime.h +++ b/Core/AppRuntime/Include/Babylon/AppRuntime.h @@ -6,18 +6,14 @@ #include -#include - #include #include #include -#include -#include -#include -#include namespace Babylon { + class WorkQueue; + class AppRuntime final { public: @@ -47,23 +43,6 @@ namespace Babylon static void BABYLON_API DefaultUnhandledExceptionHandler(const Napi::Error& error); private: - template - void Append(CallableT callable) - { - if constexpr (std::is_copy_constructible::value) - { - m_dispatcher.queue([this, callable = std::move(callable)]() { - callable(m_env.value()); - }); - } - else - { - m_dispatcher.queue([this, callablePtr = std::make_shared(std::move(callable))]() { - (*callablePtr)(m_env.value()); - }); - } - } - // These three methods are the mechanism by which platform- and JavaScript-specific // code can be "injected" into the execution of the JavaScript thread. These three // functions are implemented in separate files, thus allowing implementations to be @@ -83,10 +62,6 @@ namespace Babylon void Execute(Dispatchable callback); Options m_options; - std::optional m_env{}; - std::optional> m_suspensionLock{}; - arcana::cancellation_source m_cancelSource{}; - arcana::manual_dispatcher<128> m_dispatcher{}; - std::thread m_thread; + std::unique_ptr m_workQueue; }; } diff --git a/Core/AppRuntime/Source/AppRuntime.cpp b/Core/AppRuntime/Source/AppRuntime.cpp index f05115fd..a6d89492 100644 --- a/Core/AppRuntime/Source/AppRuntime.cpp +++ b/Core/AppRuntime/Source/AppRuntime.cpp @@ -1,4 +1,5 @@ #include "AppRuntime.h" +#include "WorkQueue.h" #include namespace Babylon @@ -9,8 +10,8 @@ namespace Babylon } AppRuntime::AppRuntime(Options options) - : m_options{std::move(options)} - , m_thread{[this] { RunPlatformTier(); }} + : m_workQueue{std::make_unique([this] { RunPlatformTier(); })} + , m_options{std::move(options)} { Dispatch([this](Napi::Env env) { JsRuntime::CreateForJavaScript(env, [this](auto func) { Dispatch(std::move(func)); }); @@ -19,53 +20,26 @@ namespace Babylon AppRuntime::~AppRuntime() { - if (m_suspensionLock.has_value()) - { - m_suspensionLock.reset(); - } - - // Dispatch cancellation as a work item so the worker thread processes it - // naturally via blocking_tick, avoiding the race condition where an external - // cancel signal can be missed by the condition variable wait. - Append([this](Napi::Env) { - m_cancelSource.cancel(); - }); - - m_thread.join(); } void AppRuntime::Run(Napi::Env env) { - m_env = std::make_optional(env); - - m_dispatcher.set_affinity(std::this_thread::get_id()); - - while (!m_cancelSource.cancelled()) - { - m_dispatcher.blocking_tick(m_cancelSource); - } - - // The dispatcher can be non-empty if something is dispatched after cancellation. - m_dispatcher.clear(); + m_workQueue->Run(env); } void AppRuntime::Suspend() { - auto suspensionMutex = std::make_shared(); - m_suspensionLock.emplace(*suspensionMutex); - Append([suspensionMutex{std::move(suspensionMutex)}](Napi::Env) { - std::scoped_lock lock{*suspensionMutex}; - }); + m_workQueue->Suspend(); } void AppRuntime::Resume() { - m_suspensionLock.reset(); + m_workQueue->Resume(); } void AppRuntime::Dispatch(Dispatchable func) { - Append([this, func{std::move(func)}](Napi::Env env) mutable { + m_workQueue->Append([this, func{std::move(func)}](Napi::Env env) mutable { Execute([this, env, func{std::move(func)}]() mutable { try { diff --git a/Core/AppRuntime/Source/AppRuntime_JSI.cpp b/Core/AppRuntime/Source/AppRuntime_JSI.cpp index b9a4f09b..020bf83f 100644 --- a/Core/AppRuntime/Source/AppRuntime_JSI.cpp +++ b/Core/AppRuntime/Source/AppRuntime_JSI.cpp @@ -1,4 +1,5 @@ #include "AppRuntime.h" +#include "WorkQueue.h" #include #include @@ -9,15 +10,16 @@ namespace class TaskRunnerAdapter : public v8runtime::JSITaskRunner { public: - TaskRunnerAdapter(Babylon::AppRuntime& appRuntime) - : m_appRuntime(appRuntime) + TaskRunnerAdapter(Babylon::WorkQueue& workQueue) + : m_workQueue(workQueue) { } void postTask(std::unique_ptr task) override { - m_appRuntime.Dispatch([task = std::shared_ptr(std::move(task))](Napi::Env) { - task->run(); + std::shared_ptr shared_task(task.release()); + m_workQueue.Append([shared_task2 = std::move(shared_task)](Napi::Env) { + shared_task2->run(); }); } @@ -25,7 +27,7 @@ namespace TaskRunnerAdapter(const TaskRunnerAdapter&) = delete; TaskRunnerAdapter& operator=(const TaskRunnerAdapter&) = delete; - Babylon::AppRuntime& m_appRuntime; + Babylon::WorkQueue& m_workQueue; }; } @@ -35,7 +37,7 @@ namespace Babylon { v8runtime::V8RuntimeArgs args{}; args.inspectorPort = 5643; - args.foreground_task_runner = std::make_shared(*this); + args.foreground_task_runner = std::make_shared(*m_workQueue); const auto runtime = v8runtime::makeV8Runtime(std::move(args)); const auto env = Napi::Attach(*runtime); diff --git a/Core/AppRuntime/Source/WorkQueue.cpp b/Core/AppRuntime/Source/WorkQueue.cpp new file mode 100644 index 00000000..fd0ea685 --- /dev/null +++ b/Core/AppRuntime/Source/WorkQueue.cpp @@ -0,0 +1,53 @@ +#include "WorkQueue.h" + +namespace Babylon +{ + WorkQueue::WorkQueue(std::function threadProcedure) + : m_thread{std::move(threadProcedure)} + { + } + + WorkQueue::~WorkQueue() + { + if (m_suspensionLock.has_value()) + { + Resume(); + } + + // Dispatch cancellation as a work item so the worker thread processes + // it naturally via blocking_tick, avoiding the race condition where an + // external cancel+notify can be missed by condition_variable::wait. + Append([this](Napi::Env) { + m_cancelSource.cancel(); + }); + + m_thread.join(); + } + + void WorkQueue::Suspend() + { + auto suspensionMutex = std::make_shared(); + m_suspensionLock.emplace(*suspensionMutex); + Append([suspensionMutex{std::move(suspensionMutex)}](Napi::Env) { + std::scoped_lock lock{*suspensionMutex}; + }); + } + + void WorkQueue::Resume() + { + m_suspensionLock.reset(); + } + + void WorkQueue::Run(Napi::Env env) + { + m_env = std::make_optional(env); + m_dispatcher.set_affinity(std::this_thread::get_id()); + + while (!m_cancelSource.cancelled()) + { + m_dispatcher.blocking_tick(m_cancelSource); + } + + m_dispatcher.clear(); + } +} diff --git a/Core/AppRuntime/Source/WorkQueue.h b/Core/AppRuntime/Source/WorkQueue.h new file mode 100644 index 00000000..95a504d7 --- /dev/null +++ b/Core/AppRuntime/Source/WorkQueue.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace Babylon +{ + class WorkQueue + { + public: + WorkQueue(std::function threadProcedure); + ~WorkQueue(); + + template + void Append(CallableT callable) + { + // Manual dispatcher queueing requires a copyable CallableT, we use a shared pointer trick to make a + // copyable callable if necessary. + if constexpr (std::is_copy_constructible::value) + { + m_dispatcher.queue([this, callable = std::move(callable)]() { + Invoke(callable); + }); + } + else + { + m_dispatcher.queue([this, callablePtr = std::make_shared(std::move(callable))]() { + Invoke(*callablePtr); + }); + } + } + + void Suspend(); + void Resume(); + void Run(Napi::Env); + + private: + template + void Invoke(CallableT& callable) + { + callable(m_env.value()); + } + + std::optional m_env{}; + + std::optional> m_suspensionLock{}; + + arcana::cancellation_source m_cancelSource{}; + arcana::manual_dispatcher<128> m_dispatcher{}; + + std::thread m_thread; + }; +} From 101d1e52bbe4917b826209f397d59498648cf8f6 Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Tue, 7 Apr 2026 13:54:01 -0700 Subject: [PATCH 06/13] Fix constructor init order to match member declaration order Clang/GCC with -Werror,-Wreorder-ctor requires the initializer list to match the member declaration order. m_options is now declared before m_workQueue, so initialize it first. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Core/AppRuntime/Source/AppRuntime.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Core/AppRuntime/Source/AppRuntime.cpp b/Core/AppRuntime/Source/AppRuntime.cpp index a6d89492..b3a90b1b 100644 --- a/Core/AppRuntime/Source/AppRuntime.cpp +++ b/Core/AppRuntime/Source/AppRuntime.cpp @@ -10,8 +10,8 @@ namespace Babylon } AppRuntime::AppRuntime(Options options) - : m_workQueue{std::make_unique([this] { RunPlatformTier(); })} - , m_options{std::move(options)} + : m_options{std::move(options)} + , m_workQueue{std::make_unique([this] { RunPlatformTier(); })} { Dispatch([this](Napi::Env env) { JsRuntime::CreateForJavaScript(env, [this](auto func) { Dispatch(std::move(func)); }); From f9194d327d13a7226394f1c0f0da503fb0b9c12f Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Tue, 7 Apr 2026 14:06:09 -0700 Subject: [PATCH 07/13] Remove redundant ARCANA_TESTING_HOOKS guards from test code The define is always set when tests are built, so the #ifdef guards in Shared.cpp are unnecessary noise. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Tests/UnitTests/Shared/Shared.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Tests/UnitTests/Shared/Shared.cpp b/Tests/UnitTests/Shared/Shared.cpp index cb540217..fe502c47 100644 --- a/Tests/UnitTests/Shared/Shared.cpp +++ b/Tests/UnitTests/Shared/Shared.cpp @@ -11,9 +11,7 @@ #include #include #include -#ifdef ARCANA_TESTING_HOOKS #include -#endif #include #include #include @@ -131,7 +129,6 @@ TEST(Console, Log) done.get_future().get(); } -#ifdef ARCANA_TESTING_HOOKS TEST(AppRuntime, DestroyDoesNotDeadlock) { // Deterministic test for the race condition in the AppRuntime destructor. @@ -204,7 +201,6 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) ASSERT_NE(status, std::future_status::timeout) << "Deadlock detected: AppRuntime destructor did not complete within 5 seconds"; } -#endif int RunTests() { From 8d27abab4213b358308ff596bfe76a2d6e9ffb49 Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Tue, 7 Apr 2026 14:10:54 -0700 Subject: [PATCH 08/13] Address review: cancel immediately + no-op wake, cleanup on all paths - Destructor now calls cancel() immediately then appends a no-op to wake the worker, preserving prompt shutdown semantics. - Test cleanup (hook reset, detachable destroy) runs on all paths including hook timeout, preventing hangs from scope-exit destruction. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Core/AppRuntime/Source/WorkQueue.cpp | 12 +++--- Tests/UnitTests/Shared/Shared.cpp | 56 +++++++++++++++------------- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/Core/AppRuntime/Source/WorkQueue.cpp b/Core/AppRuntime/Source/WorkQueue.cpp index fd0ea685..194a176d 100644 --- a/Core/AppRuntime/Source/WorkQueue.cpp +++ b/Core/AppRuntime/Source/WorkQueue.cpp @@ -14,12 +14,12 @@ namespace Babylon Resume(); } - // Dispatch cancellation as a work item so the worker thread processes - // it naturally via blocking_tick, avoiding the race condition where an - // external cancel+notify can be missed by condition_variable::wait. - Append([this](Napi::Env) { - m_cancelSource.cancel(); - }); + // Cancel immediately so pending work is dropped promptly, then append + // a no-op work item to wake the worker thread from blocking_tick. The + // no-op goes through push() which acquires the queue mutex, avoiding + // the race where a bare notify_all() can be missed by wait(). + m_cancelSource.cancel(); + Append([](Napi::Env) {}); m_thread.join(); } diff --git a/Tests/UnitTests/Shared/Shared.cpp b/Tests/UnitTests/Shared/Shared.cpp index fe502c47..a3eb7057 100644 --- a/Tests/UnitTests/Shared/Shared.cpp +++ b/Tests/UnitTests/Shared/Shared.cpp @@ -139,7 +139,7 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) // // Old (broken) code: cancel() + notify_all() fire without the mutex, // so the notification is lost while the worker sleeps → deadlock. - // Fixed code: Append(cancel) calls push() which NEEDS the mutex, + // Fixed code: cancel() + Append(no-op), where push() NEEDS the mutex, // so it blocks until the worker enters wait() → notification delivered. // Shared state for hook synchronization @@ -156,11 +156,13 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) std::this_thread::sleep_for(std::chrono::milliseconds(200)); }); - auto runtime = std::make_unique(); + // Use shared_ptr so we can move it to a detachable destroy thread + auto runtime = std::make_shared>( + std::make_unique()); // Dispatch work and wait for completion std::promise ready; - runtime->Dispatch([&ready](Napi::Env) { + (*runtime)->Dispatch([&ready](Napi::Env) { ready.set_value(); }); ready.get_future().wait(); @@ -168,36 +170,38 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) // Enable the hook and dispatch a no-op to wake the worker, // ensuring it cycles through the hook on its way back to idle hookEnabled.store(true); - runtime->Dispatch([](Napi::Env) {}); + (*runtime)->Dispatch([](Napi::Env) {}); // Wait for the worker to be in the hook (holding mutex, sleeping) auto hookStatus = workerInHook.get_future().wait_for(std::chrono::seconds(5)); - ASSERT_NE(hookStatus, std::future_status::timeout) - << "Worker thread did not enter before-wait hook"; - - // Destroy on a detachable thread so the test doesn't hang if the - // destructor deadlocks (std::async's future destructor would block). - auto runtimePtr = std::make_shared>(std::move(runtime)); - std::promise destroyDone; - auto destroyFuture = destroyDone.get_future(); - std::thread destroyThread([runtimePtr, &destroyDone]() { - runtimePtr->reset(); - destroyDone.set_value(); - }); - auto status = destroyFuture.wait_for(std::chrono::seconds(5)); - if (status == std::future_status::timeout) - { - destroyThread.detach(); - } - else + // Always clean up, even if the hook didn't fire + auto cleanup = [&]() { + // Destroy on a detachable thread so we don't hang if the destructor deadlocks + std::promise destroyDone; + auto destroyFuture = destroyDone.get_future(); + std::thread destroyThread([runtime, &destroyDone]() { + runtime->reset(); + destroyDone.set_value(); + }); + + auto status = destroyFuture.wait_for(std::chrono::seconds(5)); + if (status == std::future_status::timeout) + destroyThread.detach(); + else + destroyThread.join(); + + arcana::set_before_wait_callback([]() {}); + return status; + }; + + if (hookStatus == std::future_status::timeout) { - destroyThread.join(); + cleanup(); + FAIL() << "Worker thread did not enter before-wait hook"; } - // Reset hook - arcana::set_before_wait_callback([]() {}); - + auto status = cleanup(); ASSERT_NE(status, std::future_status::timeout) << "Deadlock detected: AppRuntime destructor did not complete within 5 seconds"; } From 209b72c0eca6255711424054b582b8f7b136495c Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Tue, 7 Apr 2026 14:29:03 -0700 Subject: [PATCH 09/13] Add ARCANA_TESTING_HOOKS to Android test target The global add_compile_definitions in the subdirectory scope doesn't apply to UnitTestsJNI which is defined in the parent Android CMakeLists. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt b/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt index 5ebbc1ad..ac5032ed 100644 --- a/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt +++ b/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt @@ -22,6 +22,7 @@ add_library(UnitTestsJNI SHARED ${UNIT_TESTS_DIR}/Shared/Shared.cpp) target_compile_definitions(UnitTestsJNI PRIVATE JSRUNTIMEHOST_PLATFORM="${JSRUNTIMEHOST_PLATFORM}") +target_compile_definitions(UnitTestsJNI PRIVATE ARCANA_TESTING_HOOKS) target_include_directories(UnitTestsJNI PRIVATE ${UNIT_TESTS_DIR}) From 2c3a1be2aa25009201a48b8fa05e88e4c16a6037 Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Tue, 7 Apr 2026 15:48:28 -0700 Subject: [PATCH 10/13] Retrigger CI (previous failures were WebSocket flake + Android emulator issue) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> From 74cd297ddc6120c344e01d1b7dbf9972dc91b269 Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Tue, 7 Apr 2026 16:08:25 -0700 Subject: [PATCH 11/13] Add note about incomplete shutdown handling in destructor comment Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Core/AppRuntime/Source/WorkQueue.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Core/AppRuntime/Source/WorkQueue.cpp b/Core/AppRuntime/Source/WorkQueue.cpp index 194a176d..8ff5efe1 100644 --- a/Core/AppRuntime/Source/WorkQueue.cpp +++ b/Core/AppRuntime/Source/WorkQueue.cpp @@ -18,6 +18,11 @@ namespace Babylon // a no-op work item to wake the worker thread from blocking_tick. The // no-op goes through push() which acquires the queue mutex, avoiding // the race where a bare notify_all() can be missed by wait(). + // + // NOTE: This preserves the existing shutdown behavior where pending + // callbacks are dropped on cancellation. A more complete solution + // would add cooperative shutdown (e.g. NotifyDisposing/Rundown) so + // consumers can finish cleanup work before the runtime is destroyed. m_cancelSource.cancel(); Append([](Napi::Env) {}); From 42f9dc7b11b14c9f0051bf9eca23c34081616a85 Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Tue, 7 Apr 2026 16:11:01 -0700 Subject: [PATCH 12/13] Simplify test: only use detachable thread when hook actually fired MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the hook didn't fire, there's no deadlock risk — the runtime destructs normally on scope exit. The detachable thread is only needed when the worker is confirmed stuck in the hook. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Tests/UnitTests/Shared/Shared.cpp | 51 ++++++++++++++----------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/Tests/UnitTests/Shared/Shared.cpp b/Tests/UnitTests/Shared/Shared.cpp index a3eb7057..45fe98cf 100644 --- a/Tests/UnitTests/Shared/Shared.cpp +++ b/Tests/UnitTests/Shared/Shared.cpp @@ -156,13 +156,11 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) std::this_thread::sleep_for(std::chrono::milliseconds(200)); }); - // Use shared_ptr so we can move it to a detachable destroy thread - auto runtime = std::make_shared>( - std::make_unique()); + auto runtime = std::make_unique(); // Dispatch work and wait for completion std::promise ready; - (*runtime)->Dispatch([&ready](Napi::Env) { + runtime->Dispatch([&ready](Napi::Env) { ready.set_value(); }); ready.get_future().wait(); @@ -170,38 +168,35 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) // Enable the hook and dispatch a no-op to wake the worker, // ensuring it cycles through the hook on its way back to idle hookEnabled.store(true); - (*runtime)->Dispatch([](Napi::Env) {}); + runtime->Dispatch([](Napi::Env) {}); // Wait for the worker to be in the hook (holding mutex, sleeping) auto hookStatus = workerInHook.get_future().wait_for(std::chrono::seconds(5)); - - // Always clean up, even if the hook didn't fire - auto cleanup = [&]() { - // Destroy on a detachable thread so we don't hang if the destructor deadlocks - std::promise destroyDone; - auto destroyFuture = destroyDone.get_future(); - std::thread destroyThread([runtime, &destroyDone]() { - runtime->reset(); - destroyDone.set_value(); - }); - - auto status = destroyFuture.wait_for(std::chrono::seconds(5)); - if (status == std::future_status::timeout) - destroyThread.detach(); - else - destroyThread.join(); - - arcana::set_before_wait_callback([]() {}); - return status; - }; - if (hookStatus == std::future_status::timeout) { - cleanup(); + // Hook didn't fire — no deadlock risk, clean up normally + arcana::set_before_wait_callback([]() {}); FAIL() << "Worker thread did not enter before-wait hook"; } - auto status = cleanup(); + // Worker is in the hook (holding mutex, sleeping). Destroy on a + // detachable thread so the test doesn't hang if the destructor deadlocks. + auto runtimePtr = std::make_shared>(std::move(runtime)); + std::promise destroyDone; + auto destroyFuture = destroyDone.get_future(); + std::thread destroyThread([runtimePtr, &destroyDone]() { + runtimePtr->reset(); + destroyDone.set_value(); + }); + + auto status = destroyFuture.wait_for(std::chrono::seconds(5)); + if (status == std::future_status::timeout) + destroyThread.detach(); + else + destroyThread.join(); + + arcana::set_before_wait_callback([]() {}); + ASSERT_NE(status, std::future_status::timeout) << "Deadlock detected: AppRuntime destructor did not complete within 5 seconds"; } From d14e998a11096543a84e9afa72207d687fff41b7 Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Tue, 7 Apr 2026 16:13:20 -0700 Subject: [PATCH 13/13] Add braces to single-line if statements Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Tests/UnitTests/Shared/Shared.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Tests/UnitTests/Shared/Shared.cpp b/Tests/UnitTests/Shared/Shared.cpp index 45fe98cf..e09da4fd 100644 --- a/Tests/UnitTests/Shared/Shared.cpp +++ b/Tests/UnitTests/Shared/Shared.cpp @@ -151,9 +151,13 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) // when it actually sleeps. arcana::set_before_wait_callback([&]() { if (hookEnabled.load() && !hookSignaled.exchange(true)) + { workerInHook.set_value(); + } if (hookEnabled.load()) + { std::this_thread::sleep_for(std::chrono::milliseconds(200)); + } }); auto runtime = std::make_unique(); @@ -191,9 +195,13 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) auto status = destroyFuture.wait_for(std::chrono::seconds(5)); if (status == std::future_status::timeout) + { destroyThread.detach(); + } else + { destroyThread.join(); + } arcana::set_before_wait_callback([]() {});