diff --git a/src/header/TransferBench.hpp b/src/header/TransferBench.hpp
index 5c16a78..4a841e7 100644
--- a/src/header/TransferBench.hpp
+++ b/src/header/TransferBench.hpp
@@ -1495,6 +1495,15 @@ namespace {
       deviceIdx = GetClosestCpuNumaToGpu(memDevice.memIndex);
     }
 
+    if (IsCpuMemType(memType)) {
+      // Set NUMA policy prior to call to hipHostMalloc
+      numa_set_preferred(deviceIdx);
+    } else if (IsGpuMemType(memType)) {
+      // Switch to the appropriate GPU
+      // IMPORTANT: if the deviceIdx remapping above changes, remember to update this hipSetDevice call too!
+      ERR_CHECK(hipSetDevice(deviceIdx));
+    }
+
     // If memHandle is provided, allocate sharable memory
     if (memHandle != NULL) {
 #ifdef POD_COMM_ENABLED
@@ -1532,13 +1541,14 @@ namespace {
         memset(*memPtr, 0, roundedUpBytes);
         // Check that the allocated pages are actually on the correct NUMA node
         ERR_CHECK(CheckPages((char*)*memPtr, roundedUpBytes, deviceIdx));
+        numa_set_preferred(-1);
       } else if (IsGpuMemType(memType)) {
-        ERR_CHECK(hipSetDevice(memDevice.memIndex));
         ERR_CHECK(hipMemset(*memPtr, 0, numBytes));
         ERR_CHECK(hipDeviceSynchronize());
       }
       return ERR_NONE;
 #else
+      if (IsCpuMemType(memType)) numa_set_preferred(-1);
       return {ERR_FATAL, "Unable to allocate sharable memory if not compiled with pod communication support"};
 #endif
     } else {
@@ -1547,9 +1557,6 @@ namespace {
 
     if (IsCpuMemType(memType)) {
 
-      // Set NUMA policy prior to call to hipHostMalloc
-      numa_set_preferred(deviceIdx);
-
       // Allocate host-pinned memory (should respect NUMA mem policy)
       int flags = 0;
 #if !defined (__NVCC__)
@@ -1590,8 +1597,6 @@ namespace {
       // Reset to default numa mem policy
       numa_set_preferred(-1);
     } else if (IsGpuMemType(memType)) {
-      // Switch to the appropriate GPU
-      ERR_CHECK(hipSetDevice(memDevice.memIndex));
 
       if (memType == MEM_GPU) {
         // Allocate GPU memory on appropriate device