diff --git a/src/header/TransferBench.hpp b/src/header/TransferBench.hpp index 5c16a78..4a841e7 100644 --- a/src/header/TransferBench.hpp +++ b/src/header/TransferBench.hpp @@ -1495,6 +1495,15 @@ namespace { deviceIdx = GetClosestCpuNumaToGpu(memDevice.memIndex); } + if (IsCpuMemType(memType)) { + // Set NUMA policy prior to call to hipHostMalloc + numa_set_preferred(deviceIdx); + } else if (IsGpuMemType(memType)) { + // Switch to the appropriate GPU + // IMP: if the remapping above changes, remember to modify this! + ERR_CHECK(hipSetDevice(deviceIdx)); + } + // If memHandle is provided, allocate sharable memory if (memHandle != NULL) { #ifdef POD_COMM_ENABLED @@ -1532,13 +1541,14 @@ namespace { memset(*memPtr, 0, roundedUpBytes); // Check that the allocated pages are actually on the correct NUMA node ERR_CHECK(CheckPages((char*)*memPtr, roundedUpBytes, deviceIdx)); + numa_set_preferred(-1); } else if (IsGpuMemType(memType)) { - ERR_CHECK(hipSetDevice(memDevice.memIndex)); ERR_CHECK(hipMemset(*memPtr, 0, numBytes)); ERR_CHECK(hipDeviceSynchronize()); } return ERR_NONE; #else + if (IsCpuMemType(memType)) numa_set_preferred(-1); return {ERR_FATAL, "Unable to allocate sharable memory if not compiled with pod communication support"}; #endif } else { @@ -1547,9 +1557,6 @@ namespace { if (IsCpuMemType(memType)) { - // Set NUMA policy prior to call to hipHostMalloc - numa_set_preferred(deviceIdx); - // Allocate host-pinned memory (should respect NUMA mem policy) int flags = 0; #if !defined (__NVCC__) @@ -1590,8 +1597,6 @@ namespace { // Reset to default numa mem policy numa_set_preferred(-1); } else if (IsGpuMemType(memType)) { - // Switch to the appropriate GPU - ERR_CHECK(hipSetDevice(memDevice.memIndex)); if (memType == MEM_GPU) { // Allocate GPU memory on appropriate device