From de5de1d6a63d33f1bcccaa058c5ac7b4a2e5420d Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Sun, 26 Apr 2026 23:57:51 -0400
Subject: [PATCH 01/18] Add MergeDataSegments pass

---
 src/passes/CMakeLists.txt        |   1 +
 src/passes/MergeDataSegments.cpp | 557 +++++++++++++++++++++++++++++++
 src/passes/pass.cpp              |   3 +
 src/passes/passes.h              |   1 +
 4 files changed, 562 insertions(+)
 create mode 100644 src/passes/MergeDataSegments.cpp
diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index c2952e174b8..d03d26fe47a 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -68,6 +68,7 @@ set(passes_SOURCES
   Memory64Lowering.cpp
   MemoryPacking.cpp
   MergeBlocks.cpp
+  MergeDataSegments.cpp
   MergeSimilarFunctions.cpp
   MergeLocals.cpp
   Metrics.cpp
diff --git a/src/passes/MergeDataSegments.cpp b/src/passes/MergeDataSegments.cpp
new file mode 100644
index 00000000000..3d99d327fc1
--- /dev/null
+++ b/src/passes/MergeDataSegments.cpp
@@ -0,0 +1,557 @@
+/*
+ * Copyright 2026 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Merges adjacent active data segments into a single data segment. The name of
+// the merged segment is the name of the input segment with the lowest offset.
+// If the memory is known to be zero-initialized, we can also merge
+// near-adjacent data segments according to a size heuristic. We must be careful
+// to flush all merged segments for a memory before adding a segment of
+// non-constant offset. Unless TNH is enabled, we must also be careful to flush
+// all merged segments for all memories before adding a segment that may cause
+// an out-of-bounds trap.
+//
+
+#include "pass.h"
+#include "support/stdckdint.h"
+#include "wasm-builder.h"
+#include "wasm.h"
+
+namespace wasm {
+
+namespace {
+
+// The maximum size possible for a single data segment.
+constexpr size_t MAX_SEG_SIZE = std::numeric_limits<int32_t>::max();
+
+struct SegmentEntry {
+  Address start;
+  Name name;
+  mutable std::vector<char> data;
+  Address end() const { return start + data.size(); }
+
+  struct CompareStart {
+    using is_transparent = void;
+
+    bool operator()(const SegmentEntry& lhs, const SegmentEntry& rhs) const {
+      return lhs.start < rhs.start;
+    }
+
+    bool operator()(const SegmentEntry& lhs, Address rhs) const {
+      return lhs.start < rhs;
+    }
+
+    bool operator()(Address lhs, const SegmentEntry& rhs) const {
+      return lhs < rhs.start;
+    }
+  };
+
+  // Check if we can merge this entry while respecting MAX_SEG_SIZE.
+  bool canMergeInto(std::set<SegmentEntry, CompareStart>& map) const {
+    if (data.empty()) {
+      return true;
+    }
+
+    size_t mergedSize = data.size();
+    auto it = map.upper_bound(start);
+    if (it != map.begin()) {
+      --it;
+      if (start <= it->end()) {
+        mergedSize += start - it->start;
+      }
+    }
+    it = map.upper_bound(end());
+    if (it != map.begin()) {
+      --it;
+      if (end() <= it->end()) {
+        mergedSize += it->end() - end();
+      }
+    }
+
+    return mergedSize <= MAX_SEG_SIZE;
+  }
+
+  // Simple merge algorithm, joining together adjacent entries.
+  void mergeInto(std::set<SegmentEntry, CompareStart>& map) const {
+    if (data.empty()) {
+      return;
+    }
+
+    // If there exists an overlapping or adjacent entry before the new entry,
+    // then subsume the new entry into the old entry. Otherwise, simply add the
+    // new entry to the map.
+    auto it = map.upper_bound(start);
+    auto merged = it;
+    if (it != map.begin()) {
+      --merged;
+      if (start <= merged->end()) {
+        auto head = start - merged->start;
+        auto tail = merged->data.size() - head;
+        // Copy all bytes up to the old entry's size, then append any remaining
+        // bytes.
+        if (data.size() <= tail) {
+          std::copy(data.begin(), data.end(), merged->data.begin() + head);
+        } else {
+          std::copy(
+            data.begin(), data.begin() + tail, merged->data.begin() + head);
+          merged->data.insert(
+            merged->data.end(), data.begin() + tail, data.end());
+        }
+      } else {
+        merged = map.emplace_hint(it, *this);
+      }
+    } else {
+      merged = map.emplace_hint(it, *this);
+    }
+
+    // Subsume any further overlapping or adjacent entries into the merged
+    // entry.
+    while (it != map.end() && it->start <= merged->end()) {
+      if (merged->end() < it->end()) {
+        merged->data.insert(merged->data.end(),
+                            it->data.begin() + (merged->end() - it->start),
+                            it->data.end());
+      }
+      it = map.erase(it);
+    }
+  }
+};
+
+using SegmentMap = std::set<SegmentEntry, SegmentEntry::CompareStart>;
+
+// Bytes needed to represent a nonnegative integer in the signed LEB encoding.
+size_t lebSize(uint64_t x) { return (std::bit_width(x) + 7) / 7; }
+
+enum InBounds { No, Maybe, Yes };
+
+struct MergeInfo {
+  Memory* mem;
+  Address knownSize;
+  SegmentMap flushedSegments;
+  SegmentMap newSegments;
+  bool zeroFilled;
+
+  // Determine whether the initialization of a new data segment can possibly
+  // succeed, and update the known size of the memory accordingly. If this
+  // method returns No, then initializing the data segment will invariably
+  // result in a trap during instantiation. This method should return Maybe or
+  // Yes before the segment is added to a SegmentMap, otherwise address
+  // overflows could occur in the merge algorithm.
+  InBounds inBounds(const Literal& offset, size_t size) {
+    if (offset.isNegative() || size > MAX_SEG_SIZE) {
+      return InBounds::No;
+    }
+    uint64_t end;
+    if (std::ckd_add(&end, offset.getUnsigned(), size)) {
+      return InBounds::No;
+    }
+    if (end == 0) {
+      return InBounds::Yes;
+    }
+
+    auto neededSize = ((end - 1) >> mem->pageSizeLog2) + 1;
+    if (neededSize <= knownSize) {
+      return InBounds::Yes;
+    } else if (!mem->imported() || (mem->hasMax() && neededSize > mem->max)) {
+      return InBounds::No;
+    } else {
+      knownSize = neededSize;
+      return InBounds::Maybe;
+    }
+  }
+
+  // Copy bytes [start, start + size) out of flushedSegments into dest. Gaps are
+  // zero-filled if zeroFilled is set; otherwise a gap makes this return false.
+  bool flushedData(std::vector<char>& dest, Address start, size_t size) {
+    dest.clear();
+    dest.reserve(size);
+    Address end = start + size;
+    // Start with the entry at or before start, which may cover the head.
+    auto it = flushedSegments.upper_bound(start);
+    if (it != flushedSegments.begin()) {
+      auto preIt = it;
+      --preIt;
+      if (start < preIt->end()) {
+        if (end <= preIt->end()) {
+          dest.assign(preIt->data.begin() + (start - preIt->start),
+                      preIt->data.begin() + (end - preIt->start));
+          return true;
+        }
+        dest.assign(preIt->data.begin() + (start - preIt->start), preIt->data.end());
+      }
+    }
+    // Then walk the entries that begin inside the range, padding the gaps.
+    while (it != flushedSegments.end() && it->start < end) {
+      if (dest.size() < it->start - start) {
+        if (!zeroFilled) {
+          return false;
+        }
+        dest.resize(it->start - start);
+      }
+      if (end <= it->end()) {
+        dest.insert(
+          dest.end(), it->data.begin(), it->data.begin() + (end - it->start));
+        return true;
+      }
+      dest.insert(dest.end(), it->data.begin(), it->data.end());
+      ++it;
+    }
+
+    if (!zeroFilled) {
+      return false;
+    }
+    dest.resize(size);
+    return true;
+  }
+
+  // Merge near-adjacent entries in newSegments according to a size heuristic.
+  void mergeNearAdjacent() {
+    if (newSegments.size() < 2) {
+      return;
+    }
+    // Pessimistically assume that all data segments use the implicit memory 0
+    // encoding. Then, the total size of a data segment is 3 + lebSize(offset) +
+    // lebSize(size) + size. We greedily attempt to merge segments in a single
+    // pass from lower to higher addresses.
+    auto left = newSegments.begin();
+    auto right = left;
+    ++right;
+    std::vector<char> gapData;
+    while (right != newSegments.end()) {
+      uint64_t leftSize = left->data.size();
+      uint64_t rightSize = right->data.size();
+      uint64_t gapSize = right->start - left->end();
+      uint64_t mergedSize = leftSize + gapSize + rightSize;
+      if (mergedSize > MAX_SEG_SIZE) {
+        left = right++;
+        continue;
+      }
+
+      size_t leftSegSize =
+        3 + lebSize(left->start) + lebSize(leftSize) + leftSize;
+      size_t rightSegSize =
+        3 + lebSize(right->start) + lebSize(rightSize) + rightSize;
+      size_t mergedSegSize =
+        3 + lebSize(left->start) + lebSize(mergedSize) + mergedSize;
+      if (leftSegSize + rightSegSize < mergedSegSize) {
+        left = right++;
+        continue;
+      }
+      if (!flushedData(gapData, left->end(), gapSize)) {
+        left = right++;
+        continue;
+      }
+
+      left->data.insert(left->data.end(), gapData.begin(), gapData.end());
+      left->data.insert(
+        left->data.end(), right->data.begin(), right->data.end());
+      right = newSegments.erase(right);
+    }
+  }
+
+  void flushBoundsCheck(Module* module,
+                        std::optional<Name>& boundsCheckSeg,
+                        bool clearFlushed) {
+    // Flush the first merged segment that overlaps the last known page, so that
+    // we hit the bounds check before adding any other segments.
+    assert(knownSize != 0);
+    Address lastPageStart = (knownSize - 1) << mem->pageSizeLog2;
+    auto it = newSegments.upper_bound(lastPageStart);
+    bool hasEntry = false;
+    SegmentEntry entry;
+    if (it != newSegments.begin()) {
+      auto preIt = it;
+      --preIt;
+      if (lastPageStart < preIt->end()) {
+        hasEntry = true;
+        entry = std::move(newSegments.extract(preIt).value());
+      }
+    }
+    if (!hasEntry && it != newSegments.end()) {
+      hasEntry = true;
+      entry = std::move(newSegments.extract(it).value());
+    }
+    if (hasEntry && !clearFlushed) {
+      entry.mergeInto(flushedSegments);
+    }
+    // If the last known page has no nonempty segments, synthesize a new empty
+    // segment.
+    if (!hasEntry) {
+      assert(boundsCheckSeg);
+      entry.start = lastPageStart + 1;
+      entry.name = *boundsCheckSeg;
+      boundsCheckSeg.reset();
+    }
+    flushEntry(module, std::move(entry));
+  }
+
+  void flush(Module* module, bool clearFlushed) {
+    // If the flush is triggered by a segment of non-constant offset, clear all
+    // previous data.
+    if (clearFlushed) {
+      flushedSegments.clear();
+    } else {
+      for (const auto& seg : newSegments) {
+        seg.mergeInto(flushedSegments);
+      }
+    }
+    // Flush merged segments to the module in order.
+    while (!newSegments.empty()) {
+      flushEntry(module,
+                 std::move(newSegments.extract(newSegments.begin()).value()));
+    }
+  }
+
+  void flushEntry(Module* module, SegmentEntry&& entry) {
+    // Finish flushing an entry into a data segment in the underlying module.
+    auto* c = Builder(*module).makeConst(
+      Literal::makeFromInt64(entry.start, mem->addressType));
+    auto seg = Builder::makeDataSegment(entry.name, mem->name, false, c);
+    seg->data = std::move(entry.data);
+    module->dataSegments.push_back(std::move(seg));
+  }
+};
+
+void flushAll(Module* module,
+              std::unordered_map<Name, MergeInfo>& infos,
+              std::optional<Name>& boundsCheckMem,
+              std::optional<Name>& boundsCheckSeg,
+              std::optional<Name> clearFlushedMem) {
+  for (const auto& mem : module->memories) {
+    infos[mem->name].mergeNearAdjacent();
+  }
+  if (boundsCheckMem) {
+    infos[*boundsCheckMem].flushBoundsCheck(
+      module, boundsCheckSeg, boundsCheckMem == clearFlushedMem);
+    boundsCheckMem.reset();
+  }
+  for (const auto& mem : module->memories) {
+    infos[mem->name].flush(module, mem->name == clearFlushedMem);
+  }
+}
+
+} // namespace
+
+struct MergeDataSegments : public Pass {
+  // This pass only modifies data segments and data-segment indices.
+  bool requiresNonNullableLocalFixups() override { return false; }
+
+  void run(Module* module) override {
+    bool trapsNeverHappen = getPassOptions().trapsNeverHappen;
+    bool zeroFilledMemory = getPassOptions().zeroFilledMemory;
+
+    if (module->dataSegments.empty()) {
+      return;
+    }
+
+    // Initialize the MergeInfo list with each memory in the module.
+    std::unordered_map<Name, MergeInfo> infos;
+    for (const auto& mem : module->memories) {
+      auto& info = infos[mem->name];
+      info.mem = mem.get();
+      info.knownSize = mem->initial;
+      info.zeroFilled = zeroFilledMemory || !mem->imported();
+    }
+
+    std::vector<std::unique_ptr<DataSegment>> oldSegments;
+    module->dataSegments.swap(oldSegments);
+
+    // To avoid changing observable behavior, we flush all existing data before
+    // adding a new data segment that may be out-of-bounds. Between flushes, we
+    // use boundsCheckMem to lazily keep track of which memory last triggered a
+    // bounds check, so that we can flush a corresponding bounds-check segment
+    // before flushing any other data. If an empty segment triggers a bounds
+    // check, then it will not show up in boundsCheckMem, so we keep track of
+    // its name in boundsCheckSeg in case we need to synthesize it again.
+    std::optional<Name> boundsCheckMem = std::nullopt;
+    std::optional<Name> boundsCheckSeg = std::nullopt;
+    // We also keep track of the activeNames so that we can rename active
+    // segments referred to by instructions, and retain an emptySegment in case
+    // we need a name but no nonempty active segments are left.
+    std::unordered_set<Name> activeNames;
+    std::unique_ptr<DataSegment> emptySegment = nullptr;
+    // If a segment is guaranteed to cause an out-of-bounds trap, then we flush
+    // all prior segments, copy it verbatim, then drop all remaining segments.
+    std::unique_ptr<DataSegment> trapSegment = nullptr;
+
+    for (auto& seg : oldSegments) {
+      if (seg->isPassive) {
+        module->dataSegments.push_back(std::move(seg));
+        continue;
+      }
+      activeNames.insert(seg->name);
+      auto& info = infos[seg->memory];
+
+      if (auto* c = seg->offset->dynCast<Const>()) {
+        auto inBounds = info.inBounds(c->value, seg->data.size());
+        if (inBounds == InBounds::No) {
+          trapSegment = std::move(seg);
+          break;
+        }
+
+        SegmentEntry entry;
+        entry.start = c->value.getUnsigned();
+        entry.name = seg->name;
+        if (!seg->data.empty()) {
+          entry.data = std::move(seg->data);
+        } else if (!emptySegment) {
+          emptySegment = std::move(seg);
+        }
+
+        // If a constant-offset segment is statically in-bounds, we simply merge
+        // it into its appropriate memory; otherwise, we flush all memories,
+        // then mark its segment as needing a bounds check next flush.
+        if (!trapsNeverHappen && inBounds != InBounds::Yes) {
+          flushAll(module, infos, boundsCheckMem, boundsCheckSeg, std::nullopt);
+          boundsCheckMem = info.mem->name;
+          boundsCheckSeg = entry.name;
+        }
+
+        // As a special fallback, flush the memory early if the merged segment
+        // would not respect MAX_SEG_SIZE.
+        if (!entry.canMergeInto(info.newSegments)) {
+          if (boundsCheckMem) {
+            infos[*boundsCheckMem].mergeNearAdjacent();
+            infos[*boundsCheckMem].flushBoundsCheck(
+              module, boundsCheckSeg, boundsCheckMem == seg->memory);
+            boundsCheckMem.reset();
+          }
+          info.mergeNearAdjacent();
+          info.flush(module, false);
+        }
+
+        entry.mergeInto(info.newSegments);
+      } else {
+        if (!seg->data.empty()) {
+          // A nonempty non-constant-offset segment always flushes its own
+          // memory and invalidates all previous data. Unless TNH is enabled, it
+          // also requires all other memories to be flushed due to the bounds
+          // check.
+          if (trapsNeverHappen) {
+            if (boundsCheckMem) {
+              infos[*boundsCheckMem].mergeNearAdjacent();
+              infos[*boundsCheckMem].flushBoundsCheck(
+                module, boundsCheckSeg, boundsCheckMem == seg->memory);
+              boundsCheckMem.reset();
+            }
+            info.mergeNearAdjacent();
+            info.flush(module, true);
+          } else {
+            flushAll(
+              module, infos, boundsCheckMem, boundsCheckSeg, seg->memory);
+          }
+          info.zeroFilled = false;
+        } else {
+          // An empty non-constant-offset segment only triggers a bounds check.
+          if (!trapsNeverHappen) {
+            flushAll(
+              module, infos, boundsCheckMem, boundsCheckSeg, std::nullopt);
+          }
+        }
+
+        // For the bounds check, we conservatively assume that the offset is 0.
+        auto zero = Literal::makeZero(info.mem->addressType);
+        if (info.inBounds(zero, seg->data.size()) == InBounds::No) {
+          trapSegment = std::move(seg);
+          break;
+        }
+        module->dataSegments.push_back(std::move(seg));
+      }
+    }
+
+    // If there were no active segments in the input, then we have no more work
+    // to do after regenerating the module's map.
+    if (activeNames.empty()) {
+      module->updateDataSegmentsMap();
+      return;
+    }
+
+    // Flush all remaining segments, then copy any trap segment.
+    flushAll(module, infos, boundsCheckMem, boundsCheckSeg, std::nullopt);
+    if (trapSegment) {
+      module->dataSegments.push_back(std::move(trapSegment));
+    }
+    module->updateDataSegmentsMap();
+
+    // Determine a destination segment for any instructions that refer to an
+    // active segment. If there are no active segments left in the output, then
+    // there must have been some empty active segment in the input, which we
+    // have retained in emptySegment.
+    std::optional<Name> firstActive = std::nullopt;
+    for (const auto& seg : module->dataSegments) {
+      if (!seg->isPassive) {
+        firstActive = seg->name;
+        break;
+      }
+    }
+    assert(firstActive || emptySegment);
+    Name destName = firstActive ? *firstActive : emptySegment->name;
+
+    struct ActiveSegmentRenamer
+      : public WalkerPass<PostWalker<ActiveSegmentRenamer>> {
+      // This pass only modifies data-segment indices.
+      bool requiresNonNullableLocalFixups() override { return false; }
+
+      std::unordered_set<Name> srcNames;
+      Name destName;
+      bool destUsed = false;
+
+      ActiveSegmentRenamer(std::unordered_set<Name> srcNames, Name destName)
+        : srcNames(std::move(srcNames)), destName(destName) {}
+
+      void visitMemoryInit(MemoryInit* curr) {
+        if (srcNames.contains(curr->segment)) {
+          curr->segment = destName;
+          destUsed = true;
+        }
+      }
+
+      void visitDataDrop(DataDrop* curr) {
+        if (srcNames.contains(curr->segment)) {
+          curr->segment = destName;
+          destUsed = true;
+        }
+      }
+
+      void visitArrayNewData(ArrayNewData* curr) {
+        if (srcNames.contains(curr->segment)) {
+          curr->segment = destName;
+          destUsed = true;
+        }
+      }
+
+      void visitArrayInitData(ArrayInitData* curr) {
+        if (srcNames.contains(curr->segment)) {
+          curr->segment = destName;
+          destUsed = true;
+        }
+      }
+    };
+
+    // Replace the names, then actually add the empty segment if needed.
+    ActiveSegmentRenamer renamer(std::move(activeNames), destName);
+    renamer.run(getPassRunner(), module);
+    renamer.runOnModuleCode(getPassRunner(), module);
+    if (renamer.destUsed && !firstActive) {
+      module->dataSegments.push_back(std::move(emptySegment));
+      module->updateDataSegmentsMap();
+    }
+  }
+};
+
+Pass* createMergeDataSegmentsPass() { return new MergeDataSegments(); }
+
+} // namespace wasm
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index e5de76176ba..3a7712b5b82 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -293,6 +293,9 @@ void PassRegistry::registerPasses() {
                createMemoryPackingPass);
   registerPass(
     "merge-blocks", "merges blocks to their parents", createMergeBlocksPass);
+  registerPass("merge-data-segments",
+               "merges adjacent active data segments into a single segment",
+               createMergeDataSegmentsPass);
   registerPass("merge-similar-functions",
                "merges similar functions when benefical",
                createMergeSimilarFunctionsPass);
diff --git a/src/passes/passes.h b/src/passes/passes.h
index be06369a9f8..cc7ca31e2ba 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -90,6 +90,7 @@ Pass* createLoopInvariantCodeMotionPass();
 Pass* createMemory64LoweringPass();
 Pass* createMemoryPackingPass();
 Pass* createMergeBlocksPass();
+Pass* createMergeDataSegmentsPass();
 Pass* createMergeSimilarFunctionsPass();
 Pass* createMergeLocalsPass();
 Pass* createMinifiedPrinterPass();

From 2e8b52746ab72d57024f52708bedf4dcdbbf9d76 Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Mon, 27 Apr 2026 11:23:22 -0400
Subject: [PATCH 02/18] Add MergeDataSegments to README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 8198b49cc76..4d2e4206ba6 100644
--- a/README.md
+++ b/README.md
@@ -337,6 +337,8 @@ overview of some of the relevant ones:
   removes unneeded parts, etc.
 * **MergeBlocks** - Merge a `block` to an outer one where possible, reducing
   their number.
+* **MergeDataSegments** - Merge active data segments with adjacent offsets into
+  a single data segment.
 * **MergeLocals** - When two locals have the same value in part of their
   overlap, pick in a way to help CoalesceLocals do better later (split off from
   CoalesceLocals to keep the latter simple).

From 28405b785ce098a64015064ee805404869c2df4a Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Mon, 27 Apr 2026 12:59:53 -0400
Subject: [PATCH 03/18] Fix signedness issues

---
 src/passes/MergeDataSegments.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/passes/MergeDataSegments.cpp b/src/passes/MergeDataSegments.cpp
index 3d99d327fc1..00ff46804dc 100644
--- a/src/passes/MergeDataSegments.cpp
+++ b/src/passes/MergeDataSegments.cpp
@@ -35,7 +35,7 @@ namespace wasm {
 namespace {
 
 // The maximum size possible for a single data segment.
-constexpr size_t MAX_SEG_SIZE = std::numeric_limits<int32_t>::max();
+constexpr uint64_t MAX_SEG_SIZE = std::numeric_limits<uint32_t>::max();
 
 struct SegmentEntry {
   Address start;
@@ -150,12 +150,12 @@ struct MergeInfo {
   // result in a trap during instantiation. This method should return Maybe or
   // Yes before the segment is added to a SegmentMap, otherwise address
   // overflows could occur in the merge algorithm.
-  InBounds inBounds(const Literal& offset, size_t size) {
-    if (offset.isNegative() || size > MAX_SEG_SIZE) {
+  InBounds inBounds(Address start, size_t size) {
+    if (size > MAX_SEG_SIZE) {
       return InBounds::No;
     }
     uint64_t end;
-    if (std::ckd_add(&end, offset.getUnsigned(), size)) {
+    if (std::ckd_add<uint64_t>(&end, start, size)) {
       return InBounds::No;
     }
     if (end == 0) {
@@ -240,11 +240,11 @@ struct MergeInfo {
         continue;
       }
 
-      size_t leftSegSize =
+      uint64_t leftSegSize =
         3 + lebSize(left->start) + lebSize(leftSize) + leftSize;
-      size_t rightSegSize =
+      uint64_t rightSegSize =
         3 + lebSize(right->start) + lebSize(rightSize) + rightSize;
-      size_t mergedSegSize =
+      uint64_t mergedSegSize =
         3 + lebSize(left->start) + lebSize(mergedSize) + mergedSize;
       if (leftSegSize + rightSegSize < mergedSegSize) {
         left = right++;
@@ -396,14 +396,15 @@ struct MergeDataSegments : public Pass {
       auto& info = infos[seg->memory];
 
       if (auto* c = seg->offset->dynCast<Const>()) {
-        auto inBounds = info.inBounds(c->value, seg->data.size());
+        Address start = c->value.getUnsigned();
+        auto inBounds = info.inBounds(start, seg->data.size());
         if (inBounds == InBounds::No) {
           trapSegment = std::move(seg);
           break;
         }
 
         SegmentEntry entry;
-        entry.start = c->value.getUnsigned();
+        entry.start = start;
         entry.name = seg->name;
         if (!seg->data.empty()) {
           entry.data = std::move(seg->data);
@@ -463,8 +464,7 @@ struct MergeDataSegments : public Pass {
         }
 
         // For the bounds check, we conservatively assume that the offset is 0.
-        auto zero = Literal::makeZero(info.mem->addressType);
-        if (info.inBounds(zero, seg->data.size()) == InBounds::No) {
+        if (info.inBounds(0, seg->data.size()) == InBounds::No) {
           trapSegment = std::move(seg);
           break;
         }

From 81b481118606f173bb21835f5fa7ce1a4da4fccb Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Mon, 27 Apr 2026 13:40:17 -0400
Subject: [PATCH 04/18] Update lit/help tests

---
 test/lit/help/wasm-metadce.test | 3 +++
 test/lit/help/wasm-opt.test     | 3 +++
 test/lit/help/wasm2js.test      | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/test/lit/help/wasm-metadce.test b/test/lit/help/wasm-metadce.test
index 1b0cc7d4569..71c145b0f37 100644
--- a/test/lit/help/wasm-metadce.test
+++ b/test/lit/help/wasm-metadce.test
@@ -265,6 +265,9 @@
 ;; CHECK-NEXT:
 ;; CHECK-NEXT:   --merge-blocks                                merges blocks to their parents
 ;; CHECK-NEXT:
+;; CHECK-NEXT:   --merge-data-segments                         merges adjacent active data
+;; CHECK-NEXT:                                                 segments into a single segment
+;; CHECK-NEXT:
 ;; CHECK-NEXT:   --merge-j2cl-itables                          Merges itable structures into
 ;; CHECK-NEXT:                                                 vtables to make types more
 ;; CHECK-NEXT:                                                 compact
diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test
index d616e1cf085..4e72a3f967e 100644
--- a/test/lit/help/wasm-opt.test
+++ b/test/lit/help/wasm-opt.test
@@ -297,6 +297,9 @@
 ;; CHECK-NEXT:
 ;; CHECK-NEXT:   --merge-blocks                                merges blocks to their parents
 ;; CHECK-NEXT:
+;; CHECK-NEXT:   --merge-data-segments                         merges adjacent active data
+;; CHECK-NEXT:                                                 segments into a single segment
+;; CHECK-NEXT:
 ;; CHECK-NEXT:   --merge-j2cl-itables                          Merges itable structures into
 ;; CHECK-NEXT:                                                 vtables to make types more
 ;; CHECK-NEXT:                                                 compact
diff --git a/test/lit/help/wasm2js.test b/test/lit/help/wasm2js.test
index a91d5b5c050..3307e117d53 100644
--- a/test/lit/help/wasm2js.test
+++ b/test/lit/help/wasm2js.test
@@ -229,6 +229,9 @@
 ;; CHECK-NEXT:
 ;; CHECK-NEXT:   --merge-blocks                                merges blocks to their parents
 ;; CHECK-NEXT:
+;; CHECK-NEXT:   --merge-data-segments                         merges adjacent active data
+;; CHECK-NEXT:                                                 segments into a single segment
+;; CHECK-NEXT:
 ;; CHECK-NEXT:   --merge-j2cl-itables                          Merges itable structures into
 ;; CHECK-NEXT:                                                 vtables to make types more
 ;; CHECK-NEXT:                                                 compact

From 03551c76524e71a88ac835d13c3def4a36e61776 Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Tue, 28 Apr 2026 18:37:17 -0400
Subject: [PATCH 05/18] Adjust size heuristic

---
 src/passes/MergeDataSegments.cpp | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/passes/MergeDataSegments.cpp b/src/passes/MergeDataSegments.cpp
index 00ff46804dc..536ad187629 100644
--- a/src/passes/MergeDataSegments.cpp
+++ b/src/passes/MergeDataSegments.cpp
@@ -132,8 +132,10 @@ struct SegmentEntry {
 
 using SegmentMap = std::set<SegmentEntry, SegmentEntry::CompareStart>;
 
+// Bytes in the unsigned LEB encoding of x; zero still takes a single byte.
+size_t ulebSize(uint64_t x) { return x ? (std::bit_width(x) + 6) / 7 : 1; }
 // Bytes needed to represent a nonnegative integer in the signed LEB encoding.
-size_t lebSize(uint64_t x) { return (std::bit_width(x) + 7) / 7; }
+size_t slebSize(uint64_t x) { return (std::bit_width(x) + 7) / 7; }
 
 enum InBounds { No, Maybe, Yes };
 
@@ -223,9 +225,9 @@ struct MergeInfo {
       return;
     }
     // Pessimistically assume that all data segments use the implicit memory 0
-    // encoding. Then, the total size of a data segment is 3 + lebSize(offset) +
-    // lebSize(size) + size. We greedily attempt to merge segments in a single
-    // pass from lower to higher addresses.
+    // encoding. Then, the total size of a data segment is 3 + slebSize(offset)
+    // + ulebSize(size) + size. We greedily attempt to merge segments in a
+    // single pass from lower to higher addresses.
     auto left = newSegments.begin();
     auto right = left;
     ++right;
@@ -241,11 +243,11 @@ struct MergeInfo {
       }
 
       uint64_t leftSegSize =
-        3 + lebSize(left->start) + lebSize(leftSize) + leftSize;
+        3 + slebSize(left->start) + ulebSize(leftSize) + leftSize;
       uint64_t rightSegSize =
-        3 + lebSize(right->start) + lebSize(rightSize) + rightSize;
+        3 + slebSize(right->start) + ulebSize(rightSize) + rightSize;
       uint64_t mergedSegSize =
-        3 + lebSize(left->start) + lebSize(mergedSize) + mergedSize;
+        3 + slebSize(left->start) + ulebSize(mergedSize) + mergedSize;
       if (leftSegSize + rightSegSize < mergedSegSize) {
         left = right++;
         continue;

From f6df1eb70c83ea9c22dcf875dd2fc592b00c4cf3 Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Tue, 28 Apr 2026 18:37:40 -0400
Subject: [PATCH 06/18] Add MergeDataSegments to fuzz_opt.py

---
 scripts/fuzz_opt.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py
index b002151e0f2..89571298af9 100755
--- a/scripts/fuzz_opt.py
+++ b/scripts/fuzz_opt.py
@@ -2671,6 +2671,7 @@ def write_commands(commands, filename):
     ("--generate-stack-ir",),
     ("--licm",),
     ("--local-subtyping",),
+    ("--merge-data-segments",),
     ("--memory-packing",),
     ("--merge-blocks",),
     ('--merge-locals',),

From e29be638fcf86fc72bfe10fe73b99f148a909254 Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Tue, 28 Apr 2026 22:07:38 -0400
Subject: [PATCH 07/18] Retain all segment names following a trap segment

---
 src/passes/MergeDataSegments.cpp | 68 ++++++++++++++++----------------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/src/passes/MergeDataSegments.cpp b/src/passes/MergeDataSegments.cpp
index 536ad187629..dcf70e107e3 100644
--- a/src/passes/MergeDataSegments.cpp
+++ b/src/passes/MergeDataSegments.cpp
@@ -368,8 +368,17 @@ struct MergeDataSegments : public Pass {
       info.zeroFilled = zeroFilledMemory || !mem->imported();
     }
 
-    std::vector<std::unique_ptr<DataSegment>> oldSegments;
-    module->dataSegments.swap(oldSegments);
+    // Gather all active segments from the module, leaving passive segments
+    // behind. Also gather all active segment names for the final renaming step.
+    std::vector<std::unique_ptr<DataSegment>> activeSegments;
+    std::unordered_set<Name> activeNames;
+    for (auto& seg : module->dataSegments) {
+      if (!seg->isPassive) {
+        activeNames.insert(seg->name);
+        activeSegments.push_back(std::move(seg));
+      }
+    }
+    std::erase(module->dataSegments, nullptr);
 
     // To avoid changing observable behavior, we flush all existing data before
     // adding a new data segment that may be out-of-bounds. Between flushes, we
@@ -380,21 +389,14 @@ struct MergeDataSegments : public Pass {
     // its name in boundsCheckSeg in case we need to synthesize it again.
     std::optional<Name> boundsCheckMem = std::nullopt;
     std::optional<Name> boundsCheckSeg = std::nullopt;
-    // We also keep track of the activeNames so that we can rename active
-    // segments referred to by instructions, and retain an emptySegment in case
-    // we need a name but no nonempty active segments are left.
-    std::unordered_set<Name> activeNames;
+    // Retain an emptySegment in case we need a target for the renaming step,
+    // but no active segments remain after removing empty segments.
     std::unique_ptr<DataSegment> emptySegment = nullptr;
     // If a segment is guaranteed to cause an out-of-bounds trap, then we flush
     // all prior segments, copy it verbatim, then drop all remaining segments.
     std::unique_ptr<DataSegment> trapSegment = nullptr;
 
-    for (auto& seg : oldSegments) {
-      if (seg->isPassive) {
-        module->dataSegments.push_back(std::move(seg));
-        continue;
-      }
-      activeNames.insert(seg->name);
+    for (auto& seg : activeSegments) {
       auto& info = infos[seg->memory];
 
       if (auto* c = seg->offset->dynCast<Const>()) {
@@ -416,7 +418,7 @@ struct MergeDataSegments : public Pass {
 
         // If a constant-offset segment is statically in-bounds, we simply merge
         // it into its appropriate memory; otherwise, we flush all memories,
-        // then mark its segment as needing a bounds check next flush.
+        // then mark its own memory as needing a bounds check next flush.
         if (!trapsNeverHappen && inBounds != InBounds::Yes) {
           flushAll(module, infos, boundsCheckMem, boundsCheckSeg, std::nullopt);
           boundsCheckMem = info.mem->name;
@@ -488,10 +490,10 @@ struct MergeDataSegments : public Pass {
     }
     module->updateDataSegmentsMap();
 
-    // Determine a destination segment for any instructions that refer to an
-    // active segment. If there are no active segments left in the output, then
-    // there must have been some empty active segment in the input, which we
-    // have retained in emptySegment.
+    // Determine a target segment for any instructions that refer to an active
+    // segment. If there are no active segments left in the output, then there
+    // must have been an empty active segment in the input, which we have
+    // retained in emptySegment.
     std::optional<Name> firstActive = std::nullopt;
     for (const auto& seg : module->dataSegments) {
       if (!seg->isPassive) {
@@ -500,7 +502,7 @@ struct MergeDataSegments : public Pass {
       }
     }
     assert(firstActive || emptySegment);
-    Name destName = firstActive ? *firstActive : emptySegment->name;
+    Name targetName = firstActive ? *firstActive : emptySegment->name;
 
     struct ActiveSegmentRenamer
       : public WalkerPass<PostWalker<ActiveSegmentRenamer>> {
@@ -508,46 +510,46 @@ struct MergeDataSegments : public Pass {
       bool requiresNonNullableLocalFixups() override { return false; }
 
       std::unordered_set<Name> srcNames;
-      Name destName;
-      bool destUsed = false;
+      Name targetName;
+      bool targetUsed = false;
 
-      ActiveSegmentRenamer(std::unordered_set<Name> srcNames, Name destName)
-        : srcNames(std::move(srcNames)), destName(destName) {}
+      ActiveSegmentRenamer(std::unordered_set<Name> srcNames, Name targetName)
+        : srcNames(std::move(srcNames)), targetName(targetName) {}
 
       void visitMemoryInit(MemoryInit* curr) {
         if (srcNames.contains(curr->segment)) {
-          curr->segment = destName;
-          destUsed = true;
+          curr->segment = targetName;
+          targetUsed = true;
         }
       }
 
       void visitDataDrop(DataDrop* curr) {
         if (srcNames.contains(curr->segment)) {
-          curr->segment = destName;
-          destUsed = true;
+          curr->segment = targetName;
+          targetUsed = true;
         }
       }
 
       void visitArrayNewData(ArrayNewData* curr) {
         if (srcNames.contains(curr->segment)) {
-          curr->segment = destName;
-          destUsed = true;
+          curr->segment = targetName;
+          targetUsed = true;
         }
       }
 
       void visitArrayInitData(ArrayInitData* curr) {
         if (srcNames.contains(curr->segment)) {
-          curr->segment = destName;
-          destUsed = true;
+          curr->segment = targetName;
+          targetUsed = true;
         }
       }
     };
 
-    // Replace the names, then actually add the empty segment if needed.
-    ActiveSegmentRenamer renamer(std::move(activeNames), destName);
+    // Replace the names, then add an empty target segment if needed.
+    ActiveSegmentRenamer renamer(std::move(activeNames), targetName);
     renamer.run(getPassRunner(), module);
     renamer.runOnModuleCode(getPassRunner(), module);
-    if (renamer.destUsed && !firstActive) {
+    if (renamer.targetUsed && !firstActive) {
       module->dataSegments.push_back(std::move(emptySegment));
       module->updateDataSegmentsMap();
     }

From 2da098e82634452c540f36decdf2c229d752b00e Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Wed, 29 Apr 2026 00:07:19 -0400
Subject: [PATCH 08/18] Detect segments ending at address 2^64

---
 src/passes/MergeDataSegments.cpp | 33 ++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/src/passes/MergeDataSegments.cpp b/src/passes/MergeDataSegments.cpp
index dcf70e107e3..02b4d269c1e 100644
--- a/src/passes/MergeDataSegments.cpp
+++ b/src/passes/MergeDataSegments.cpp
@@ -156,21 +156,34 @@ struct MergeInfo {
     if (size > MAX_SEG_SIZE) {
       return InBounds::No;
     }
-    uint64_t end;
+    bool end64 = false;
+    uint64_t end, lastAddr = std::numeric_limits<uint64_t>::max();
     if (std::ckd_add<uint64_t>(&end, start, size)) {
-      return InBounds::No;
-    }
-    if (end == 0) {
-      return InBounds::Yes;
+      // The spec permits a segment to end at address 2^64 exactly, but we
+      // cannot handle it, so either return No or throw an error.
+      if (end != 0) {
+        return InBounds::No;
+      }
+      end64 = true;
+    } else {
+      if (end == 0) {
+        return InBounds::Yes;
+      }
+      lastAddr = end - 1;
     }
-
-    auto neededSize = ((end - 1) >> mem->pageSizeLog2) + 1;
-    if (neededSize <= knownSize) {
+    uint64_t lastPage = lastAddr >> mem->pageSizeLog2;
+    if (lastPage < knownSize) {
+      if (end64) {
+        Fatal() << "MergeDataSegments does not support offset 2^64-1";
+      }
       return InBounds::Yes;
-    } else if (!mem->imported() || (mem->hasMax() && neededSize > mem->max)) {
+    } else if (!mem->imported() || (mem->hasMax() && lastPage >= mem->max)) {
       return InBounds::No;
     } else {
-      knownSize = neededSize;
+      if (end64) {
+        Fatal() << "MergeDataSegments does not support offset 2^64-1";
+      }
+      knownSize = lastPage + 1;
       return InBounds::Maybe;
     }
   }

From 622785b71e43f2fd0f14a081feef34e86b600f78 Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Wed, 29 Apr 2026 00:34:56 -0400
Subject: [PATCH 09/18] Fix and simplify bounds-check tracking

---
 src/passes/MergeDataSegments.cpp | 89 +++++++++++++++++---------------
 1 file changed, 46 insertions(+), 43 deletions(-)

diff --git a/src/passes/MergeDataSegments.cpp b/src/passes/MergeDataSegments.cpp
index 02b4d269c1e..e14ecf7aec6 100644
--- a/src/passes/MergeDataSegments.cpp
+++ b/src/passes/MergeDataSegments.cpp
@@ -132,6 +132,14 @@ struct SegmentEntry {
 
 using SegmentMap = std::set<SegmentEntry, SegmentEntry::CompareStart>;
 
+// Information about the bounds check that triggered the previous flush. The
+// segment name is used as a hint when synthesizing an empty segment.
+struct BoundsCheck {
+  Name mem;              // Memory that last triggered a bounds check.
+  Name seg;              // Name reused if an empty segment is synthesized.
+  Address lastPageStart; // Byte address where the bounds-check page starts.
+};
+
 // Bytes needed to represent a nonnegative integer in the unsigned LEB encoding.
 size_t ulebSize(uint64_t x) { return (std::bit_width(x) + 6) / 7; }
 // Bytes needed to represent a nonnegative integer in the signed LEB encoding.
@@ -278,19 +286,18 @@ struct MergeInfo {
   }
 
   void flushBoundsCheck(Module* module,
-                        std::optional<Name>& boundsCheckSeg,
+                        const BoundsCheck& boundsCheck,
                         bool clearFlushed) {
-    // Flush the first merged segment that overlaps the last known page, so that
-    // we hit the bounds check before adding any other segments.
+    // Flush the first merged segment that overlaps the bounds-check page, so
+    // that the bounds check is triggered before any other segments are added.
     assert(knownSize != 0);
-    Address lastPageStart = (knownSize - 1) << mem->pageSizeLog2;
-    auto it = newSegments.upper_bound(lastPageStart);
+    auto it = newSegments.upper_bound(boundsCheck.lastPageStart);
     bool hasEntry = false;
     SegmentEntry entry;
     if (it != newSegments.begin()) {
       auto preIt = it;
       --preIt;
-      if (lastPageStart < preIt->end()) {
+      if (boundsCheck.lastPageStart < preIt->end()) {
         hasEntry = true;
         entry = std::move(newSegments.extract(preIt).value());
       }
@@ -305,10 +312,8 @@ struct MergeInfo {
     // If the last known page has no nonempty segments, synthesize a new empty
     // segment.
     if (!hasEntry) {
-      assert(boundsCheckSeg);
-      entry.start = lastPageStart + 1;
-      entry.name = *boundsCheckSeg;
-      boundsCheckSeg.reset();
+      entry.start = boundsCheck.lastPageStart + 1;
+      entry.name = boundsCheck.seg;
     }
     flushEntry(module, std::move(entry));
   }
@@ -342,16 +347,15 @@ struct MergeInfo {
 
 void flushAll(Module* module,
               std::unordered_map<Name, MergeInfo>& infos,
-              std::optional<Name>& boundsCheckMem,
-              std::optional<Name>& boundsCheckSeg,
+              std::optional<BoundsCheck>& boundsCheck,
               std::optional<Name> clearFlushedMem) {
   for (const auto& mem : module->memories) {
     infos[mem->name].mergeNearAdjacent();
   }
-  if (boundsCheckMem) {
-    infos[*boundsCheckMem].flushBoundsCheck(
-      module, boundsCheckSeg, boundsCheckMem == clearFlushedMem);
-    boundsCheckMem.reset();
+  if (boundsCheck) {
+    infos[boundsCheck->mem].flushBoundsCheck(
+      module, *boundsCheck, boundsCheck->mem == clearFlushedMem);
+    boundsCheck.reset();
   }
   for (const auto& mem : module->memories) {
     infos[mem->name].flush(module, mem->name == clearFlushedMem);
@@ -395,13 +399,10 @@ struct MergeDataSegments : public Pass {
 
     // To avoid changing observable behavior, we flush all existing data before
     // adding a new data segment that may be out-of-bounds. Between flushes, we
-    // use boundsCheckMem to lazily keep track of which memory last triggered a
+    // use boundsCheck to lazily keep track of which memory last triggered a
     // bounds check, so that we can flush a corresponding bounds-check segment
-    // before flushing any other data. If an empty segment triggers a bounds
-    // check, then it will not show up in boundsCheckMem, so we keep track of
-    // its name in boundsCheckSeg in case we need to synthesize it again.
-    std::optional<Name> boundsCheckMem = std::nullopt;
-    std::optional<Name> boundsCheckSeg = std::nullopt;
+    // before flushing any other data.
+    std::optional<BoundsCheck> boundsCheck = std::nullopt;
     // Retain an emptySegment in case we need a target for the renaming step,
     // but no active segments remain after removing empty segments.
     std::unique_ptr<DataSegment> emptySegment = nullptr;
@@ -429,23 +430,27 @@ struct MergeDataSegments : public Pass {
           emptySegment = std::move(seg);
         }
 
-        // If a constant-offset segment is statically in-bounds, we simply merge
-        // it into its appropriate memory; otherwise, we flush all memories,
-        // then mark its own memory as needing a bounds check next flush.
+        // If a constant-offset segment is not statically in-bounds, flush all
+        // memories and mark its page as the next bounds-check page.
         if (!trapsNeverHappen && inBounds != InBounds::Yes) {
-          flushAll(module, infos, boundsCheckMem, boundsCheckSeg, std::nullopt);
-          boundsCheckMem = info.mem->name;
-          boundsCheckSeg = entry.name;
+          auto neededSize = info.knownSize;
+          assert(neededSize != 0);
+          flushAll(module, infos, boundsCheck, std::nullopt);
+          boundsCheck = BoundsCheck();
+          boundsCheck->mem = info.mem->name;
+          boundsCheck->seg = entry.name;
+          boundsCheck->lastPageStart = (neededSize - 1)
+                                       << info.mem->pageSizeLog2;
         }
 
         // As a special fallback, flush the memory early if the merged segment
         // would not respect MAX_SEG_SIZE.
         if (!entry.canMergeInto(info.newSegments)) {
-          if (boundsCheckMem) {
-            infos[*boundsCheckMem].mergeNearAdjacent();
-            infos[*boundsCheckMem].flushBoundsCheck(
-              module, boundsCheckSeg, boundsCheckMem == seg->memory);
-            boundsCheckMem.reset();
+          if (boundsCheck) {
+            infos[boundsCheck->mem].mergeNearAdjacent();
+            infos[boundsCheck->mem].flushBoundsCheck(
+              module, *boundsCheck, boundsCheck->mem == seg->memory);
+            boundsCheck.reset();
           }
           info.mergeNearAdjacent();
           info.flush(module, false);
@@ -459,24 +464,22 @@ struct MergeDataSegments : public Pass {
           // also requires all other memories to be flushed due to the bounds
           // check.
           if (trapsNeverHappen) {
-            if (boundsCheckMem) {
-              infos[*boundsCheckMem].mergeNearAdjacent();
-              infos[*boundsCheckMem].flushBoundsCheck(
-                module, boundsCheckSeg, boundsCheckMem == seg->memory);
-              boundsCheckMem.reset();
+            if (boundsCheck) {
+              infos[boundsCheck->mem].mergeNearAdjacent();
+              infos[boundsCheck->mem].flushBoundsCheck(
+                module, *boundsCheck, boundsCheck->mem == seg->memory);
+              boundsCheck.reset();
             }
             info.mergeNearAdjacent();
             info.flush(module, true);
           } else {
-            flushAll(
-              module, infos, boundsCheckMem, boundsCheckSeg, seg->memory);
+            flushAll(module, infos, boundsCheck, seg->memory);
           }
           info.zeroFilled = false;
         } else {
           // An empty non-constant-offset segment only triggers a bounds check.
           if (!trapsNeverHappen) {
-            flushAll(
-              module, infos, boundsCheckMem, boundsCheckSeg, std::nullopt);
+            flushAll(module, infos, boundsCheck, std::nullopt);
           }
         }
 
@@ -497,7 +500,7 @@ struct MergeDataSegments : public Pass {
     }
 
     // Flush all remaining segments, then copy any trap segment.
-    flushAll(module, infos, boundsCheckMem, boundsCheckSeg, std::nullopt);
+    flushAll(module, infos, boundsCheck, std::nullopt);
     if (trapSegment) {
       module->dataSegments.push_back(std::move(trapSegment));
     }

From 85f59ddc7b8d6fafaa8210452d24ba88d1a853f9 Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Wed, 29 Apr 2026 12:51:51 -0400
Subject: [PATCH 10/18] Fix flushed data buffer sizing

---
 src/passes/MergeDataSegments.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/passes/MergeDataSegments.cpp b/src/passes/MergeDataSegments.cpp
index e14ecf7aec6..957b76690f6 100644
--- a/src/passes/MergeDataSegments.cpp
+++ b/src/passes/MergeDataSegments.cpp
@@ -222,7 +222,7 @@ struct MergeInfo {
         if (!zeroFilled) {
           return false;
         }
-        dest.resize(dest.size() + (it->start - start));
+        dest.resize(it->start - start);
       }
       if (end <= it->end()) {
         dest.insert(
@@ -232,7 +232,6 @@ struct MergeInfo {
       dest.insert(dest.end(), it->data.begin(), it->data.end());
       ++it;
     }
-
     if (!zeroFilled) {
       return false;
     }
@@ -264,11 +263,11 @@ struct MergeInfo {
       }
 
       uint64_t leftSegSize =
-        3 + slebSize(left->start) + ulebSize(leftSize) + leftSize;
+        leftSize + 3 + slebSize(left->start) + ulebSize(leftSize);
       uint64_t rightSegSize =
-        3 + slebSize(right->start) + ulebSize(rightSize) + rightSize;
+        rightSize + 3 + slebSize(right->start) + ulebSize(rightSize);
       uint64_t mergedSegSize =
-        3 + slebSize(left->start) + ulebSize(mergedSize) + mergedSize;
+        mergedSize + 3 + slebSize(left->start) + ulebSize(mergedSize);
       if (leftSegSize + rightSegSize < mergedSegSize) {
         left = right++;
         continue;

From a3e671ed0ab83f5c72d8236a9d4782132e6d0725 Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Wed, 29 Apr 2026 13:06:17 -0400
Subject: [PATCH 11/18] Fix flushed data buffer initialization

---
 src/passes/MergeDataSegments.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/passes/MergeDataSegments.cpp b/src/passes/MergeDataSegments.cpp
index 957b76690f6..1aed6701f71 100644
--- a/src/passes/MergeDataSegments.cpp
+++ b/src/passes/MergeDataSegments.cpp
@@ -207,13 +207,14 @@ struct MergeInfo {
     if (it != flushedSegments.begin()) {
       auto preIt = it;
       --preIt;
-      if (start < it->end()) {
-        if (end <= it->end()) {
-          dest.assign(it->data.begin() + (start - it->start),
-                      it->data.begin() + (end - it->start));
+      if (start < preIt->end()) {
+        if (end <= preIt->end()) {
+          dest.assign(preIt->data.begin() + (start - preIt->start),
+                      preIt->data.begin() + (end - preIt->start));
           return true;
         }
-        dest.assign(it->data.begin() + (start - it->start), it->data.end());
+        dest.assign(preIt->data.begin() + (start - preIt->start),
+                    preIt->data.end());
       }
     }
 

From 305bbe76db0c5adc2bec608f1fd5db26c729195b Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Wed, 29 Apr 2026 13:29:37 -0400
Subject: [PATCH 12/18] Omit empty non-constant-offset segments under TNH

---
 src/passes/MergeDataSegments.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/passes/MergeDataSegments.cpp b/src/passes/MergeDataSegments.cpp
index 1aed6701f71..151168e78b0 100644
--- a/src/passes/MergeDataSegments.cpp
+++ b/src/passes/MergeDataSegments.cpp
@@ -476,19 +476,20 @@ struct MergeDataSegments : public Pass {
             flushAll(module, infos, boundsCheck, seg->memory);
           }
           info.zeroFilled = false;
+
+          // For the bounds check, conservatively assume that the offset is 0.
+          if (info.inBounds(0, seg->data.size()) == InBounds::No) {
+            trapSegment = std::move(seg);
+            break;
+          }
+          module->dataSegments.push_back(std::move(seg));
         } else {
           // An empty non-constant-offset segment only triggers a bounds check.
           if (!trapsNeverHappen) {
             flushAll(module, infos, boundsCheck, std::nullopt);
+            module->dataSegments.push_back(std::move(seg));
           }
         }
-
-        // For the bounds check, we conservatively assume that the offset is 0.
-        if (info.inBounds(0, seg->data.size()) == InBounds::No) {
-          trapSegment = std::move(seg);
-          break;
-        }
-        module->dataSegments.push_back(std::move(seg));
       }
     }
 

From d261737b2b12b5c80e2ae4f00543f7d260f4dfcb Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Wed, 29 Apr 2026 13:38:35 -0400
Subject: [PATCH 13/18] Add lit tests for MergeDataSegments

---
 test/lit/passes/merge-data-segments-tnh.wast | 147 ++++
 test/lit/passes/merge-data-segments.wast     | 824 +++++++++++++++++++
 2 files changed, 971 insertions(+)
 create mode 100644 test/lit/passes/merge-data-segments-tnh.wast
 create mode 100644 test/lit/passes/merge-data-segments.wast

diff --git a/test/lit/passes/merge-data-segments-tnh.wast b/test/lit/passes/merge-data-segments-tnh.wast
new file mode 100644
index 00000000000..032bd999fa4
--- /dev/null
+++ b/test/lit/passes/merge-data-segments-tnh.wast
@@ -0,0 +1,147 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+;; RUN: foreach %s %t wasm-opt -all --merge-data-segments -tnh -S -o - | filecheck %s
+
+;; Guaranteed traps remain guaranteed under TNH.
+(module
+  ;; CHECK:      (memory $0 0 0)
+  (memory $0 0 0)
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  (data $0 (i32.const 0) "")
+  ;; CHECK:      (data $1 (i32.const 1) "")
+  (data $1 (i32.const 1) "")
+  (data $2 (memory $1) (i32.const 0) "dead")
+)
+
+(module
+  ;; CHECK:      (memory $0 0)
+  (memory $0 0)
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  (data $0 (i32.const 0) "")
+  ;; CHECK:      (data $1 (i32.const 1) "")
+  (data $1 (i32.const 1) "")
+  (data $2 (memory $1) (i32.const 0) "dead")
+)
+
+(module
+  ;; CHECK:      (import "" "" (memory $0 0 0))
+  (import "" "" (memory $0 0 0))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  (data $0 (i32.const 0) "")
+  ;; CHECK:      (data $1 (i32.const 1) "")
+  (data $1 (i32.const 1) "")
+  (data $2 (memory $1) (i32.const 0) "dead")
+)
+
+;; Empty non-constant-offset segments are dropped under TNH.
+(module
+  ;; CHECK:      (global $0 i32 (i32.const 0))
+  (global $0 i32 (i32.const 0))
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "foobar")
+  (data $0 (i32.const 0) "foo")
+  (data $1 (global.get $0) "")
+  (data $2 (i32.const 3) "bar")
+)
+
+;; Nonempty non-constant-offset segments trigger no bounds checks under TNH.
+(module
+  ;; CHECK:      (global $0 i32 (i32.const 0))
+  (global $0 i32 (i32.const 0))
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $1 (memory $1) (global.get $0) "bar")
+
+  ;; CHECK:      (data $0 (i32.const 0) "foopez")
+  (data $0 (i32.const 0) "foo")
+  (data $1 (memory $1) (global.get $0) "bar")
+  (data $2 (i32.const 3) "pez")
+)
+
+;; Nonempty non-constant-offset segments still flush their own memory.
+(module
+  ;; CHECK:      (global $0 i32 (i32.const 0))
+  (global $0 i32 (i32.const 0))
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "foo")
+  (data $0 (i32.const 0) "foo")
+  ;; CHECK:      (data $1 (global.get $0) "bar")
+  (data $1 (global.get $0) "bar")
+  ;; CHECK:      (data $2 (i32.const 3) "pez")
+  (data $2 (i32.const 3) "pez")
+)
+
+;; Nonempty non-constant-offset segments still invalidate flushed data.
+(module
+  ;; CHECK:      (global $0 i32 (i32.const 0))
+  (global $0 i32 (i32.const 0))
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 3) "bar")
+  (data $0 (i32.const 3) "bar")
+  ;; CHECK:      (data $1 (global.get $0) "\00")
+  (data $1 (global.get $0) "\00")
+  ;; CHECK:      (data $2 (i32.const 0) "foo")
+  (data $2 (i32.const 0) "foo")
+  ;; CHECK:      (data $3 (i32.const 6) "pez")
+  (data $3 (i32.const 6) "pez")
+)
+
+;; Bounds checks within memory limits are assumed to succeed under TNH.
+(module
+  ;; CHECK:      (import "" "" (memory $0 0))
+  (import "" "" (memory $0 0))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $1 (i32.const 0) "fooQUX")
+
+  ;; CHECK:      (data $3 (i32.const 65536) "bar")
+
+  ;; CHECK:      (data $5 (i32.const 131072) "pez")
+
+  ;; CHECK:      (data $7 (i32.const 196608) "qux")
+
+  ;; CHECK:      (data $0 (memory $1) (i32.const 0) "post")
+  (data $0 (memory $1) (i32.const 0) "pre")
+  (data $1 (i32.const 0) "foo")
+  (data $2 (i32.const 3) "FOO")
+  (data $3 (i32.const 65536) "bar")
+  (data $4 (i32.const 3) "BAR")
+  (data $5 (i32.const 131072) "pez")
+  (data $6 (i32.const 3) "PEZ")
+  (data $7 (i32.const 196608) "qux")
+  (data $8 (i32.const 3) "QUX")
+  (data $9 (memory $1) (i32.const 0) "post")
+)
+
+(module
+  ;; CHECK:      (import "" "" (memory $0 0))
+  (import "" "" (memory $0 0))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $7 (i32.const 0) "fooFOO")
+
+  ;; CHECK:      (data $5 (i32.const 65536) "bar")
+
+  ;; CHECK:      (data $3 (i32.const 131072) "pez")
+
+  ;; CHECK:      (data $1 (i32.const 196608) "qux")
+
+  ;; CHECK:      (data $0 (memory $1) (i32.const 0) "post")
+  (data $0 (memory $1) (i32.const 0) "pre")
+  (data $1 (i32.const 196608) "qux")
+  (data $2 (i32.const 3) "QUX")
+  (data $3 (i32.const 131072) "pez")
+  (data $4 (i32.const 3) "PEZ")
+  (data $5 (i32.const 65536) "bar")
+  (data $6 (i32.const 3) "BAR")
+  (data $7 (i32.const 0) "foo")
+  (data $8 (i32.const 3) "FOO")
+  (data $9 (memory $1) (i32.const 0) "post")
+)
diff --git a/test/lit/passes/merge-data-segments.wast b/test/lit/passes/merge-data-segments.wast
new file mode 100644
index 00000000000..30f61842974
--- /dev/null
+++ b/test/lit/passes/merge-data-segments.wast
@@ -0,0 +1,824 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+;; RUN: foreach %s %t wasm-opt -all --merge-data-segments -S -o - | filecheck %s
+
+;; Basic tests for merge algorithm.
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 3) "foobarpezqux")
+  (data $0 (i32.const 3) "foo")
+  (data $1 (i32.const 6) "bar")
+  (data $2 (i32.const 9) "pez")
+  (data $3 (i32.const 12) "qux")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  (data $0 (i32.const 12) "qux")
+  (data $1 (i32.const 9) "pez")
+  (data $2 (i32.const 6) "bar")
+  ;; CHECK:      (data $3 (i32.const 3) "foobarpezqux")
+  (data $3 (i32.const 3) "foo")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "quuuux")
+  (data $0 (i32.const 0) "foo")
+  (data $1 (i32.const 0) "baar")
+  (data $2 (i32.const 0) "peeez")
+  (data $3 (i32.const 0) "quuuux")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  (data $0 (i32.const 3) "foo")
+  (data $1 (i32.const 2) "baar")
+  (data $2 (i32.const 1) "peeez")
+  ;; CHECK:      (data $3 (i32.const 0) "quuuux")
+  (data $3 (i32.const 0) "quuuux")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  (data $0 (i32.const 3) "foo")
+  (data $1 (i32.const 2) "baaar")
+  (data $2 (i32.const 1) "peeeeez")
+  ;; CHECK:      (data $3 (i32.const 0) "quuuuuuux")
+  (data $3 (i32.const 0) "quuuuuuux")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "foorzx")
+  (data $0 (i32.const 0) "quuuux")
+  (data $1 (i32.const 0) "peeez")
+  (data $2 (i32.const 0) "baar")
+  (data $3 (i32.const 0) "foo")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "qpbfoo")
+  (data $0 (i32.const 0) "quuuux")
+  (data $1 (i32.const 1) "peeez")
+  (data $2 (i32.const 2) "baar")
+  (data $3 (i32.const 3) "foo")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "qpbfoorzx")
+  (data $0 (i32.const 0) "quuuuuuux")
+  (data $1 (i32.const 1) "peeeeez")
+  (data $2 (i32.const 2) "baaar")
+  (data $3 (i32.const 3) "foo")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "fobapequFOBAPEQUX")
+  (data $0 (i32.const 0) "foo")
+  (data $1 (i32.const 2) "bar")
+  (data $2 (i32.const 4) "pez")
+  (data $3 (i32.const 6) "qux")
+  (data $4 (i32.const 8) "FOO")
+  (data $5 (i32.const 10) "BAR")
+  (data $6 (i32.const 12) "PEZ")
+  (data $7 (i32.const 14) "QUX")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  (data $0 (i32.const 14) "QUX")
+  (data $1 (i32.const 12) "PEZ")
+  (data $2 (i32.const 10) "BAR")
+  (data $3 (i32.const 8) "FOO")
+  (data $4 (i32.const 6) "qux")
+  (data $5 (i32.const 4) "pez")
+  (data $6 (i32.const 2) "bar")
+  ;; CHECK:      (data $7 (i32.const 0) "fooarezuxOOAREZUX")
+  (data $7 (i32.const 0) "foo")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "QUXPEZ")
+  (data $0 (i32.const 0) "foo")
+  (data $1 (i32.const 3) "bar")
+  (data $2 (i32.const 0) "pez")
+  (data $3 (i32.const 3) "qux")
+  (data $4 (i32.const 3) "FOO")
+  (data $5 (i32.const 0) "BAR")
+  (data $6 (i32.const 3) "PEZ")
+  (data $7 (i32.const 0) "QUX")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  (data $0 (i32.const 3) "foobar")
+  (data $1 (i32.const 2) "qux")
+  ;; CHECK:      (data $2 (i32.const 1) "quxxobfooux")
+  (data $2 (i32.const 1) "qux")
+  (data $3 (i32.const 9) "qux")
+  (data $4 (i32.const 7) "foo")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "fobapez")
+  (data $0 (i32.const 0) "foo")
+  (data $1 (i32.const 20) "PEZ")
+  (data $2 (i32.const 2) "bar")
+  (data $3 (i32.const 18) "BAR")
+  (data $4 (i32.const 4) "pez")
+  ;; CHECK:      (data $5 (i32.const 16) "FOOAREZ")
+  (data $5 (i32.const 16) "FOO")
+)
+
+(module
+  ;; CHECK:      (memory $0 i64 1 1)
+  (memory $0 i64 1 1)
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  (data $0 (memory $1) (i32.const 5) "PEZ")
+  ;; CHECK:      (data $1 (i64.const 0) "fobapez")
+  (data $1 (i64.const 0) "foo")
+  (data $2 (memory $1) (i32.const 3) "BAR")
+  (data $3 (i64.const 2) "bar")
+  ;; CHECK:      (data $4 (memory $1) (i32.const 1) "FOOAREZ")
+  (data $4 (memory $1) (i32.const 1) "FOO")
+  (data $5 (i64.const 4) "pez")
+)
+
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (memory $2 1 1)
+  (memory $2 1 1)
+  ;; CHECK:      (memory $3 1 1)
+  (memory $3 1 1)
+  ;; CHECK:      (data $4 (i32.const 0) "foobar")
+
+  ;; CHECK:      (data $5 (memory $1) (i32.const 1) "foor")
+
+  ;; CHECK:      (data $1 (memory $2) (i32.const 1) "bfoo")
+
+  ;; CHECK:      (data $0 (memory $3) (i32.const 0) "barfoo")
+  (data $0 (memory $3) (i32.const 0) "bar")
+  (data $1 (memory $2) (i32.const 1) "bar")
+  (data $2 (memory $1) (i32.const 2) "bar")
+  (data $3 (i32.const 3) "bar")
+  (data $4 (i32.const 0) "foo")
+  (data $5 (memory $1) (i32.const 1) "foo")
+  (data $6 (memory $2) (i32.const 2) "foo")
+  (data $7 (memory $3) (i32.const 3) "foo")
+)
+
+;; Tests for passive segments and instruction rewriting.
+(module
+  ;; CHECK:      (type $1 (array (mut i8)))
+
+  ;; CHECK:      (type $0 (func))
+  (type $0 (func))
+  (type $1 (array (mut i8)))
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  (data $0 (i32.const 3) "qux")
+  ;; CHECK:      (data $1 "foo")
+  (data $1 "foo")
+  ;; CHECK:      (data $3 "pez")
+
+  ;; CHECK:      (data $2 (i32.const 0) "barqux")
+  (data $2 (i32.const 0) "bar")
+  (data $3 "pez")
+  ;; CHECK:      (data $4 (i32.const 16) "FOO")
+  (data $4 (i32.const 16) "FOO")
+  ;; CHECK:      (func $0 (type $0)
+  ;; CHECK-NEXT:  (memory.init $3
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (data.drop $1)
+  ;; CHECK-NEXT:  (array.init_data $1 $1
+  ;; CHECK-NEXT:   (array.new_data $1 $3
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (memory.init $2
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (data.drop $2)
+  ;; CHECK-NEXT:  (array.init_data $1 $2
+  ;; CHECK-NEXT:   (array.new_data $1 $2
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $0 (type $0)
+    (memory.init $3
+      (i32.const 0)
+      (i32.const 0)
+      (i32.const 0)
+    )
+    (data.drop $1)
+    (array.init_data $1 $1
+      (array.new_data $1 $3
+        (i32.const 0)
+        (i32.const 0)
+      )
+      (i32.const 0)
+      (i32.const 0)
+      (i32.const 0)
+    )
+    (memory.init $2
+      (i32.const 0)
+      (i32.const 0)
+      (i32.const 0)
+    )
+    (data.drop $0)
+    (array.init_data $1 $0
+      (array.new_data $1 $2
+        (i32.const 0)
+        (i32.const 0)
+      )
+      (i32.const 0)
+      (i32.const 0)
+      (i32.const 0)
+    )
+  )
+)
+
+(module
+  ;; CHECK:      (type $1 (array (mut i8)))
+
+  ;; CHECK:      (type $0 (func))
+  (type $0 (func))
+  (type $1 (array (mut i8)))
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 "foo")
+  (data $0 "foo")
+  ;; CHECK:      (data $5 "bar")
+
+  ;; CHECK:      (data $1 (i32.const 64) "")
+  (data $1 (i32.const 64) "")
+  (data $2 (i32.const 48) "")
+  (data $3 (i32.const 32) "")
+  (data $4 (i32.const 16) "")
+  (data $5 "bar")
+  ;; CHECK:      (func $0 (type $0)
+  ;; CHECK-NEXT:  (memory.init $5
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (data.drop $0)
+  ;; CHECK-NEXT:  (array.init_data $1 $0
+  ;; CHECK-NEXT:   (array.new_data $1 $5
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (memory.init $1
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (data.drop $1)
+  ;; CHECK-NEXT:  (array.init_data $1 $1
+  ;; CHECK-NEXT:   (array.new_data $1 $1
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $0 (type $0)
+    (memory.init $5
+      (i32.const 0)
+      (i32.const 0)
+      (i32.const 0)
+    )
+    (data.drop $0)
+    (array.init_data $1 $0
+      (array.new_data $1 $5
+        (i32.const 0)
+        (i32.const 0)
+      )
+      (i32.const 0)
+      (i32.const 0)
+      (i32.const 0)
+    )
+    (memory.init $4
+      (i32.const 0)
+      (i32.const 0)
+      (i32.const 0)
+    )
+    (data.drop $3)
+    (array.init_data $1 $2
+      (array.new_data $1 $1
+        (i32.const 0)
+        (i32.const 0)
+      )
+      (i32.const 0)
+      (i32.const 0)
+      (i32.const 0)
+    )
+  )
+)
+
+;; Test that passive segments following a trap segment are retained.
+(module
+  ;; CHECK:      (type $0 (func))
+  (type $0 (func))
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $2 "bar")
+
+  ;; CHECK:      (data $4 "qux")
+
+  ;; CHECK:      (data $0 (i32.const 0) "foo")
+  (data $0 (i32.const 0) "foo")
+  ;; CHECK:      (data $1 (i32.const 65536) "trap")
+  (data $1 (i32.const 65536) "trap")
+  (data $2 "bar")
+  (data $3 (i32.const 3) "pez")
+  (data $4 "qux")
+  ;; CHECK:      (func $0 (type $0)
+  ;; CHECK-NEXT:  (data.drop $4)
+  ;; CHECK-NEXT:  (data.drop $0)
+  ;; CHECK-NEXT: )
+  (func $0 (type $0)
+    (data.drop $4)
+    (data.drop $3)
+  )
+)
+
+;; Tests for guaranteed traps: remaining active segments should be dropped.
+(module
+  ;; CHECK:      (memory $0 0 0)
+  (memory $0 0 0)
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  (data $0 (i32.const 0) "")
+  ;; CHECK:      (data $1 (i32.const 1) "")
+  (data $1 (i32.const 1) "")
+  (data $2 (memory $1) (i32.const 0) "dead")
+)
+
+(module
+  ;; CHECK:      (memory $0 0)
+  (memory $0 0)
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  (data $0 (i32.const 0) "")
+  ;; CHECK:      (data $1 (i32.const 1) "")
+  (data $1 (i32.const 1) "")
+  (data $2 (memory $1) (i32.const 0) "dead")
+)
+
+(module
+  ;; CHECK:      (import "" "" (memory $0 0 0))
+  (import "" "" (memory $0 0 0))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  (data $0 (i32.const 0) "")
+  ;; CHECK:      (data $1 (i32.const 1) "")
+  (data $1 (i32.const 1) "")
+  (data $2 (memory $1) (i32.const 0) "dead")
+)
+
+(module
+  ;; CHECK:      (global $0 i32 (i32.const 64))
+  (global $0 i32 (i32.const 64))
+  ;; CHECK:      (memory $0 4 4 (pagesize 1))
+  (memory $0 4 4 (pagesize 1))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $0 (global.get $0) "foo")
+  (data $0 (global.get $0) "foo")
+  ;; CHECK:      (data $1 (global.get $0) "baar")
+  (data $1 (global.get $0) "baar")
+  ;; CHECK:      (data $2 (global.get $0) "peeez")
+  (data $2 (global.get $0) "peeez")
+  (data $3 (memory $1) (i32.const 0) "dead")
+)
+
+;; Not a guaranteed trap: the imported memory could be longer.
+(module
+  ;; CHECK:      (import "" "" (memory $0 0))
+  (import "" "" (memory $0 0))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  (data $0 (i32.const 0) "")
+  ;; CHECK:      (data $1 (i32.const 1) "")
+  (data $1 (i32.const 1) "")
+  ;; CHECK:      (data $2 (memory $1) (i32.const 0) "dead")
+  (data $2 (memory $1) (i32.const 0) "dead")
+)
+
+;; Merged segment, trap segment, and two dead segments.
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 65532) "ffoo")
+  (data $0 (i32.const 65532) "foo")
+  (data $1 (i32.const 65533) "foo")
+  ;; CHECK:      (data $2 (i32.const 65534) "foo")
+  (data $2 (i32.const 65534) "foo")
+  (data $3 (i32.const 65535) "foo")
+  (data $4 (i32.const 65536) "foo")
+)
+
+;; Test for address overflow.
+(module
+  ;; CHECK:      (import "" "" (memory $0 i64 0))
+  (import "" "" (memory $0 i64 0))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $0 (i64.const 0) "foo")
+  (data $0 (i64.const 0) "foo")
+  ;; CHECK:      (data $1 (i64.const -4) "bar")
+  (data $1 (i64.const -4) "bar")
+  ;; CHECK:      (data $2 (i64.const -4) "barpez")
+  (data $2 (i64.const -4) "barpez")
+  (data $3 (memory $1) (i32.const 0) "dead")
+)
+
+;; Tests for bounds checks and flushing.
+(module
+  ;; CHECK:      (import "" "" (memory $0 0))
+  (import "" "" (memory $0 0))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $0 (memory $1) (i32.const 0) "pre")
+  (data $0 (memory $1) (i32.const 0) "pre")
+  ;; CHECK:      (data $1 (i32.const 0) "fooFOO")
+  (data $1 (i32.const 0) "foo")
+  (data $2 (i32.const 3) "FOO")
+  ;; CHECK:      (data $3 (i32.const 65536) "bar")
+  (data $3 (i32.const 65536) "bar")
+  ;; CHECK:      (data $4 (i32.const 3) "BAR")
+  (data $4 (i32.const 3) "BAR")
+  ;; CHECK:      (data $5 (i32.const 131072) "pez")
+  (data $5 (i32.const 131072) "pez")
+  ;; CHECK:      (data $6 (i32.const 3) "PEZ")
+  (data $6 (i32.const 3) "PEZ")
+  ;; CHECK:      (data $7 (i32.const 196608) "qux")
+  (data $7 (i32.const 196608) "qux")
+  ;; CHECK:      (data $8 (i32.const 3) "QUX")
+  (data $8 (i32.const 3) "QUX")
+  ;; CHECK:      (data $9 (memory $1) (i32.const 0) "post")
+  (data $9 (memory $1) (i32.const 0) "post")
+)
+
+(module
+  ;; CHECK:      (import "" "" (memory $0 0))
+  (import "" "" (memory $0 0))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $0 (memory $1) (i32.const 0) "pre")
+  (data $0 (memory $1) (i32.const 0) "pre")
+  ;; CHECK:      (data $1 (i32.const 196608) "qux")
+  (data $1 (i32.const 196608) "qux")
+  (data $2 (i32.const 3) "QUX")
+  ;; CHECK:      (data $7 (i32.const 0) "fooFOO")
+
+  ;; CHECK:      (data $5 (i32.const 65536) "bar")
+
+  ;; CHECK:      (data $3 (i32.const 131072) "pez")
+  (data $3 (i32.const 131072) "pez")
+  (data $4 (i32.const 3) "PEZ")
+  (data $5 (i32.const 65536) "bar")
+  (data $6 (i32.const 3) "BAR")
+  (data $7 (i32.const 0) "foo")
+  (data $8 (i32.const 3) "FOO")
+  ;; CHECK:      (data $9 (memory $1) (i32.const 0) "post")
+  (data $9 (memory $1) (i32.const 0) "post")
+)
+
+
+(module
+  ;; CHECK:      (import "" "" (memory $0 0 (pagesize 1)))
+  (import "" "" (memory $0 0 (pagesize 1)))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $0 (memory $1) (i32.const 0) "pre")
+  (data $0 (memory $1) (i32.const 0) "pre")
+  ;; CHECK:      (data $1 (i32.const 0) "foo")
+  (data $1 (i32.const 0) "foo")
+  ;; CHECK:      (data $2 (i32.const 1) "bar")
+  (data $2 (i32.const 1) "bar")
+  ;; CHECK:      (data $3 (i32.const 2) "pez")
+  (data $3 (i32.const 2) "pez")
+  ;; CHECK:      (data $4 (i32.const 3) "qux")
+  (data $4 (i32.const 3) "qux")
+  (data $5 (i32.const 19) "QUX")
+  (data $6 (i32.const 18) "PEZ")
+  (data $7 (i32.const 17) "BAR")
+  ;; CHECK:      (data $8 (i32.const 16) "FOORZX")
+  (data $8 (i32.const 16) "FOO")
+  ;; CHECK:      (data $9 (memory $1) (i32.const 0) "post")
+  (data $9 (memory $1) (i32.const 0) "post")
+)
+
+(module
+  ;; CHECK:      (import "" "" (memory $0 i64 0 (pagesize 1)))
+  (import "" "" (memory $0 i64 0 (pagesize 1)))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $0 (memory $1) (i32.const 0) "pre")
+  (data $0 (memory $1) (i32.const 0) "pre")
+  ;; CHECK:      (data $1 (i64.const -7) "foo")
+  (data $1 (i64.const -7) "foo")
+  ;; CHECK:      (data $2 (i64.const -6) "bar")
+  (data $2 (i64.const -6) "bar")
+  ;; CHECK:      (data $3 (i64.const -5) "pez")
+  (data $3 (i64.const -5) "pez")
+  (data $4 (i64.const -4) "qux")
+  ;; CHECK:      (data $5 (i64.const -5) "PEZx")
+  (data $5 (i64.const -5) "PEZ")
+  ;; CHECK:      (data $6 (memory $1) (i32.const 0) "post")
+  (data $6 (memory $1) (i32.const 0) "post")
+)
+
+(module
+  ;; CHECK:      (import "" "" (memory $0 0 (pagesize 1)))
+  (import "" "" (memory $0 0 (pagesize 1)))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $0 (memory $1) (i32.const 0) "pre")
+  (data $0 (memory $1) (i32.const 0) "pre")
+  ;; CHECK:      (data $1 (i32.const 0) "fooo")
+  (data $1 (i32.const 0) "fooo")
+  (data $2 (i32.const 12) "QUUX")
+  (data $3 (i32.const 8) "PEEZ")
+  (data $4 (i32.const 4) "BAAR")
+  ;; CHECK:      (data $5 (i32.const 0) "FOOOBAARPEEZquux")
+  (data $5 (i32.const 0) "FOOO")
+  (data $6 (i32.const 12) "quux")
+  ;; CHECK:      (data $7 (memory $1) (i32.const 0) "post")
+  (data $7 (memory $1) (i32.const 0) "post")
+)
+
+;; Tests for near-adjacent merge algorithm and zero-fill tracking.
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "foo\00bar\00pez\00qux")
+  (data $0 (i32.const 0) "foo")
+  (data $1 (memory $1) (i32.const 12) "QUX")
+  (data $2 (i32.const 4) "bar")
+  (data $3 (memory $1) (i32.const 8) "PEZ")
+  (data $4 (i32.const 8) "pez")
+  (data $5 (memory $1) (i32.const 4) "BAR")
+  (data $6 (i32.const 12) "qux")
+  ;; CHECK:      (data $7 (memory $1) (i32.const 0) "FOO\00BAR\00PEZ\00QUX")
+  (data $7 (memory $1) (i32.const 0) "FOO")
+)
+
+(module
+  ;; CHECK:      (import "" "" (memory $0 1 1))
+  (import "" "" (memory $0 1 1))
+  ;; CHECK:      (import "" "" (memory $1 1 1))
+  (import "" "" (memory $1 1 1))
+  ;; CHECK:      (data $0 (i32.const 0) "foo")
+  (data $0 (i32.const 0) "foo")
+  ;; CHECK:      (data $2 (i32.const 4) "bar")
+
+  ;; CHECK:      (data $4 (i32.const 8) "pez")
+
+  ;; CHECK:      (data $6 (i32.const 12) "qux")
+
+  ;; CHECK:      (data $7 (memory $1) (i32.const 0) "FOO")
+
+  ;; CHECK:      (data $5 (memory $1) (i32.const 4) "BAR")
+
+  ;; CHECK:      (data $3 (memory $1) (i32.const 8) "PEZ")
+
+  ;; CHECK:      (data $1 (memory $1) (i32.const 12) "QUX")
+  (data $1 (memory $1) (i32.const 12) "QUX")
+  (data $2 (i32.const 4) "bar")
+  (data $3 (memory $1) (i32.const 8) "PEZ")
+  (data $4 (i32.const 8) "pez")
+  (data $5 (memory $1) (i32.const 4) "BAR")
+  (data $6 (i32.const 12) "qux")
+  (data $7 (memory $1) (i32.const 0) "FOO")
+)
+
+(module
+  ;; CHECK:      (global $0 i32 (i32.const 0))
+  (global $0 i32 (i32.const 0))
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "foo\00bar")
+  (data $0 (i32.const 0) "foo")
+  (data $1 (memory $1) (i32.const 12) "QUX")
+  (data $2 (i32.const 4) "bar")
+  ;; CHECK:      (data $3 (memory $1) (i32.const 8) "PEZ\00QUX")
+  (data $3 (memory $1) (i32.const 8) "PEZ")
+  ;; CHECK:      (data $4 (global.get $0) "\00")
+  (data $4 (global.get $0) "\00")
+  ;; CHECK:      (data $5 (memory $1) (global.get $0) "\00")
+  (data $5 (memory $1) (global.get $0) "\00")
+  ;; CHECK:      (data $6 (i32.const 8) "pez")
+  (data $6 (i32.const 8) "pez")
+  ;; CHECK:      (data $8 (i32.const 12) "qux")
+
+  ;; CHECK:      (data $9 (memory $1) (i32.const 0) "FOO")
+
+  ;; CHECK:      (data $7 (memory $1) (i32.const 4) "BAR")
+  (data $7 (memory $1) (i32.const 4) "BAR")
+  (data $8 (i32.const 12) "qux")
+  (data $9 (memory $1) (i32.const 0) "FOO")
+)
+
+;; Tests for near-adjacent merge algorithm and size heuristic.
+(module
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 0) "bar")
+  (data $0 (i32.const 0) "foo")
+  (data $1 (i32.const 0) "bar")
+  (data $2 (i32.const 17) "bar")
+  ;; CHECK:      (data $3 (i32.const 16) "foor")
+  (data $3 (i32.const 16) "foo")
+  ;; CHECK:      (data $4 (i32.const 32) "fobar")
+  (data $4 (i32.const 32) "foo")
+  (data $5 (i32.const 34) "bar")
+  (data $6 (i32.const 51) "bar")
+  ;; CHECK:      (data $7 (i32.const 48) "foobar")
+  (data $7 (i32.const 48) "foo")
+  ;; CHECK:      (data $8 (i32.const 64) "foo\00bar")
+  (data $8 (i32.const 64) "foo")
+  (data $9 (i32.const 68) "bar")
+  (data $10 (i32.const 85) "bar")
+  ;; CHECK:      (data $11 (i32.const 80) "foo\00\00bar")
+  (data $11 (i32.const 80) "foo")
+  ;; CHECK:      (data $12 (i32.const 96) "foo\00\00\00bar")
+  (data $12 (i32.const 96) "foo")
+  (data $13 (i32.const 102) "bar")
+  (data $14 (i32.const 119) "bar")
+  ;; CHECK:      (data $15 (i32.const 112) "foo\00\00\00\00bar")
+  (data $15 (i32.const 112) "foo")
+)
+
+(module
+  ;; CHECK:      (memory $0 17 17)
+  (memory $0 17 17)
+  ;; CHECK:      (data $0 (i32.const 0) "foo\00\00\00\00\00bar")
+  (data $0 (i32.const 0) "foo")
+  (data $1 (i32.const 8) "bar")
+  ;; CHECK:      (data $3 (i32.const 32) "foo")
+
+  ;; CHECK:      (data $2 (i32.const 41) "bar")
+  (data $2 (i32.const 41) "bar")
+  (data $3 (i32.const 32) "foo")
+  ;; CHECK:      (data $4 (i32.const 64) "foo\00\00\00\00\00\00bar")
+  (data $4 (i32.const 64) "foo")
+  (data $5 (i32.const 73) "bar")
+  ;; CHECK:      (data $7 (i32.const 96) "foo")
+
+  ;; CHECK:      (data $6 (i32.const 106) "bar")
+  (data $6 (i32.const 106) "bar")
+  (data $7 (i32.const 96) "foo")
+  ;; CHECK:      (data $8 (i32.const 4096) "foo")
+  (data $8 (i32.const 4096) "foo")
+  ;; CHECK:      (data $9 (i32.const 4106) "bar")
+  (data $9 (i32.const 4106) "bar")
+  (data $10 (i32.const 8202) "bar")
+  ;; CHECK:      (data $11 (i32.const 8192) "foo\00\00\00\00\00\00\00bar")
+  (data $11 (i32.const 8192) "foo")
+  ;; CHECK:      (data $12 (i32.const 8224) "foo")
+  (data $12 (i32.const 8224) "foo")
+  ;; CHECK:      (data $13 (i32.const 8235) "bar")
+  (data $13 (i32.const 8235) "bar")
+  ;; CHECK:      (data $15 (i32.const 524288) "foo")
+
+  ;; CHECK:      (data $14 (i32.const 524299) "bar")
+  (data $14 (i32.const 524299) "bar")
+  (data $15 (i32.const 524288) "foo")
+  ;; CHECK:      (data $16 (i32.const 1048576) "foo\00\00\00\00\00\00\00\00bar")
+  (data $16 (i32.const 1048576) "foo")
+  (data $17 (i32.const 1048587) "bar")
+  ;; CHECK:      (data $19 (i32.const 1048608) "foo")
+
+  ;; CHECK:      (data $18 (i32.const 1048620) "bar")
+  (data $18 (i32.const 1048620) "bar")
+  (data $19 (i32.const 1048608) "foo")
+)
+
+;; Tests for near-adjacent merge algorithm and flushing.
+(module
+  ;; CHECK:      (global $0 i32 (i32.const 0))
+  (global $0 i32 (i32.const 0))
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 4) "bar")
+  (data $0 (i32.const 4) "bar")
+  ;; CHECK:      (data $1 (global.get $0) "")
+  (data $1 (global.get $0) "")
+  (data $2 (i32.const 8) "pez")
+  ;; CHECK:      (data $3 (i32.const 0) "foo\00bar\00pez")
+  (data $3 (i32.const 0) "foo")
+)
+
+(module
+  ;; CHECK:      (global $0 i32 (i32.const 0))
+  (global $0 i32 (i32.const 0))
+  ;; CHECK:      (memory $0 1 1)
+  (memory $0 1 1)
+  ;; CHECK:      (data $0 (i32.const 6) "B")
+  (data $0 (i32.const 6) "B")
+  ;; CHECK:      (data $1 (global.get $0) "")
+  (data $1 (global.get $0) "")
+  ;; CHECK:      (data $2 (i32.const 4) "A")
+  (data $2 (i32.const 4) "A")
+  ;; CHECK:      (data $3 (global.get $0) "")
+  (data $3 (global.get $0) "")
+  (data $4 (i32.const 8) "bar")
+  ;; CHECK:      (data $5 (i32.const 0) "foo\00A\00B\00bar")
+  (data $5 (i32.const 0) "foo")
+)
+
+(module
+  ;; CHECK:      (import "" "" (memory $0 1 1))
+  (import "" "" (memory $0 1 1))
+  ;; CHECK:      (global $0 i32 (i32.const 0))
+  (global $0 i32 (i32.const 0))
+  ;; CHECK:      (data $0 (i32.const 4) "B")
+  (data $0 (i32.const 4) "B")
+  ;; CHECK:      (data $1 (global.get $0) "")
+  (data $1 (global.get $0) "")
+  ;; CHECK:      (data $2 (i32.const 0) "foo")
+  (data $2 (i32.const 0) "foo")
+  ;; CHECK:      (data $3 (i32.const 6) "pez")
+  (data $3 (i32.const 6) "pez")
+)
+
+(module
+  ;; CHECK:      (import "" "" (memory $0 1 1))
+  (import "" "" (memory $0 1 1))
+  ;; CHECK:      (global $0 i32 (i32.const 0))
+  (global $0 i32 (i32.const 0))
+  ;; CHECK:      (data $0 (i32.const 3) "bar")
+  (data $0 (i32.const 3) "bar")
+  ;; CHECK:      (data $1 (global.get $0) "")
+  (data $1 (global.get $0) "")
+  ;; CHECK:      (data $2 (i32.const 0) "foobarpez")
+  (data $2 (i32.const 0) "foo")
+  (data $3 (i32.const 6) "pez")
+)
+
+(module
+  ;; CHECK:      (import "" "" (memory $0 1))
+  (import "" "" (memory $0 1))
+  ;; CHECK:      (global $0 i32 (i32.const 0))
+  (global $0 i32 (i32.const 0))
+  ;; CHECK:      (data $0 (i32.const 65535) ".")
+  (data $0 (i32.const 65535) ".")
+  ;; CHECK:      (data $1 (global.get $0) "")
+  (data $1 (global.get $0) "")
+  (data $2 (i32.const 65536) "pez")
+  ;; CHECK:      (data $3 (i32.const 65532) "bar.pez")
+  (data $3 (i32.const 65532) "bar")
+  ;; CHECK:      (data $4 (i32.const 131072) "qux")
+  (data $4 (i32.const 131072) "qux")
+  ;; CHECK:      (data $5 (i32.const 0) "foo")
+  (data $5 (i32.const 0) "foo")
+)

From 63d65a8353f7b3df19d5c57249347d3d2e1630f4 Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Wed, 29 Apr 2026 18:29:29 -0400
Subject: [PATCH 14/18] Add missing <bit> include

---
 src/passes/MergeDataSegments.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/passes/MergeDataSegments.cpp b/src/passes/MergeDataSegments.cpp
index 151168e78b0..1856bd41b70 100644
--- a/src/passes/MergeDataSegments.cpp
+++ b/src/passes/MergeDataSegments.cpp
@@ -25,6 +25,8 @@
 // an out-of-bounds trap.
 //
 
+#include <bit>
+
 #include "pass.h"
 #include "support/stdckdint.h"
 #include "wasm-builder.h"

From d5ced8551b0e5229f9f3ea340b63c7fab7241ce9 Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Wed, 29 Apr 2026 20:03:50 -0400
Subject: [PATCH 15/18] Expand test comments

---
 test/lit/passes/merge-data-segments-tnh.wast |  26 ++--
 test/lit/passes/merge-data-segments.wast     | 125 +++++++++++++++----
 2 files changed, 116 insertions(+), 35 deletions(-)

diff --git a/test/lit/passes/merge-data-segments-tnh.wast b/test/lit/passes/merge-data-segments-tnh.wast
index 032bd999fa4..2e74a27c3ca 100644
--- a/test/lit/passes/merge-data-segments-tnh.wast
+++ b/test/lit/passes/merge-data-segments-tnh.wast
@@ -1,7 +1,10 @@
 ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
 ;; RUN: foreach %s %t wasm-opt -all --merge-data-segments -tnh -S -o - | filecheck %s
 
-;; Guaranteed traps remain guaranteed under TNH.
+;; Guaranteed traps remain guaranteed under TNH: these are identical to the
+;; non-TNH case. We use a second memory for the dead segment to distinguish
+;; between the trap behavior and the drop behavior.
+
 (module
   ;; CHECK:      (memory $0 0 0)
   (memory $0 0 0)
@@ -14,8 +17,8 @@
 )
 
 (module
-  ;; CHECK:      (memory $0 0)
-  (memory $0 0)
+  ;; CHECK:      (import "" "" (memory $0 0 0))
+  (import "" "" (memory $0 0 0))
   ;; CHECK:      (memory $1 1 1)
   (memory $1 1 1)
   (data $0 (i32.const 0) "")
@@ -25,8 +28,8 @@
 )
 
 (module
-  ;; CHECK:      (import "" "" (memory $0 0 0))
-  (import "" "" (memory $0 0 0))
+  ;; CHECK:      (memory $0 0)
+  (memory $0 0)
   ;; CHECK:      (memory $1 1 1)
   (memory $1 1 1)
   (data $0 (i32.const 0) "")
@@ -47,7 +50,8 @@
   (data $2 (i32.const 3) "bar")
 )
 
-;; Nonempty non-constant-offset segments trigger no bounds checks under TNH.
+;; Nonempty non-constant-offset segments trigger no bounds checks under TNH:
+;; segment $1 does not cause memory $0 to be flushed.
 (module
   ;; CHECK:      (global $0 i32 (i32.const 0))
   (global $0 i32 (i32.const 0))
@@ -63,7 +67,8 @@
   (data $2 (i32.const 3) "pez")
 )
 
-;; Nonempty non-constant-offset segments still flush their own memory.
+;; Nonempty non-constant-offset segments still flush their own memory: segment
+;; $1 causes memory $0 to be flushed.
 (module
   ;; CHECK:      (global $0 i32 (i32.const 0))
   (global $0 i32 (i32.const 0))
@@ -77,7 +82,8 @@
   (data $2 (i32.const 3) "pez")
 )
 
-;; Nonempty non-constant-offset segments still invalidate flushed data.
+;; Nonempty non-constant-offset segments still invalidate flushed data: the data
+;; in segment $0 cannot be used to merge the near-adjacent segments $2 and $3.
 (module
   ;; CHECK:      (global $0 i32 (i32.const 0))
   (global $0 i32 (i32.const 0))
@@ -93,7 +99,8 @@
   (data $3 (i32.const 6) "pez")
 )
 
-;; Bounds checks within memory limits are assumed to succeed under TNH.
+;; Bounds checks within memory limits are assumed to succeed under TNH: no
+;; flushes occur, and segments are merged freely.
 (module
   ;; CHECK:      (import "" "" (memory $0 0))
   (import "" "" (memory $0 0))
@@ -120,6 +127,7 @@
   (data $9 (memory $1) (i32.const 0) "post")
 )
 
+;; In this case, reordering input segments does not change flushing behavior.
 (module
   ;; CHECK:      (import "" "" (memory $0 0))
   (import "" "" (memory $0 0))
diff --git a/test/lit/passes/merge-data-segments.wast b/test/lit/passes/merge-data-segments.wast
index 30f61842974..63c24a1c087 100644
--- a/test/lit/passes/merge-data-segments.wast
+++ b/test/lit/passes/merge-data-segments.wast
@@ -1,7 +1,9 @@
 ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
 ;; RUN: foreach %s %t wasm-opt -all --merge-data-segments -S -o - | filecheck %s
 
-;; Basic tests for merge algorithm.
+;; Basic tests for the merge algorithm: it should merge adjacent and overlapping
+;; segments in their order of appearance.
+
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -148,6 +150,9 @@
   (data $5 (i32.const 16) "FOO")
 )
 
+;; The merge algorithm should operate independently on different memories, and
+;; it should ensure that the offset types on merged segments correspond to their
+;; respective memories.
 (module
   ;; CHECK:      (memory $0 i64 1 1)
   (memory $0 i64 1 1)
@@ -163,6 +168,7 @@
   (data $5 (i64.const 4) "pez")
 )
 
+;; More shuffling between data segments of different memories.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -190,6 +196,10 @@
 )
 
 ;; Tests for passive segments and instruction rewriting.
+
+;; Basic rewriting: Passive segments are output before anything else, and
+;; references to them are unmodified; references to active segments are renamed
+;; to the first active segment.
 (module
   ;; CHECK:      (type $1 (array (mut i8)))
 
@@ -274,6 +284,12 @@
   )
 )
 
+;; Usage of the empty target segment. Again, passive segments are output before
+;; anything else, and references to them are left unmodified. But in this case,
+;; no active segments are left in the output to act as the target segment to
+;; rename into. So if the renamer detects a reference to an active segment, we
+;; add back the first empty active segment from the input to act as the target,
+;; in this case segment $1.
 (module
   ;; CHECK:      (type $1 (array (mut i8)))
 
@@ -358,7 +374,8 @@
   )
 )
 
-;; Test that passive segments following a trap segment are retained.
+;; Passive segments following a trap segment should be retained, and references
+;; to dead active segments should be renamed to a live segment.
 (module
   ;; CHECK:      (type $0 (func))
   (type $0 (func))
@@ -385,7 +402,11 @@
   )
 )
 
-;; Tests for guaranteed traps: remaining active segments should be dropped.
+;; Tests for guaranteed traps: remaining active segments should be dropped. We
+;; use a second memory for the dead segment to distinguish between the trap
+;; behavior and the drop behavior.
+
+;; Segment $1 ends outside memory $0's max size.
 (module
   ;; CHECK:      (memory $0 0 0)
   (memory $0 0 0)
@@ -397,6 +418,22 @@
   (data $2 (memory $1) (i32.const 0) "dead")
 )
 
+;; An imported memory's max size must be no greater than its declared max size.
+;; Thus, segment $1 must still end outside the imported memory $0's max size.
+(module
+  ;; CHECK:      (import "" "" (memory $0 0 0))
+  (import "" "" (memory $0 0 0))
+  ;; CHECK:      (memory $1 1 1)
+  (memory $1 1 1)
+  (data $0 (i32.const 0) "")
+  ;; CHECK:      (data $1 (i32.const 1) "")
+  (data $1 (i32.const 1) "")
+  (data $2 (memory $1) (i32.const 0) "dead")
+)
+
+;; Segment $1 ends outside memory $0's initial size. Since the memory is not
+;; imported, its actual size during initialization is equal to its initial size,
+;; so the trap is still guaranteed.
 (module
   ;; CHECK:      (memory $0 0)
   (memory $0 0)
@@ -408,17 +445,23 @@
   (data $2 (memory $1) (i32.const 0) "dead")
 )
 
+;; Not a guaranteed trap: the actual size of an imported memory may be greater
+;; than its declared initial size. When simulating bounds-check behavior, we use
+;; the "known size" of a memory to track how large it must be, for all previous
+;; initializations to have succeeded.
 (module
-  ;; CHECK:      (import "" "" (memory $0 0 0))
-  (import "" "" (memory $0 0 0))
+  ;; CHECK:      (import "" "" (memory $0 0))
+  (import "" "" (memory $0 0))
   ;; CHECK:      (memory $1 1 1)
   (memory $1 1 1)
   (data $0 (i32.const 0) "")
   ;; CHECK:      (data $1 (i32.const 1) "")
   (data $1 (i32.const 1) "")
+  ;; CHECK:      (data $2 (memory $1) (i32.const 0) "dead")
   (data $2 (memory $1) (i32.const 0) "dead")
 )
 
+;; Two in-bounds segments and an out-of-bounds segment.
 (module
   ;; CHECK:      (global $0 i32 (i32.const 64))
   (global $0 i32 (i32.const 64))
@@ -435,19 +478,6 @@
   (data $3 (memory $1) (i32.const 0) "dead")
 )
 
-;; Not a guaranteed trap: the imported memory could be longer.
-(module
-  ;; CHECK:      (import "" "" (memory $0 0))
-  (import "" "" (memory $0 0))
-  ;; CHECK:      (memory $1 1 1)
-  (memory $1 1 1)
-  (data $0 (i32.const 0) "")
-  ;; CHECK:      (data $1 (i32.const 1) "")
-  (data $1 (i32.const 1) "")
-  ;; CHECK:      (data $2 (memory $1) (i32.const 0) "dead")
-  (data $2 (memory $1) (i32.const 0) "dead")
-)
-
 ;; Merged segment, trap segment, and two dead segments.
 (module
   ;; CHECK:      (memory $0 1 1)
@@ -461,7 +491,9 @@
   (data $4 (i32.const 65536) "foo")
 )
 
-;; Test for address overflow.
+;; Test for address overflow. Note that a module may validly contain a 2^64-byte
+;; memory and a data segment ending at offset 2^64 exactly, but the pass rejects
+;; that particular edge case as a fatal error.
 (module
   ;; CHECK:      (import "" "" (memory $0 i64 0))
   (import "" "" (memory $0 i64 0))
@@ -476,7 +508,13 @@
   (data $3 (memory $1) (i32.const 0) "dead")
 )
 
-;; Tests for bounds checks and flushing.
+;; Tests for bounds checks and flushing. We use a separate memory to distinguish
+;; between bounds-check behavior and flushing behavior.
+
+;; Before each point that a bounds-check trap may occur, all merged segments in
+;; all memories should be flushed. After a trap, the embedder can observe any
+;; prior modifications to imported memories, but to be conservative we simply
+;; flush all memories.
 (module
   ;; CHECK:      (import "" "" (memory $0 0))
   (import "" "" (memory $0 0))
@@ -503,6 +541,8 @@
   (data $9 (memory $1) (i32.const 0) "post")
 )
 
+;; The same test, but writing in reverse order. Only segment $1 may trigger a
+;; bounds-check trap: all future segments end earlier than $1 does.
 (module
   ;; CHECK:      (import "" "" (memory $0 0))
   (import "" "" (memory $0 0))
@@ -528,7 +568,9 @@
   (data $9 (memory $1) (i32.const 0) "post")
 )
 
-
+;; Segments written in forward and reverse order. Segments $1 to $4 each cause a
+;; flush, but after segment $5 causes a flush, segments $5 to $8 may be freely
+;; merged.
 (module
   ;; CHECK:      (import "" "" (memory $0 0 (pagesize 1)))
   (import "" "" (memory $0 0 (pagesize 1)))
@@ -553,6 +595,7 @@
   (data $9 (memory $1) (i32.const 0) "post")
 )
 
+;; Bounds-check behavior approaching the offset 2^64-1 boundary.
 (module
   ;; CHECK:      (import "" "" (memory $0 i64 0 (pagesize 1)))
   (import "" "" (memory $0 i64 0 (pagesize 1)))
@@ -573,6 +616,12 @@
   (data $6 (memory $1) (i32.const 0) "post")
 )
 
+;; After previous segments are flushed, the data in a bounds-check segment need
+;; not be flushed immediately: it can be freely merged with and overwritten by
+;; later segments. But at the next flush, we must be careful to output this
+;; merged segment before any other segments, so that it has the same
+;; bounds-check behavior. We track this based on the start address of the last
+;; page asserted to exist from a bounds check.
 (module
   ;; CHECK:      (import "" "" (memory $0 0 (pagesize 1)))
   (import "" "" (memory $0 0 (pagesize 1)))
@@ -593,6 +642,9 @@
 )
 
 ;; Tests for near-adjacent merge algorithm and zero-fill tracking.
+
+;; Basic near-adjacent merge, in forward and reverse order. It should operate
+;; independently on different memories.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -610,6 +662,8 @@
   (data $7 (memory $1) (i32.const 0) "FOO")
 )
 
+;; Since the memories are imported in this case, we don't know what data lies
+;; in the gaps, so a near-adjacent merge is not possible.
 (module
   ;; CHECK:      (import "" "" (memory $0 1 1))
   (import "" "" (memory $0 1 1))
@@ -639,6 +693,9 @@
   (data $7 (memory $1) (i32.const 0) "FOO")
 )
 
+;; Near-adjacent merge, in forward and reverse order, is possible before a
+;; nonempty non-constant-offset segment, but not afterward, since we don't know
+;; where it may have written its data.
 (module
   ;; CHECK:      (global $0 i32 (i32.const 0))
   (global $0 i32 (i32.const 0))
@@ -669,6 +726,10 @@
 )
 
 ;; Tests for near-adjacent merge algorithm and size heuristic.
+
+;; Near-adjacent merge is universally applied for small gaps, acting as an
+;; extension of overlapping and adjacent merge. It works in forward and reverse
+;; order.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -698,6 +759,10 @@
   (data $15 (i32.const 112) "foo")
 )
 
+;; For large gaps, the size heuristic depends on the encoded size of the offset.
+;; In this case, we influence the merge decision by alternately increasing the
+;; gap size and increasing the offset size. (The heuristic similarly depends on
+;; the encoded size of the data length, but that is more difficult to test.)
 (module
   ;; CHECK:      (memory $0 17 17)
   (memory $0 17 17)
@@ -743,7 +808,10 @@
   (data $19 (i32.const 1048608) "foo")
 )
 
-;; Tests for near-adjacent merge algorithm and flushing.
+;; Tests for near-adjacent merge algorithm and flushing. We use empty
+;; non-constant-offset segments simply to force a flush.
+
+;; Near-adjacent merge uses data from previously flushed segments to fill gaps.
 (module
   ;; CHECK:      (global $0 i32 (i32.const 0))
   (global $0 i32 (i32.const 0))
@@ -758,6 +826,7 @@
   (data $3 (i32.const 0) "foo")
 )
 
+;; Flushed segment data is combined across repeated flushes.
 (module
   ;; CHECK:      (global $0 i32 (i32.const 0))
   (global $0 i32 (i32.const 0))
@@ -776,6 +845,8 @@
   (data $5 (i32.const 0) "foo")
 )
 
+;; In this case, the memory is imported, and the flushed segment data is too
+;; short to fill the gap, so near-adjacent merge is not possible.
 (module
   ;; CHECK:      (import "" "" (memory $0 1 1))
   (import "" "" (memory $0 1 1))
@@ -791,6 +862,8 @@
   (data $3 (i32.const 6) "pez")
 )
 
+;; In this case, even though the memory is imported, the flushed segment data
+;; completely fills the gap, so near-adjacent merge is possible.
 (module
   ;; CHECK:      (import "" "" (memory $0 1 1))
   (import "" "" (memory $0 1 1))
@@ -805,6 +878,8 @@
   (data $3 (i32.const 6) "pez")
 )
 
+;; Additional segments can be merged into a bounds-check segment via
+;; near-adjacent merge, and it is still flushed before any other segments.
 (module
   ;; CHECK:      (import "" "" (memory $0 1))
   (import "" "" (memory $0 1))
@@ -817,8 +892,6 @@
   (data $2 (i32.const 65536) "pez")
   ;; CHECK:      (data $3 (i32.const 65532) "bar.pez")
   (data $3 (i32.const 65532) "bar")
-  ;; CHECK:      (data $4 (i32.const 131072) "qux")
-  (data $4 (i32.const 131072) "qux")
-  ;; CHECK:      (data $5 (i32.const 0) "foo")
-  (data $5 (i32.const 0) "foo")
+  ;; CHECK:      (data $4 (i32.const 0) "foo")
+  (data $4 (i32.const 0) "foo")
 )

From ce65f6c1d8aa1cd375c9f80c8cd0c9cdaeae3b39 Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Wed, 29 Apr 2026 20:08:53 -0400
Subject: [PATCH 16/18] Add MergeDataSegments to default opt passes

---
 src/passes/pass.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 3a7712b5b82..ddffa335edf 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -824,6 +824,7 @@ void PassRunner::addDefaultGlobalOptimizationPostPasses() {
   } else {
     addIfNoDWARFIssues("simplify-globals");
   }
+  addIfNoDWARFIssues("merge-data-segments");
   addIfNoDWARFIssues("remove-unused-module-elements");
   if (options.optimizeLevel >= 2 && wasm->features.hasStrings()) {
     // Gather strings to globals right before reorder-globals, which will then

From 191c51df6453e9064d303825df311140fab02f26 Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Fri, 1 May 2026 15:19:04 -0400
Subject: [PATCH 17/18] Expand test comments further

---
 test/lit/passes/merge-data-segments-tnh.wast | 10 ++-
 test/lit/passes/merge-data-segments.wast     | 75 +++++++++++++++-----
 2 files changed, 64 insertions(+), 21 deletions(-)

diff --git a/test/lit/passes/merge-data-segments-tnh.wast b/test/lit/passes/merge-data-segments-tnh.wast
index 2e74a27c3ca..9a8d9d8565d 100644
--- a/test/lit/passes/merge-data-segments-tnh.wast
+++ b/test/lit/passes/merge-data-segments-tnh.wast
@@ -5,6 +5,7 @@
 ;; non-TNH case. We use a second memory for the dead segment to distinguish
 ;; between the trap behavior and the drop behavior.
 
+;; Segment $1 ends outside memory $0's max size.
 (module
   ;; CHECK:      (memory $0 0 0)
   (memory $0 0 0)
@@ -16,6 +17,7 @@
   (data $2 (memory $1) (i32.const 0) "dead")
 )
 
+;; Segment $1 ends outside memory $0's max size.
 (module
   ;; CHECK:      (import "" "" (memory $0 0 0))
   (import "" "" (memory $0 0 0))
@@ -27,6 +29,8 @@
   (data $2 (memory $1) (i32.const 0) "dead")
 )
 
+;; Segment $1 ends outside memory $0's initial size, and the memory cannot be
+;; any larger since it is not imported.
 (module
   ;; CHECK:      (memory $0 0)
   (memory $0 0)
@@ -51,7 +55,8 @@
 )
 
 ;; Nonempty non-constant-offset segments trigger no bounds checks under TNH:
-;; segment $1 does not cause memory $0 to be flushed.
+;; segment $1 overwriting memory $1 does not cause memory $0 to be flushed, so
+;; the other segments are merged freely.
 (module
   ;; CHECK:      (global $0 i32 (i32.const 0))
   (global $0 i32 (i32.const 0))
@@ -127,7 +132,8 @@
   (data $9 (memory $1) (i32.const 0) "post")
 )
 
-;; In this case, reordering input segments does not change flushing behavior.
+;; Since no flushes occur, reordering input segments in the previous test does
+;; not change the shape of the final data segments, only their contents.
 (module
   ;; CHECK:      (import "" "" (memory $0 0))
   (import "" "" (memory $0 0))
diff --git a/test/lit/passes/merge-data-segments.wast b/test/lit/passes/merge-data-segments.wast
index 63c24a1c087..2d271eb328a 100644
--- a/test/lit/passes/merge-data-segments.wast
+++ b/test/lit/passes/merge-data-segments.wast
@@ -2,8 +2,15 @@
 ;; RUN: foreach %s %t wasm-opt -all --merge-data-segments -S -o - | filecheck %s
 
 ;; Basic tests for the merge algorithm: it should merge adjacent and overlapping
-;; segments in their order of appearance.
+;; segments in their order of appearance. The name of the merged segment should
+;; be the name of an input segment with the lowest start address. As a
+;; tiebreaker, when multiple input segments have the same start address, the
+;; name is taken from the first-appearing input segment with the lowest start
+;; address.
 
+;; A typical forward merge of adjacent segments. Segments that are directly
+;; adjacent can always be merged, except in the edge case that the merged data
+;; would be too long for a single data segment.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -14,6 +21,10 @@
   (data $3 (i32.const 12) "qux")
 )
 
+;; The same merge, but with the segments appearing in reverse order. The
+;; resulting data should be the same, since adjacent merge respects address
+;; order instead of appearance order. Even though segment $3 appears last, the
+;; merged segment takes its name since it has the lowest start address.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -24,6 +35,13 @@
   (data $3 (i32.const 3) "foo")
 )
 
+;; The following tests cover overlapping merge cases. Per the spec, active data
+;; segments are written to their memories in order of appearance, so when they
+;; overlap, we should take the data from the last-appearing segment for each
+;; address.
+
+;; Growing segments with a common start address. We take the name of segment $0
+;; since it is the first-appearing segment at address 0.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -34,6 +52,7 @@
   (data $3 (i32.const 0) "quuuux")
 )
 
+;; Growing segments with a common end address.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -44,6 +63,7 @@
   (data $3 (i32.const 0) "quuuux")
 )
 
+;; Growing segments with differing start and end addresses.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -54,6 +74,8 @@
   (data $3 (i32.const 0) "quuuuuuux")
 )
 
+;; Shrinking segments with a common start address. Again, we take the name of
+;; segment $0 since it is the first-appearing segment at address 0.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -64,6 +86,7 @@
   (data $3 (i32.const 0) "foo")
 )
 
+;; Shrinking segments with a common end address.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -74,6 +97,7 @@
   (data $3 (i32.const 3) "foo")
 )
 
+;; Shrinking segments with differing start and end addresses.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -84,6 +108,7 @@
   (data $3 (i32.const 3) "foo")
 )
 
+;; Long forward chain of partially overlapping merges.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -98,6 +123,8 @@
   (data $7 (i32.const 14) "QUX")
 )
 
+;; Long reverse chain of partially overlapping merges. We take the name of
+;; segment $7 since it has the lowest start address.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -112,6 +139,7 @@
   (data $7 (i32.const 0) "foo")
 )
 
+;; Repeatedly overwriting different parts of a merged segment.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -126,6 +154,8 @@
   (data $7 (i32.const 0) "QUX")
 )
 
+;; Overwriting a merged segment while expanding it on both sides. We use the
+;; name of segment $2 since it has the lowest start address.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -137,6 +167,8 @@
   (data $4 (i32.const 7) "foo")
 )
 
+;; Interleaving merges between two different merged segments, in forward and
+;; reverse order.
 (module
   ;; CHECK:      (memory $0 1 1)
   (memory $0 1 1)
@@ -197,9 +229,9 @@
 
 ;; Tests for passive segments and instruction rewriting.
 
-;; Basic rewriting: Passive segments are output before anything else, and
+;; Basic rewriting: All passive segments appear first in the output, and
 ;; references to them are unmodified; references to active segments are renamed
-;; to the first active segment.
+;; to the first-appearing active segment in the output.
 (module
   ;; CHECK:      (type $1 (array (mut i8)))
 
@@ -284,12 +316,12 @@
   )
 )
 
-;; Usage of the empty target segment. Again, passive segments are output before
-;; anything else, and references to them are left unmodified. But in this case,
+;; Usage of the empty target segment. Again, all passive segments appear first
+;; in the output, and references to them are left unmodified. But in this case,
 ;; no active segments are left in the output to act as the target segment to
 ;; rename into. So if the renamer detects a reference to an active segment, we
-;; add back the first empty active segment from the input to act as the target,
-;; in this case segment $1.
+;; add back the first-appearing empty active segment in the input to act as the
+;; target, in this case segment $1.
 (module
   ;; CHECK:      (type $1 (array (mut i8)))
 
@@ -374,6 +406,13 @@
   )
 )
 
+;; Tests for guaranteed traps. A "trap segment" is an active segment which is
+;; statically out-of-bounds and whose initialization will always raise a trap
+;; during module instantiation. After a trap segment, all remaining active
+;; segments should be dropped. In most of these tests, we use a second memory
+;; for the dead segment to distinguish between the trap behavior and the drop
+;; behavior, since dropping should not depend on the memory.
+
 ;; Passive segments following a trap segment should be retained, and references
 ;; to dead active segments should be renamed to a live segment.
 (module
@@ -402,10 +441,6 @@
   )
 )
 
-;; Tests for guaranteed traps: remaining active segments should be dropped. We
-;; use a second memory for the dead segment to distinguish between the trap
-;; behavior and the drop behavior.
-
 ;; Segment $1 ends outside memory $0's max size.
 (module
   ;; CHECK:      (memory $0 0 0)
@@ -447,8 +482,8 @@
 
 ;; Not a guaranteed trap: the initial size of an imported memory may be greater
 ;; than its initial size. When simulating bounds-check behavior, we use the
-;; "known size" of a memory to track how large it must be, for all previous
-;; initializations to have succeeded.
+;; "known size" of a memory to track how large it must presently be, for all
+;; previous initializations to have succeeded.
 (module
   ;; CHECK:      (import "" "" (memory $0 0))
   (import "" "" (memory $0 0))
@@ -461,7 +496,7 @@
   (data $2 (memory $1) (i32.const 0) "dead")
 )
 
-;; Two in-bounds segments and an out-of-bounds segment.
+;; Two in-bounds segments and an out-of-bounds segment for a small memory.
 (module
   ;; CHECK:      (global $0 i32 (i32.const 64))
   (global $0 i32 (i32.const 64))
@@ -492,8 +527,8 @@
 )
 
 ;; Test for address overflow. Note that a module may validly contain a 2^64-byte
-;; memory and a data segment ending at offset 2^64 exactly, but the pass rejects
-;; that particular edge case as a fatal error.
+;; memory and active data segments ending at offset 2^64 exactly, but the pass
+;; rejects that particular edge case as a fatal error.
 (module
   ;; CHECK:      (import "" "" (memory $0 i64 0))
   (import "" "" (memory $0 i64 0))
@@ -509,12 +544,14 @@
 )
 
 ;; Tests for bounds checks and flushing. We use a separate memory to distinguish
-;; between bounds-check behavior and flushing behavior.
+;; between bounds-check behavior and flushing behavior, since after a potential
+;; bounds-check trap, all memories should be flushed at once.
 
 ;; Before each point that a bounds-check trap may occur, all merged segments in
 ;; all memories should be flushed. After a trap, the embedder can observe any
-;; prior modifications to imported memories, but to be conservative we simply
-;; flush all memories.
+;; prior modifications to imported memories, and all of these modifications
+;; should be preserved to avoid differences in behavior. To simplify the logic,
+;; we flush all memories, not just imported memories.
 (module
   ;; CHECK:      (import "" "" (memory $0 0))
   (import "" "" (memory $0 0))

From 1666b7e8aadd00bebfafcef977cf88459a0fc61b Mon Sep 17 00:00:00 2001
From: LegionMammal978 <mattlloydhouse@gmail.com>
Date: Mon, 4 May 2026 17:05:55 -0400
Subject: [PATCH 18/18] Expand test comments further

---
 test/lit/passes/merge-data-segments-tnh.wast | 9 ++++++---
 test/lit/passes/merge-data-segments.wast     | 7 ++++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/test/lit/passes/merge-data-segments-tnh.wast b/test/lit/passes/merge-data-segments-tnh.wast
index 9a8d9d8565d..cb8c941719b 100644
--- a/test/lit/passes/merge-data-segments-tnh.wast
+++ b/test/lit/passes/merge-data-segments-tnh.wast
@@ -1,9 +1,12 @@
 ;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
 ;; RUN: foreach %s %t wasm-opt -all --merge-data-segments -tnh -S -o - | filecheck %s
 
-;; Guaranteed traps remain guaranteed under TNH: these are identical to the
-;; non-TNH case. We use a second memory for the dead segment to distinguish
-;; between the trap behavior and the drop behavior.
+;; Guaranteed traps remain guaranteed under TNH. It's nontrivial to "drop
+;; absolutely everything" from a module that can never be instantiated, so to
+;; simplify the implementation, we just reuse the non-TNH behavior. Thus, these
+;; are identical to the non-TNH tests for guaranteed traps. We use a second
+;; memory for the dead segment to distinguish between the trap behavior and the
+;; drop behavior.
 
 ;; Segment $1 ends outside memory $0's max size.
 (module
diff --git a/test/lit/passes/merge-data-segments.wast b/test/lit/passes/merge-data-segments.wast
index 2d271eb328a..e1b8c3e2e7f 100644
--- a/test/lit/passes/merge-data-segments.wast
+++ b/test/lit/passes/merge-data-segments.wast
@@ -227,7 +227,12 @@
   (data $7 (memory $3) (i32.const 3) "foo")
 )
 
-;; Tests for passive segments and instruction rewriting.
+;; Tests for passive segments and instruction rewriting. The spec demands that
+;; every active segment effectively go through data.drop after initialization,
+;; so every time an explicit instruction refers to an active segment, it has the
+;; same effect as referring to an empty passive segment. When manipulating
+;; segments, we just have to find some active segment name in the output to
+;; replace all active segment names appearing in instructions in the input.
 
 ;; Basic rewriting: All passive segments appear first in the output, and
 ;; references to them are unmodified; references to active segments are renamed