From 91c30a0ed34bc34429ea26ca785a8ff003564795 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 23 Apr 2026 15:28:40 -0700 Subject: [PATCH 01/47] start --- src/tools/fuzzing.h | 8 ++++++++ src/tools/wasm-opt.cpp | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h index 78057877031..8d54ffd0d2f 100644 --- a/src/tools/fuzzing.h +++ b/src/tools/fuzzing.h @@ -132,6 +132,9 @@ class TranslateToFuzzReader { void setPreserveImportsAndExports(bool preserveImportsAndExports_) { preserveImportsAndExports = preserveImportsAndExports_; } + void setAgainstJS(bool againstJS_) { + againstJS = againstJS_; + } void setImportedModule(std::string importedModuleName); void build(); @@ -159,6 +162,11 @@ class TranslateToFuzzReader { // existing testcase (using initial-content). bool preserveImportsAndExports = false; + // Whether the wasm will be used from JS and in no other way. This lets us + // modify the wasm in ways that keep it valid from JS's point of view, but + // which might cause issues when linked against wasm or used otherwise. + bool againstJS = false; + // An optional module to import from. std::optional importedModule; diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp index 5c2807c25e4..23d7b2ef191 100644 --- a/src/tools/wasm-opt.cpp +++ b/src/tools/wasm-opt.cpp @@ -87,6 +87,7 @@ int main(int argc, const char* argv[]) { bool fuzzMemory = true; bool fuzzOOB = true; bool fuzzPreserveImportsAndExports = false; + bool fuzzAgainstJS = false; std::string fuzzImport; std::string emitSpecWrapper; std::string emitWasm2CWrapper; @@ -212,6 +213,14 @@ For more on how to optimize effectively, see [&](Options* o, const std::string& arguments) { fuzzPreserveImportsAndExports = true; }) + .add("--fuzz-against-js", + "", + "modify the wasm in valid ways that assume it is used only from JS", + WasmOptOption, + Options::Arguments::Zero, + [&](Options* o, const std::string& arguments) { + fuzzAgainstJS = true; + }) .add( "--fuzz-import", "", @@ -349,6 +358,7 @@ For more on how to optimize effectively, see reader.setAllowMemory(fuzzMemory); reader.setAllowOOB(fuzzOOB); reader.setPreserveImportsAndExports(fuzzPreserveImportsAndExports); + reader.setAgainstJS(fuzzAgainstJS); if (!fuzzImport.empty()) { reader.setImportedModule(fuzzImport); } From 65c2a31a82d85b553ae749903ca2fd84da396484 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 23 Apr 2026 15:52:40 -0700 Subject: [PATCH 02/47] work --- src/tools/fuzzing.h | 4 +++ src/tools/fuzzing/fuzzing.cpp | 49 +++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h index 8d54ffd0d2f..121d720297c 100644 --- a/src/tools/fuzzing.h +++ b/src/tools/fuzzing.h @@ -417,6 +417,10 @@ class TranslateToFuzzReader { void fixAfterChanges(Function* func); void modifyInitialFunctions(); + // Mutate the JS boundary, that is, make changes on the wasm side that JS + // would not be broken by (JS does not care about types). + void mutateJSBoundary(); + // Note a global for use during code generation. void useGlobalLater(Global* global); diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index e7696532472..6b16ffda1fb 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -413,6 +413,11 @@ void TranslateToFuzzReader::build() { PassRunner runner(&wasm); ReFinalize().run(&runner, &wasm); ReFinalize().walkModuleCode(&wasm); + + // If fuzzing against JS, we can refine + if (againstJS) { + mutateJSBoundary(); + } } void TranslateToFuzzReader::setupMemory() { @@ -2389,6 +2394,50 @@ void TranslateToFuzzReader::modifyInitialFunctions() { } } +void TranslateToFuzzReader::mutateJSBoundary() { + assert(againstJS); + + // Scan to find functions whose address is taken. We cannot modify their + // signatures at all. + + struct FunctionInfo { + std::atomic refs; + }; + + using NameInfoMap = std::unordered_map; + + struct FunctionInfoScanner + : public WalkerPass> { + bool isFunctionParallel() override { return true; } + + bool modifiesBinaryenIR() override { return false; } + + FunctionInfoScanner(NameInfoMap& map) : map(map) {} + + std::unique_ptr create() override { + return std::make_unique(map); + } + + void visitRefFunc(RefFunc* curr) { + map[curr->func].refs++; + } + }; + + NameInfoMap map; + FunctionInfoScanner scanner(map); + PassRunner runner(&wasm); + scanner.run(&runner, &wasm); + scanner.walkModuleCode(&wasm); + + // If a function does not have its address taken, we can refine types. This is + // safe because we will still send and receive the right number of values (we + // are not changing the arity, which JS might notice). + // + // First, refine params sent to imports. + for ( + // Second, refine results sent from exports. +} + void TranslateToFuzzReader::dropToLog(Function* func) { // Don't always do this. if (oneIn(2)) { From d08c29f8acf836077d074a699e071c2a1297ad39 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 23 Apr 2026 16:15:25 -0700 Subject: [PATCH 03/47] work --- src/tools/fuzzing/fuzzing.cpp | 53 ++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 6b16ffda1fb..f2d701a5dad 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -19,6 +19,7 @@ #include "ir/glbs.h" #include "ir/iteration.h" #include "ir/local-structural-dominance.h" +#include "ir/lubs.h" #include "ir/module-utils.h" #include "ir/names.h" #include "ir/subtype-exprs.h" @@ -2401,7 +2402,11 @@ void TranslateToFuzzReader::mutateJSBoundary() { // signatures at all. struct FunctionInfo { - std::atomic refs; + // Whether there are references to this function itself. + std::atomic reffed = false; + + // Calls to imports from this function. + std::vector callImports; }; using NameInfoMap = std::unordered_map; @@ -2418,24 +2423,64 @@ void TranslateToFuzzReader::mutateJSBoundary() { return std::make_unique(map); } + void visitCall(Call* curr) { + if (getModule()->getFunction(curr->target)->imported()) { + map[curr->func].callImports.push_back(curr); + } + } + void visitRefFunc(RefFunc* curr) { - map[curr->func].refs++; + map[curr->func].reffed = true; } }; NameInfoMap map; FunctionInfoScanner scanner(map); PassRunner runner(&wasm); + scanner.setModule(&wasm); scanner.run(&runner, &wasm); scanner.walkModuleCode(&wasm); // If a function does not have its address taken, we can refine types. This is // safe because we will still send and receive the right number of values (we - // are not changing the arity, which JS might notice). - // + // are not changing the arity, which JS might notice). Each place we may + // refine, we are given the maximum refinement and pick a random type between + // it and the old type. + auto maybeRefine = [](Type old, Type new_) { + if (new_ == Type::unreachable) { + // No values reach this place, so it does not matter. + return old; + } + + assert(Type::isSubType(new_, old)); + }; + // First, refine params sent to imports. for ( + // Second, refine results sent from exports. + for (auto& exp : wasm.exports) { + if (exp->kind != ExternalKind::Function) { + continue; + } + auto name = exp->getInternalName(); + if (map[name].reffed) { + continue; + } + + // Find the LUB, which is the most we can refine. + auto* func = wasm.getFunction(name); + auto lub = LUB::getResultsLUB(func, wasm); + + // Refine. + auto oldResults = func->getResults(); + assert(oldResults.size() == lub.size()); + std::vector newResults; + for (Index i = 0; i < lub.size(); i++) { + newResults.push_back(maybeRefine(oldResults[i], lub[i])); + } + setResults(Type(newResults)); + } } void TranslateToFuzzReader::dropToLog(Function* func) { From f3a07c8ec5d43c95cce925181037d8d4c52495cc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 23 Apr 2026 16:25:03 -0700 Subject: [PATCH 04/47] work --- src/tools/fuzzing/fuzzing.cpp | 37 ++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index f2d701a5dad..c5b86eb28e8 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2447,12 +2447,43 @@ void TranslateToFuzzReader::mutateJSBoundary() { // refine, we are given the maximum refinement and pick a random type between // it and the old type. auto maybeRefine = [](Type old, Type new_) { - if (new_ == Type::unreachable) { - // No values reach this place, so it does not matter. + if (!new_.isRef()) { + // A non-reference like i32, or unreachable (no values reach this place), + // so it does not matter. return old; } - assert(Type::isSubType(new_, old)); + // Pick the heap type. + auto oldHeapType = old.getHeapType(); + auto newHeapType = new_.getHeapType(); + assert(HeapType::isSubType(newHeapType, oldHeapType)); + std::vector options; + options.push_back(oldHeapType); + while (newHeapType != oldHeapType) { + options.push_back(newHeapType); + // We continue until we reach the old type. + auto next = newHeapType.getSuperType(); + assert(next); + newHeapType = *next; + } + newHeapType = pick(options); + + // Pick the nullability. + auto oldNullability = old.getNullability(); + auto newNullability = new_.getNullability(); + if (newNullability != oldNullability) { + newNullability = getNullability(); + } + + // Pick the exactness. + auto oldExactness = old.getExactness(); + auto newExactness = new_.getExactness(); + if (newExactness != oldExactness) { + // TODO: once getExactness is fixed, use + newExactness = oneIn(2) ? Exact : Inexact; + } + + return Type(newHeapType, newNullability, newExactness); }; // First, refine params sent to imports. From 823c6650e72232b8fdcb83fc22212fa24a6b71b1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 23 Apr 2026 16:35:04 -0700 Subject: [PATCH 05/47] work --- src/tools/fuzzing/fuzzing.cpp | 44 ++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index c5b86eb28e8..41dc220800f 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2486,8 +2486,43 @@ void TranslateToFuzzReader::mutateJSBoundary() { return Type(newHeapType, newNullability, newExactness); }; - // First, refine params sent to imports. - for ( + // First, refine params sent to imports. Gather the LUB sent to each import, + // and then refine. + std::unordered_map paramLUBs; + for (auto& [_, info]) { + for (auto* call : info.callImports) { + std::vector sent; + for (auto* operand : call->operands) { + sent.push_back(operand->type); + } + paramLUBs[call->target].note(Type(sent)); + } + } + + for (auto& name : wasm.functions) { + auto* func = wasm.getFunction(name); + if (!func->imported()) { + continue; + } + if (map[name].reffed) { + continue; + } + + // Find the LUB, which is the most we can refine. + auto lub = paramLUBs[name]; + if (!lub.noted()) { + continue; + } + + // Refine. + auto oldParams = func->getParams(); + assert(oldParams.size() == lub.size()); + std::vector newParams; + for (Index i = 0; i < lub.size(); i++) { + newParams.push_back(maybeRefine(oldParams[i], lub[i])); + } + setParams(Type(newParams)); + } // Second, refine results sent from exports. for (auto& exp : wasm.exports) { @@ -2499,9 +2534,12 @@ void TranslateToFuzzReader::mutateJSBoundary() { continue; } - // Find the LUB, which is the most we can refine. + // Find the LUB. auto* func = wasm.getFunction(name); auto lub = LUB::getResultsLUB(func, wasm); + if (!lub.noted()) { + continue; + } // Refine. auto oldResults = func->getResults(); From 767c8b4986f060c90cc44f4b2dbdf42cd28b8d23 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 23 Apr 2026 16:35:13 -0700 Subject: [PATCH 06/47] work --- src/tools/fuzzing.h | 4 +--- src/tools/fuzzing/fuzzing.cpp | 4 +--- src/tools/wasm-opt.cpp | 15 +++++++-------- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/tools/fuzzing.h b/src/tools/fuzzing.h index 121d720297c..e06160332b0 100644 --- a/src/tools/fuzzing.h +++ b/src/tools/fuzzing.h @@ -132,9 +132,7 @@ class TranslateToFuzzReader { void setPreserveImportsAndExports(bool preserveImportsAndExports_) { preserveImportsAndExports = preserveImportsAndExports_; } - void setAgainstJS(bool againstJS_) { - againstJS = againstJS_; - } + void setAgainstJS(bool againstJS_) { againstJS = againstJS_; } void setImportedModule(std::string importedModuleName); void build(); diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 41dc220800f..680c2703bc2 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2429,9 +2429,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { } } - void visitRefFunc(RefFunc* curr) { - map[curr->func].reffed = true; - } + void visitRefFunc(RefFunc* curr) { map[curr->func].reffed = true; } }; NameInfoMap map; diff --git a/src/tools/wasm-opt.cpp b/src/tools/wasm-opt.cpp index 23d7b2ef191..f593428d2b6 100644 --- a/src/tools/wasm-opt.cpp +++ b/src/tools/wasm-opt.cpp @@ -213,14 +213,13 @@ For more on how to optimize effectively, see [&](Options* o, const std::string& arguments) { fuzzPreserveImportsAndExports = true; }) - .add("--fuzz-against-js", - "", - "modify the wasm in valid ways that assume it is used only from JS", - WasmOptOption, - Options::Arguments::Zero, - [&](Options* o, const std::string& arguments) { - fuzzAgainstJS = true; - }) + .add( + "--fuzz-against-js", + "", + "modify the wasm in valid ways that assume it is used only from JS", + WasmOptOption, + Options::Arguments::Zero, + [&](Options* o, const std::string& arguments) { fuzzAgainstJS = true; }) .add( "--fuzz-import", "", From c7d7f1b6cd3fbcc6725d7f92f6245221efbba883 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 23 Apr 2026 16:36:36 -0700 Subject: [PATCH 07/47] work --- src/tools/fuzzing/fuzzing.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 680c2703bc2..eef60d77354 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2417,6 +2417,8 @@ void TranslateToFuzzReader::mutateJSBoundary() { bool modifiesBinaryenIR() override { return false; } + NameInfoMap& map; + FunctionInfoScanner(NameInfoMap& map) : map(map) {} std::unique_ptr create() override { @@ -2519,7 +2521,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { for (Index i = 0; i < lub.size(); i++) { newParams.push_back(maybeRefine(oldParams[i], lub[i])); } - setParams(Type(newParams)); + func->setParams(Type(newParams)); } // Second, refine results sent from exports. @@ -2546,7 +2548,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { for (Index i = 0; i < lub.size(); i++) { newResults.push_back(maybeRefine(oldResults[i], lub[i])); } - setResults(Type(newResults)); + func->setResults(Type(newResults)); } } From fc981c784ffcf020eca2ec610b7e95806741d31b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 23 Apr 2026 16:37:15 -0700 Subject: [PATCH 08/47] work --- src/tools/fuzzing/fuzzing.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index eef60d77354..bd73bbe0bb9 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2427,7 +2427,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { void visitCall(Call* curr) { if (getModule()->getFunction(curr->target)->imported()) { - map[curr->func].callImports.push_back(curr); + map[curr->target].callImports.push_back(curr); } } @@ -2446,7 +2446,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { // are not changing the arity, which JS might notice). Each place we may // refine, we are given the maximum refinement and pick a random type between // it and the old type. - auto maybeRefine = [](Type old, Type new_) { + auto maybeRefine = [&](Type old, Type new_) { if (!new_.isRef()) { // A non-reference like i32, or unreachable (no values reach this place), // so it does not matter. From ebb1106bdc95e5f46b614cd9cdff40adf3627d44 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 23 Apr 2026 16:48:50 -0700 Subject: [PATCH 09/47] work --- src/tools/fuzzing/fuzzing.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index bd73bbe0bb9..1a8068fab2c 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2489,7 +2489,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { // First, refine params sent to imports. Gather the LUB sent to each import, // and then refine. std::unordered_map paramLUBs; - for (auto& [_, info]) { + for (auto& [_, info] : map) { for (auto* call : info.callImports) { std::vector sent; for (auto* operand : call->operands) { @@ -2499,27 +2499,27 @@ void TranslateToFuzzReader::mutateJSBoundary() { } } - for (auto& name : wasm.functions) { - auto* func = wasm.getFunction(name); + for (auto& func : wasm.functions) { if (!func->imported()) { continue; } - if (map[name].reffed) { + if (map[func->name].reffed) { continue; } // Find the LUB, which is the most we can refine. - auto lub = paramLUBs[name]; + auto lub = paramLUBs[func->name]; if (!lub.noted()) { continue; } // Refine. auto oldParams = func->getParams(); - assert(oldParams.size() == lub.size()); + auto lubType = lub.getLUB(); + assert(oldParams.size() == lubType.size()); std::vector newParams; - for (Index i = 0; i < lub.size(); i++) { - newParams.push_back(maybeRefine(oldParams[i], lub[i])); + for (Index i = 0; i < lubType.size(); i++) { + newParams.push_back(maybeRefine(oldParams[i], lubType[i])); } func->setParams(Type(newParams)); } @@ -2529,7 +2529,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { if (exp->kind != ExternalKind::Function) { continue; } - auto name = exp->getInternalName(); + auto name = *exp->getInternalName(); if (map[name].reffed) { continue; } @@ -2543,10 +2543,11 @@ void TranslateToFuzzReader::mutateJSBoundary() { // Refine. auto oldResults = func->getResults(); - assert(oldResults.size() == lub.size()); + auto lubType = lub.getLUB(); + assert(oldResults.size() == lubType.size()); std::vector newResults; - for (Index i = 0; i < lub.size(); i++) { - newResults.push_back(maybeRefine(oldResults[i], lub[i])); + for (Index i = 0; i < lubType.size(); i++) { + newResults.push_back(maybeRefine(oldResults[i], lubType[i])); } func->setResults(Type(newResults)); } From fa2da6eb36a8e58dd44fac8a42e8e667eabd3eb3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 23 Apr 2026 16:50:29 -0700 Subject: [PATCH 10/47] work --- test/lit/fuzz-preserve-imports-exports.wast | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/lit/fuzz-preserve-imports-exports.wast b/test/lit/fuzz-preserve-imports-exports.wast index e8cde8dadbe..d9867ff4710 100644 --- a/test/lit/fuzz-preserve-imports-exports.wast +++ b/test/lit/fuzz-preserve-imports-exports.wast @@ -57,3 +57,5 @@ ) ) + +how to test new flagg? From 2c3c192d1ab0246c7e27b083792a653df0dfd592 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 12:13:18 -0700 Subject: [PATCH 11/47] help --- test/lit/help/wasm-opt.test | 4 ++++ test/unit/test_fuzz_preserve.py | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 test/unit/test_fuzz_preserve.py diff --git a/test/lit/help/wasm-opt.test b/test/lit/help/wasm-opt.test index d616e1cf085..c61196fe3e3 100644 --- a/test/lit/help/wasm-opt.test +++ b/test/lit/help/wasm-opt.test @@ -72,6 +72,10 @@ ;; CHECK-NEXT: --fuzz-preserve-imports-exports don't add imports and exports in ;; CHECK-NEXT: -ttf mode, and keep the start ;; CHECK-NEXT: +;; CHECK-NEXT: --fuzz-against-js modify the wasm in valid ways +;; CHECK-NEXT: that assume it is used only from +;; CHECK-NEXT: JS +;; CHECK-NEXT: ;; CHECK-NEXT: --fuzz-import a module to use as an import in ;; CHECK-NEXT: -ttf mode ;; CHECK-NEXT: diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py new file mode 100644 index 00000000000..d96bd7b27cd --- /dev/null +++ b/test/unit/test_fuzz_preserve.py @@ -0,0 +1,37 @@ +import subprocess + +from scripts.test import shared + +from . import utils + + +class InitialFuzzTest(utils.BinaryenTestCase): + def test_empty_initial(self): + # generate fuzz from random data + data = self.input_path('random_data.txt') + a = shared.run_process(shared.WASM_OPT + ['-ttf', '--print', data], + stdout=subprocess.PIPE).stdout + + # generate fuzz from random data with initial empty wasm + empty_wasm = self.input_path('empty.wasm') + b = shared.run_process( + shared.WASM_OPT + ['-ttf', '--print', data, + '--initial-fuzz=' + empty_wasm], + stdout=subprocess.PIPE).stdout + + # an empty initial wasm causes no changes + self.assertEqual(a, b) + + def test_small_initial(self): + data = self.input_path('random_data.txt') + hello_wat = self.input_path('hello_world.wat') + out = shared.run_process(shared.WASM_OPT + ['-ttf', '--print', data, + '--initial-fuzz=' + hello_wat], + stdout=subprocess.PIPE).stdout + + # the function should be there (perhaps with modified contents - don't + # check that) + self.assertIn('(export "add" (func $add))', out) + + # there should be other fuzz contents added as well + self.assertGreater(out.count('(export '), 1) From 53b143820e3d092b9a3e9638390af01d00bb6f19 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 12:27:40 -0700 Subject: [PATCH 12/47] work --- test/unit/input/fuzz.wat | 22 ++++++++++++ test/unit/test_fuzz_preserve.py | 59 ++++++++++++++++----------------- 2 files changed, 51 insertions(+), 30 deletions(-) create mode 100644 test/unit/input/fuzz.wat diff --git a/test/unit/input/fuzz.wat b/test/unit/input/fuzz.wat new file mode 100644 index 00000000000..4f18f02f4dd --- /dev/null +++ b/test/unit/input/fuzz.wat @@ -0,0 +1,22 @@ +(module + ;; Two imports, one which will be reffed. + (import "module" "base" (func $import (param i32 f64) (result f32))) + (import "module" "base" (func $import-reffed (param i32 f64) (result f32))) + + ;; Two exports, one which will be reffed. + + (func $export (export "export") (param $0 i32) (param $1 f64) (result f32) + (drop + (ref.func $import-reffed) + ) + (drop + (ref.func $export-reffed) + ) + (f32.const 3.14159) + ) + + (func $export-reffed (export "export-reffed") (param $0 i32) (param $1 f64) (result f32) + ;; Use the GC types. + (f32.const 99.123) + ) +) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index d96bd7b27cd..aa475eb01ee 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -5,33 +5,32 @@ from . import utils -class InitialFuzzTest(utils.BinaryenTestCase): - def test_empty_initial(self): - # generate fuzz from random data - data = self.input_path('random_data.txt') - a = shared.run_process(shared.WASM_OPT + ['-ttf', '--print', data], - stdout=subprocess.PIPE).stdout - - # generate fuzz from random data with initial empty wasm - empty_wasm = self.input_path('empty.wasm') - b = shared.run_process( - shared.WASM_OPT + ['-ttf', '--print', data, - '--initial-fuzz=' + empty_wasm], - stdout=subprocess.PIPE).stdout - - # an empty initial wasm causes no changes - self.assertEqual(a, b) - - def test_small_initial(self): - data = self.input_path('random_data.txt') - hello_wat = self.input_path('hello_world.wat') - out = shared.run_process(shared.WASM_OPT + ['-ttf', '--print', data, - '--initial-fuzz=' + hello_wat], - stdout=subprocess.PIPE).stdout - - # the function should be there (perhaps with modified contents - don't - # check that) - self.assertIn('(export "add" (func $add))', out) - - # there should be other fuzz contents added as well - self.assertGreater(out.count('(export '), 1) +class PreserveFuzzTest(utils.BinaryenTestCase): + def test_against_js(self): + # When --fuzz-against-js is used, the wasm is only going to be fuzzed + # against JS, so the fuzzer mutates the boundary in valid ways, even if + # --fuzz-preserve-imports-exports is set. + # + # Testing this deterministically is too hard (as the fuzzer evolves, it + # will handle random data differently, and the test would constantly get + # out of date). Instead, test randomly, in a way that the chance of a + # flake is unrealistic. + size = 10 * 1024 + iters = 1000 + temp_dat = tempfile.NamedTemporaryFile(suffix='.dat') + initial = self.input_path('fuzz.wat') + + for _ in range(iters): + # Generate raw random data + with open(temp_dat.name, 'wb') as f: + f.write(bytes([random.randint(0, 255) for x in range(size)])) + + # Generate the fuzz testcase from the random data + the initial + # contents. + args = ['-ttf', temp_dat.name, '--initial-fuzz=' + initial] + args += ['--fuzz-preserve-imports-exports', '--fuzz-against-js'] + args += ['--print'] + wat = shared.run_process(shared.WASM_OPT + args, + stdout=subprocess.PIPE).stdout + + From 255250d33fff6ddaa7483ab6d63292b1a9803f97 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 12:48:12 -0700 Subject: [PATCH 13/47] work --- test/unit/input/fuzz.wat | 41 +++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/test/unit/input/fuzz.wat b/test/unit/input/fuzz.wat index 4f18f02f4dd..1afc63a46dc 100644 --- a/test/unit/input/fuzz.wat +++ b/test/unit/input/fuzz.wat @@ -1,22 +1,49 @@ (module + ;; Two structs, A and B, each of which has a subtype. + (rec + (type $A (sub (struct))) + (type $A2 (sub $A (struct))) + + (type $B (sub (struct))) + (type $B2 (sub $B(struct))) + ) + ;; Two imports, one which will be reffed. - (import "module" "base" (func $import (param i32 f64) (result f32))) - (import "module" "base" (func $import-reffed (param i32 f64) (result f32))) + (import "module" "base" (func $import (param i32 anyref) (result eqref))) + (import "module" "base" (func $import-reffed (param i32 anyref) (result eqref))) ;; Two exports, one which will be reffed. - (func $export (export "export") (param $0 i32) (param $1 f64) (result f32) + (func $export (export "export") (param $0 i32) (param $1 anyref) (result eqref) + ;; Add the refs. (drop (ref.func $import-reffed) ) (drop (ref.func $export-reffed) ) - (f32.const 3.14159) + + ;; Call the imports. + (drop + (call $import + (i32.const 10) + ;; Send $A. We can refine the anyref to $A or $A2 (but not $B or $B2). + (struct.new $A) + ) + ) + (drop + (call $import-reffed + (i32.const 20) + ;; Send $A. We can refine the anyref to $A or $A2 (but not $B or $B2). + (struct.new $A) + ) + ) + + ;; Return $B. We can refine the eqref to $B or $B2 (but not $A or $A2). + (struct.new $B) ) - (func $export-reffed (export "export-reffed") (param $0 i32) (param $1 f64) (result f32) - ;; Use the GC types. - (f32.const 99.123) + (func $export-reffed (export "export-reffed") (param $0 i32) (param $1 anyref) (result eqref) + (struct.new $A) ) ) From 169f4c85df04b7860d2e82cfa6532fbb7a3ed1ee Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 12:58:18 -0700 Subject: [PATCH 14/47] work --- test/unit/input/fuzz.wat | 1 - test/unit/test_fuzz_preserve.py | 37 ++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/test/unit/input/fuzz.wat b/test/unit/input/fuzz.wat index 1afc63a46dc..147589e8723 100644 --- a/test/unit/input/fuzz.wat +++ b/test/unit/input/fuzz.wat @@ -34,7 +34,6 @@ (drop (call $import-reffed (i32.const 20) - ;; Send $A. We can refine the anyref to $A or $A2 (but not $B or $B2). (struct.new $A) ) ) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index aa475eb01ee..ecbd24db045 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -20,6 +20,11 @@ def test_against_js(self): temp_dat = tempfile.NamedTemporaryFile(suffix='.dat') initial = self.input_path('fuzz.wat') + # The set of all params we see, for the import that is refinable. Ditto + # for export results. + import_params = Set() + export_results = Set() + for _ in range(iters): # Generate raw random data with open(temp_dat.name, 'wb') as f: @@ -33,4 +38,34 @@ def test_against_js(self): wat = shared.run_process(shared.WASM_OPT + args, stdout=subprocess.PIPE).stdout - + # Find the params/results that might be refined. + for line in wat.splitlines(): + if line.startswith(' (import "module" "base" (func $import '): + params, results = parse_params_results(line) + import_params.insert(params) + assert results == 'eqref', 'cannot refine import result' + elif line.startswith(' (import "module" "base" (func $import-reffed '): + params, results = parse_params_results(line) + assert params == 'i32 anyref', 'cannot refine reffed stuff' + assert results == 'eqref', 'cannot refine import result' + if line.startswith(' (func $export '): + params, results = parse_params_results(line) + assert params == '(param $0 i32) (param $1 anyref)', 'cannot refine export params' + export_results.insert(results) + if line.startswith(' (func $export-reffed '): + params, results = parse_params_results(line) + assert params == '(param $0 i32) (param $1 anyref)', 'cannot refine export params' + assert results == 'eqref', 'cannot refine reffed stuff' + + # We looked at 1000 cases, and we should be refining half the time, so + # we must see more than one refinement, unless we are so lucky we'd win + # the lottery a thousand times and more. + print(f'import_params: {import_params}') + assert len(import_params) >= 2 + print(f'export_results: {export_results}') + assert len(export_results) >= 2 + + def parse_params_results(self, line): + # Given a line with wat params and results, parse and return them. + + From 2fe14e0df5d4ef3467f508d4fb92c6d83ab144de Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 13:07:29 -0700 Subject: [PATCH 15/47] work --- test/unit/test_fuzz_preserve.py | 42 +++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index ecbd24db045..a87f3655871 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -1,4 +1,6 @@ +import random import subprocess +import tempfile from scripts.test import shared @@ -22,8 +24,8 @@ def test_against_js(self): # The set of all params we see, for the import that is refinable. Ditto # for export results. - import_params = Set() - export_results = Set() + import_params = set() + export_results = set() for _ in range(iters): # Generate raw random data @@ -32,7 +34,7 @@ def test_against_js(self): # Generate the fuzz testcase from the random data + the initial # contents. - args = ['-ttf', temp_dat.name, '--initial-fuzz=' + initial] + args = ['-ttf', temp_dat.name, '--initial-fuzz=' + initial, '-all'] args += ['--fuzz-preserve-imports-exports', '--fuzz-against-js'] args += ['--print'] wat = shared.run_process(shared.WASM_OPT + args, @@ -65,7 +67,39 @@ def test_against_js(self): print(f'export_results: {export_results}') assert len(export_results) >= 2 + # Given a line with wat params and results, parse and return them. def parse_params_results(self, line): - # Given a line with wat params and results, parse and return them. + # Find either params or results. + def get(what, line): + ret = '' + pos = 0 + while True: + # Find the thing we are looking for. + start = line.find(what, pos) + if start < 0: + break + + # Find the end paren. + parens = 1 + end = start + 1 + while parens > 0: + if line[end] == '(': + parens += 1 + elif line[end] == ')': + parens += 1 + end += 1 + + # Add (separated by a space). + if ret: + ret += ' ' + ret += line[start:end] + + # Keep looking. + start = end + + print('find', what, line, ' ======>>>>> ', ret) + return ret + + return get('(param', line), get('(result', line) From 300c953cbec70778bb0f473cbb004ee6965a97b6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 15:43:53 -0700 Subject: [PATCH 16/47] work --- src/tools/fuzzing/fuzzing.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 1a8068fab2c..3b15389b1ac 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2479,9 +2479,12 @@ void TranslateToFuzzReader::mutateJSBoundary() { auto oldExactness = old.getExactness(); auto newExactness = new_.getExactness(); if (newExactness != oldExactness) { - // TODO: once getExactness is fixed, use + // TODO: once getExactness() is fixed (see there), use that newExactness = oneIn(2) ? Exact : Inexact; } + if (newHeapType.isBasic()) { + newExactness = Inexact; + } return Type(newHeapType, newNullability, newExactness); }; @@ -2551,6 +2554,26 @@ void TranslateToFuzzReader::mutateJSBoundary() { } func->setResults(Type(newResults)); } + + // Update return types from calls to exports whose results we refined. + struct CallUpdater : public WalkerPass> { + bool isFunctionParallel() override { return true; } + + std::unique_ptr create() override { + return std::make_unique(); + } + + void visitCall(Call* curr) { + if (curr->type != Type::unreachable) { + curr->type = getModule()->getFunction(curr->target)->getResults(); + } + } + } updater; + updater.setModule(&wasm); + updater.run(&runner, &wasm); + + // Propagate after our changes. + ReFinalize().run(&runner, &wasm); } void TranslateToFuzzReader::dropToLog(Function* func) { From 39f57ee9f40aac606160311bfdbca7f3bfd0061e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 15:48:54 -0700 Subject: [PATCH 17/47] work --- test/unit/test_fuzz_preserve.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index a87f3655871..8686faa8f6c 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -28,6 +28,8 @@ def test_against_js(self): export_results = set() for _ in range(iters): + print('.', end='') + # Generate raw random data with open(temp_dat.name, 'wb') as f: f.write(bytes([random.randint(0, 255) for x in range(size)])) @@ -43,21 +45,21 @@ def test_against_js(self): # Find the params/results that might be refined. for line in wat.splitlines(): if line.startswith(' (import "module" "base" (func $import '): - params, results = parse_params_results(line) - import_params.insert(params) - assert results == 'eqref', 'cannot refine import result' + params, results = self.parse_params_results(line) + import_params.add(params) + assert results == '(result eqref)', 'cannot refine import result' elif line.startswith(' (import "module" "base" (func $import-reffed '): - params, results = parse_params_results(line) - assert params == 'i32 anyref', 'cannot refine reffed stuff' - assert results == 'eqref', 'cannot refine import result' + params, results = self.parse_params_results(line) + assert params == '(param i32 anyref)', 'cannot refine reffed stuff' + assert results == '(result eqref)', 'cannot refine import result' if line.startswith(' (func $export '): - params, results = parse_params_results(line) + params, results = self.parse_params_results(line) assert params == '(param $0 i32) (param $1 anyref)', 'cannot refine export params' - export_results.insert(results) + export_results.add(results) if line.startswith(' (func $export-reffed '): - params, results = parse_params_results(line) + params, results = self.parse_params_results(line) assert params == '(param $0 i32) (param $1 anyref)', 'cannot refine export params' - assert results == 'eqref', 'cannot refine reffed stuff' + assert results == '(result eqref)', 'cannot refine reffed stuff' # We looked at 1000 cases, and we should be refining half the time, so # we must see more than one refinement, unless we are so lucky we'd win @@ -87,7 +89,7 @@ def get(what, line): if line[end] == '(': parens += 1 elif line[end] == ')': - parens += 1 + parens -= 1 end += 1 # Add (separated by a space). @@ -96,7 +98,7 @@ def get(what, line): ret += line[start:end] # Keep looking. - start = end + pos = end print('find', what, line, ' ======>>>>> ', ret) return ret From cffacbcbb250ed70732c4a3b15dc3e7b0c164d34 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 15:51:52 -0700 Subject: [PATCH 18/47] work --- test/unit/test_fuzz_preserve.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index 8686faa8f6c..387105a338b 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -27,8 +27,8 @@ def test_against_js(self): import_params = set() export_results = set() - for _ in range(iters): - print('.', end='') + for i in range(iters): + print(i) # Generate raw random data with open(temp_dat.name, 'wb') as f: @@ -42,6 +42,11 @@ def test_against_js(self): wat = shared.run_process(shared.WASM_OPT + args, stdout=subprocess.PIPE).stdout + # The things that begin reffed might end up not reffed, if mutation + # removes the refs. Check for that. + import_reffed_is_reffed = '(ref.func $import-reffed)' in wat + export_reffed_is_reffed = '(ref.func $export-reffed)' in wat + # Find the params/results that might be refined. for line in wat.splitlines(): if line.startswith(' (import "module" "base" (func $import '): @@ -50,7 +55,8 @@ def test_against_js(self): assert results == '(result eqref)', 'cannot refine import result' elif line.startswith(' (import "module" "base" (func $import-reffed '): params, results = self.parse_params_results(line) - assert params == '(param i32 anyref)', 'cannot refine reffed stuff' + if import_reffed_is_reffed: + assert params == '(param i32 anyref)', 'cannot refine reffed stuff' assert results == '(result eqref)', 'cannot refine import result' if line.startswith(' (func $export '): params, results = self.parse_params_results(line) @@ -59,7 +65,8 @@ def test_against_js(self): if line.startswith(' (func $export-reffed '): params, results = self.parse_params_results(line) assert params == '(param $0 i32) (param $1 anyref)', 'cannot refine export params' - assert results == '(result eqref)', 'cannot refine reffed stuff' + if export_reffed_is_reffed: + assert results == '(result eqref)', 'cannot refine reffed stuff' # We looked at 1000 cases, and we should be refining half the time, so # we must see more than one refinement, unless we are so lucky we'd win From 41cd320f55d79dc097b7aed9248da6372861b5e1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 15:56:18 -0700 Subject: [PATCH 19/47] work --- src/tools/fuzzing/fuzzing.cpp | 2 +- test/unit/test_fuzz_preserve.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 3b15389b1ac..e19ecd64194 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2413,7 +2413,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { struct FunctionInfoScanner : public WalkerPass> { - bool isFunctionParallel() override { return true; } + // Not parallel for simplicity, see the map update below. bool modifiesBinaryenIR() override { return false; } diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index 387105a338b..946aafc9bde 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -34,6 +34,9 @@ def test_against_js(self): with open(temp_dat.name, 'wb') as f: f.write(bytes([random.randint(0, 255) for x in range(size)])) + import shutil + shutil.copyfile(temp_dat.name, '/tmp/waka') + # Generate the fuzz testcase from the random data + the initial # contents. args = ['-ttf', temp_dat.name, '--initial-fuzz=' + initial, '-all'] From 063b415f54e403ca163ebc29bd88ecb6bb579cd3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 16:28:31 -0700 Subject: [PATCH 20/47] work --- src/tools/fuzzing/fuzzing.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index e19ecd64194..e5ea7810647 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2486,6 +2486,9 @@ void TranslateToFuzzReader::mutateJSBoundary() { newExactness = Inexact; } +//std::cout << "old: " << oldHeapType << " : " << oldNullability << " : " << oldExactness << '\n'; +//std::cout << "new: " << newHeapType << " : " << newNullability << " : " << newExactness << '\n'; + return Type(newHeapType, newNullability, newExactness); }; @@ -2494,9 +2497,16 @@ void TranslateToFuzzReader::mutateJSBoundary() { std::unordered_map paramLUBs; for (auto& [_, info] : map) { for (auto* call : info.callImports) { + auto declaredParams = wasm.getFunction(call->target)->getParams(); std::vector sent; - for (auto* operand : call->operands) { - sent.push_back(operand->type); + for (Index i = 0; i < call->operands.size(); i++) { + auto type = call->operands[i]->type; + if (type == Type::unreachable) { + // Nothing sent here, so use the declared type - what we refine to + // must still validate even though this call is unreachable. + type = declaredParams[i]; + } + sent.push_back(type); } paramLUBs[call->target].note(Type(sent)); } From 12d35ef250466b52395719a366928220820d3c8b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 16:38:06 -0700 Subject: [PATCH 21/47] work --- src/tools/fuzzing/fuzzing.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index e5ea7810647..40ab106dde8 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2459,12 +2459,18 @@ void TranslateToFuzzReader::mutateJSBoundary() { assert(HeapType::isSubType(newHeapType, oldHeapType)); std::vector options; options.push_back(oldHeapType); - while (newHeapType != oldHeapType) { + // We continue until we reach the old type. Note we cannot do that if + // newHeapType is null, because it has more than one super, and getSuperType + // does not work. TODO: handle all possible supers. + if (newHeapType.isNull()) { options.push_back(newHeapType); - // We continue until we reach the old type. - auto next = newHeapType.getSuperType(); - assert(next); - newHeapType = *next; + } else { + while (newHeapType != oldHeapType) { + options.push_back(newHeapType); + auto next = newHeapType.getSuperType(); + assert(next); + newHeapType = *next; + } } newHeapType = pick(options); From e5d4ea5e7e04c348d7fa14189b543df105a31d3b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 16:41:58 -0700 Subject: [PATCH 22/47] work --- src/tools/fuzzing/fuzzing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 40ab106dde8..a4971daddeb 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2462,7 +2462,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { // We continue until we reach the old type. Note we cannot do that if // newHeapType is null, because it has more than one super, and getSuperType // does not work. TODO: handle all possible supers. - if (newHeapType.isNull()) { + if (newHeapType.isBottom()) { options.push_back(newHeapType); } else { while (newHeapType != oldHeapType) { From d968315234318744ff702bd74c5fc400f2bd70d1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Mon, 27 Apr 2026 16:46:14 -0700 Subject: [PATCH 23/47] work --- test/unit/test_fuzz_preserve.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index 946aafc9bde..a30877ea892 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -18,7 +18,7 @@ def test_against_js(self): # out of date). Instead, test randomly, in a way that the chance of a # flake is unrealistic. size = 10 * 1024 - iters = 1000 + iters = 100 temp_dat = tempfile.NamedTemporaryFile(suffix='.dat') initial = self.input_path('fuzz.wat') @@ -28,7 +28,7 @@ def test_against_js(self): export_results = set() for i in range(iters): - print(i) + print(f"\r{i}/{iters}...", end='', flush=True) # Generate raw random data with open(temp_dat.name, 'wb') as f: @@ -79,6 +79,9 @@ def test_against_js(self): print(f'export_results: {export_results}') assert len(export_results) >= 2 + # We should see struct types + # We should see exactness + # Given a line with wat params and results, parse and return them. def parse_params_results(self, line): # Find either params or results. @@ -110,7 +113,7 @@ def get(what, line): # Keep looking. pos = end - print('find', what, line, ' ======>>>>> ', ret) + # print('find', what, line, ' ======>>>>> ', ret) return ret return get('(param', line), get('(result', line) From 45ce1dce2456770e0ce89c3e1efa095287ab80f0 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 09:54:07 -0700 Subject: [PATCH 24/47] work --- src/tools/fuzzing/fuzzing.cpp | 34 +++++++++++------ test/unit/test_fuzz_preserve.py | 67 ++++++++++++++++++++++++++------- 2 files changed, 75 insertions(+), 26 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index a4971daddeb..bb8c449be65 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2447,31 +2447,38 @@ void TranslateToFuzzReader::mutateJSBoundary() { // refine, we are given the maximum refinement and pick a random type between // it and the old type. auto maybeRefine = [&](Type old, Type new_) { + std::cout << "maybe " << old << " to " << new_ << '\n'; if (!new_.isRef()) { // A non-reference like i32, or unreachable (no values reach this place), // so it does not matter. return old; } - // Pick the heap type. + // Find all heap types between the old and new, starting from new. auto oldHeapType = old.getHeapType(); auto newHeapType = new_.getHeapType(); assert(HeapType::isSubType(newHeapType, oldHeapType)); std::vector options; - options.push_back(oldHeapType); - // We continue until we reach the old type. Note we cannot do that if - // newHeapType is null, because it has more than one super, and getSuperType - // does not work. TODO: handle all possible supers. - if (newHeapType.isBottom()) { + while (1) { options.push_back(newHeapType); - } else { - while (newHeapType != oldHeapType) { - options.push_back(newHeapType); - auto next = newHeapType.getSuperType(); - assert(next); - newHeapType = *next; +std::cout << " happy push " << newHeapType << '\n'; + // We cannot look at a bottom type's supers (there can be many, and the + // getSuperType() API doesn't return them). + // TODO: handle all possible supers. + if (newHeapType.isBottom()) { +std::cout << " sad push " << oldHeapType << '\n'; + options.push_back(oldHeapType); + break; + } + // Continue until we reach the old type. + if (newHeapType == oldHeapType) { + break; } + auto next = newHeapType.getSuperType(); + assert(next); + newHeapType = *next; } +std::cout << "opts: " << options.size() << '\n'; newHeapType = pick(options); // Pick the nullability. @@ -2494,6 +2501,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { //std::cout << "old: " << oldHeapType << " : " << oldNullability << " : " << oldExactness << '\n'; //std::cout << "new: " << newHeapType << " : " << newNullability << " : " << newExactness << '\n'; + std::cout << " => " << Type(newHeapType, newNullability, newExactness) << '\n'; return Type(newHeapType, newNullability, newExactness); }; @@ -2545,6 +2553,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { // Second, refine results sent from exports. for (auto& exp : wasm.exports) { + std::cout << "exp " << exp->name << '\n'; if (exp->kind != ExternalKind::Function) { continue; } @@ -2552,6 +2561,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { if (map[name].reffed) { continue; } + std::cout << " unreffed exp " << name << '\n'; // Find the LUB. auto* func = wasm.getFunction(name); diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index a30877ea892..89dfc391da6 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -1,6 +1,7 @@ import random import subprocess import tempfile +import time from scripts.test import shared @@ -17,8 +18,7 @@ def test_against_js(self): # will handle random data differently, and the test would constantly get # out of date). Instead, test randomly, in a way that the chance of a # flake is unrealistic. - size = 10 * 1024 - iters = 100 + max_size = 1024 temp_dat = tempfile.NamedTemporaryFile(suffix='.dat') initial = self.input_path('fuzz.wat') @@ -27,12 +27,34 @@ def test_against_js(self): import_params = set() export_results = set() - for i in range(iters): - print(f"\r{i}/{iters}...", end='', flush=True) + # Run for at least a certain number of iterations, but keep going after + # if we still failed to find what we want, to avoid flakes. We keep + # going until a full minute. + min_iters = 100 + start_time = time.time() + max_time = start_time + 60 + + i = 0 + while True: + i += 1 + + # We want to see some variety in both, but don't want to see + # everything we expect in both (as one might be slower than the + # other). + if self.is_varied(import_params) and self.is_varied(export_results) and \ + self.found_expected(import_params | export_results): + print(f"{i} iterations {round(time.time() - start_time, 2)} seconds)") + print(f'proper import_params : {import_params}') + print(f'proper export_results: {export_results}') + return + + if i > min_iters and time.time() > max_time: + raise Exception('looked too long and still failed') # Generate raw random data + size = random.randint(1, max_size) with open(temp_dat.name, 'wb') as f: - f.write(bytes([random.randint(0, 255) for x in range(size)])) + f.write(bytes([random.randint(0, 255) for x in range(max_size)])) import shutil shutil.copyfile(temp_dat.name, '/tmp/waka') @@ -71,16 +93,33 @@ def test_against_js(self): if export_reffed_is_reffed: assert results == '(result eqref)', 'cannot refine reffed stuff' - # We looked at 1000 cases, and we should be refining half the time, so - # we must see more than one refinement, unless we are so lucky we'd win - # the lottery a thousand times and more. - print(f'import_params: {import_params}') - assert len(import_params) >= 2 - print(f'export_results: {export_results}') - assert len(export_results) >= 2 + # Given the types we saw for params or results, see if it has some + # variety at all. Without fuzzing, we'd always see the same thing here. + def is_varied(self, data): + return len(data) >= 2 - # We should see struct types - # We should see exactness + # Given the types we saw for params or results, look in detail for the + # things we expect to see. + def found_expected(self, data): + # Look for significant variety, more than is_varied. + if len(data) < 5: + return False + + string = str(data) + + # There must be non-nullable types. + if '(ref (' not in string: + return False + + # There must be exact types. + if '(exact ' not in string: + return False + + # There must be defined types. + if ' $' not in string: + return False + + return True # Given a line with wat params and results, parse and return them. def parse_params_results(self, line): From c1fccda3eb038f397f31b1e61a2dc9ecb17436eb Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 09:56:37 -0700 Subject: [PATCH 25/47] work --- test/unit/test_fuzz_preserve.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index 89dfc391da6..ce86ea34459 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -27,10 +27,9 @@ def test_against_js(self): import_params = set() export_results = set() - # Run for at least a certain number of iterations, but keep going after - # if we still failed to find what we want, to avoid flakes. We keep - # going until a full minute. - min_iters = 100 + # Run until we find what we want. Stop only if we reached a max number + # of iterations and a timeout. + min_iters = 200 start_time = time.time() max_time = start_time + 60 @@ -119,6 +118,10 @@ def found_expected(self, data): if ' $' not in string: return False + # There must be defined non-exact types. + if '(ref $' not in string: + return False + return True # Given a line with wat params and results, parse and return them. From a7b5bfdcc18da85cddfd8b574b08d61d91355c4b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 10:40:45 -0700 Subject: [PATCH 26/47] work --- src/tools/fuzzing/fuzzing.cpp | 10 ---------- src/tools/fuzzing/random.cpp | 3 --- test/unit/test_fuzz_preserve.py | 20 +++++++------------- 3 files changed, 7 insertions(+), 26 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index bb8c449be65..22166c341c3 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2447,7 +2447,6 @@ void TranslateToFuzzReader::mutateJSBoundary() { // refine, we are given the maximum refinement and pick a random type between // it and the old type. auto maybeRefine = [&](Type old, Type new_) { - std::cout << "maybe " << old << " to " << new_ << '\n'; if (!new_.isRef()) { // A non-reference like i32, or unreachable (no values reach this place), // so it does not matter. @@ -2461,12 +2460,10 @@ void TranslateToFuzzReader::mutateJSBoundary() { std::vector options; while (1) { options.push_back(newHeapType); -std::cout << " happy push " << newHeapType << '\n'; // We cannot look at a bottom type's supers (there can be many, and the // getSuperType() API doesn't return them). // TODO: handle all possible supers. if (newHeapType.isBottom()) { -std::cout << " sad push " << oldHeapType << '\n'; options.push_back(oldHeapType); break; } @@ -2478,7 +2475,6 @@ std::cout << " sad push " << oldHeapType << '\n'; assert(next); newHeapType = *next; } -std::cout << "opts: " << options.size() << '\n'; newHeapType = pick(options); // Pick the nullability. @@ -2499,10 +2495,6 @@ std::cout << "opts: " << options.size() << '\n'; newExactness = Inexact; } -//std::cout << "old: " << oldHeapType << " : " << oldNullability << " : " << oldExactness << '\n'; -//std::cout << "new: " << newHeapType << " : " << newNullability << " : " << newExactness << '\n'; - std::cout << " => " << Type(newHeapType, newNullability, newExactness) << '\n'; - return Type(newHeapType, newNullability, newExactness); }; @@ -2553,7 +2545,6 @@ std::cout << "opts: " << options.size() << '\n'; // Second, refine results sent from exports. for (auto& exp : wasm.exports) { - std::cout << "exp " << exp->name << '\n'; if (exp->kind != ExternalKind::Function) { continue; } @@ -2561,7 +2552,6 @@ std::cout << "opts: " << options.size() << '\n'; if (map[name].reffed) { continue; } - std::cout << " unreffed exp " << name << '\n'; // Find the LUB. auto* func = wasm.getFunction(name); diff --git a/src/tools/fuzzing/random.cpp b/src/tools/fuzzing/random.cpp index cfcdbdd970e..7af7b412775 100644 --- a/src/tools/fuzzing/random.cpp +++ b/src/tools/fuzzing/random.cpp @@ -66,9 +66,6 @@ float Random::getFloat() { return Literal(get32()).reinterpretf32(); } double Random::getDouble() { return Literal(get64()).reinterpretf64(); } uint32_t Random::upTo(uint32_t x) { - if (finished()) { - return 0; - } if (x == 0) { return 0; } diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index ce86ea34459..51e0299a8a3 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -40,8 +40,7 @@ def test_against_js(self): # We want to see some variety in both, but don't want to see # everything we expect in both (as one might be slower than the # other). - if self.is_varied(import_params) and self.is_varied(export_results) and \ - self.found_expected(import_params | export_results): + if self.found_expected(import_params) and self.found_expected(export_results): print(f"{i} iterations {round(time.time() - start_time, 2)} seconds)") print(f'proper import_params : {import_params}') print(f'proper export_results: {export_results}') @@ -92,20 +91,19 @@ def test_against_js(self): if export_reffed_is_reffed: assert results == '(result eqref)', 'cannot refine reffed stuff' - # Given the types we saw for params or results, see if it has some - # variety at all. Without fuzzing, we'd always see the same thing here. - def is_varied(self, data): - return len(data) >= 2 - # Given the types we saw for params or results, look in detail for the # things we expect to see. def found_expected(self, data): - # Look for significant variety, more than is_varied. + # Look for significant variety. if len(data) < 5: return False string = str(data) + # There must be nullable types. + if '(ref null' not in string: + return False + # There must be non-nullable types. if '(ref (' not in string: return False @@ -114,11 +112,7 @@ def found_expected(self, data): if '(exact ' not in string: return False - # There must be defined types. - if ' $' not in string: - return False - - # There must be defined non-exact types. + # There must be inexact types (this also tests defined types). if '(ref $' not in string: return False From 35c498f410e3fed71c890de33088c8c5e64cf35b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 11:05:54 -0700 Subject: [PATCH 27/47] work --- test/unit/test_fuzz_preserve.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index 51e0299a8a3..78fae85a45f 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -100,6 +100,10 @@ def found_expected(self, data): string = str(data) + # Each of the following has a 50% chance to get emitted each time, so + # over many iterations, the chance of failing to find them goes + # exponentially to nothing. + # There must be nullable types. if '(ref null' not in string: return False From 98e7b09934275db65da2cdd7c408b96be8a4159c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 13:17:23 -0700 Subject: [PATCH 28/47] clean --- test/lit/fuzz-preserve-imports-exports.wast | 2 -- test/unit/test_fuzz_preserve.py | 16 ++++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/test/lit/fuzz-preserve-imports-exports.wast b/test/lit/fuzz-preserve-imports-exports.wast index d9867ff4710..e8cde8dadbe 100644 --- a/test/lit/fuzz-preserve-imports-exports.wast +++ b/test/lit/fuzz-preserve-imports-exports.wast @@ -57,5 +57,3 @@ ) ) - -how to test new flagg? diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index 78fae85a45f..3aedd51e925 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -16,8 +16,8 @@ def test_against_js(self): # # Testing this deterministically is too hard (as the fuzzer evolves, it # will handle random data differently, and the test would constantly get - # out of date). Instead, test randomly, in a way that the chance of a - # flake is unrealistic. + # out of date). Instead, test randomly, but in a way that the chance of + # a flake is unrealistic. max_size = 1024 temp_dat = tempfile.NamedTemporaryFile(suffix='.dat') initial = self.input_path('fuzz.wat') @@ -31,6 +31,8 @@ def test_against_js(self): # of iterations and a timeout. min_iters = 200 start_time = time.time() + # Locally this succeeds in less than 1 second. Give it a very wide + # margin of error to avoid flakes. max_time = start_time + 60 i = 0 @@ -109,14 +111,20 @@ def found_expected(self, data): return False # There must be non-nullable types. - if '(ref (' not in string: + if '(ref (' not in string and '(ref $' not in string: + return False + + string = string.replace('null ', '') + + # There must be defined types. + if ' $' not in string: return False # There must be exact types. if '(exact ' not in string: return False - # There must be inexact types (this also tests defined types). + # There must be inexact types. if '(ref $' not in string: return False From 26d3f3ee29f2569a7ca07272239c0ba35cfa7ba1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 13:17:53 -0700 Subject: [PATCH 29/47] go --- scripts/fuzz_opt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index 69d51259e92..4436a965a79 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -2161,6 +2161,7 @@ def do_handle_pair(self, input, before_wasm, after_wasm, opts): input, '-ttf', '--fuzz-preserve-imports-exports', + '--fuzz-against-js', '--initial-fuzz=' + wat_file, '-o', pre_wasm, '-g', From 90c18afd1c389bf0f43c6ada305bc2dd7e7c6a4a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 14:43:52 -0700 Subject: [PATCH 30/47] cleanup --- src/tools/fuzzing/fuzzing.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 22166c341c3..5a33d473ae6 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -415,7 +415,6 @@ void TranslateToFuzzReader::build() { ReFinalize().run(&runner, &wasm); ReFinalize().walkModuleCode(&wasm); - // If fuzzing against JS, we can refine if (againstJS) { mutateJSBoundary(); } @@ -2403,7 +2402,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { struct FunctionInfo { // Whether there are references to this function itself. - std::atomic reffed = false; + bool reffed = false; // Calls to imports from this function. std::vector callImports; From f3d11a6594019e8abd6a14185f5ec1077bc75854 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 14:48:40 -0700 Subject: [PATCH 31/47] cleanup --- test/unit/test_fuzz_preserve.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index 3aedd51e925..72d0376ecb1 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -39,9 +39,6 @@ def test_against_js(self): while True: i += 1 - # We want to see some variety in both, but don't want to see - # everything we expect in both (as one might be slower than the - # other). if self.found_expected(import_params) and self.found_expected(export_results): print(f"{i} iterations {round(time.time() - start_time, 2)} seconds)") print(f'proper import_params : {import_params}') @@ -56,9 +53,6 @@ def test_against_js(self): with open(temp_dat.name, 'wb') as f: f.write(bytes([random.randint(0, 255) for x in range(max_size)])) - import shutil - shutil.copyfile(temp_dat.name, '/tmp/waka') - # Generate the fuzz testcase from the random data + the initial # contents. args = ['-ttf', temp_dat.name, '--initial-fuzz=' + initial, '-all'] @@ -161,7 +155,6 @@ def get(what, line): # Keep looking. pos = end - # print('find', what, line, ' ======>>>>> ', ret) return ret return get('(param', line), get('(result', line) From 44fe786f7cb178ab8d3e0ad9a22550d1ad9eed0e Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 15:00:36 -0700 Subject: [PATCH 32/47] fix --- src/tools/fuzzing/fuzzing.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 5a33d473ae6..548f6e1a818 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2524,6 +2524,11 @@ void TranslateToFuzzReader::mutateJSBoundary() { if (map[func->name].reffed) { continue; } + // Do not alter the signature of configureAll, which will make the VM + // reject it immediately. + if (intrinsics.isConfigureAll(func.get()) { + continue; + } // Find the LUB, which is the most we can refine. auto lub = paramLUBs[func->name]; From 79e65d7924077fe81e5046fd4af1b50f67bac96f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 15:01:16 -0700 Subject: [PATCH 33/47] fix --- src/tools/fuzzing/fuzzing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 548f6e1a818..9c462c1a61a 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2526,7 +2526,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { } // Do not alter the signature of configureAll, which will make the VM // reject it immediately. - if (intrinsics.isConfigureAll(func.get()) { + if (intrinsics.isConfigureAll(func.get())) { continue; } From 98bd0ac160e9d0ed0b70260f24a90224f89669d5 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 15:12:32 -0700 Subject: [PATCH 34/47] go --- src/tools/fuzzing/fuzzing.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 9c462c1a61a..41a9ea622ee 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2524,9 +2524,10 @@ void TranslateToFuzzReader::mutateJSBoundary() { if (map[func->name].reffed) { continue; } - // Do not alter the signature of configureAll, which will make the VM - // reject it immediately. - if (intrinsics.isConfigureAll(func.get())) { + // Do not alter the signature of configureAll or other VM builtins. Changing + // these to something the VM does not expect will just cause it to + // immediately reject the module by trapping. + if (func->module.startsWith("wasm:")) { continue; } From 4179d3149b1a933df96d16bf2f0d91f6d63eb689 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 15:37:47 -0700 Subject: [PATCH 35/47] fix lint --- test/unit/test_fuzz_preserve.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index 72d0376ecb1..953c99e98ee 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -51,7 +51,7 @@ def test_against_js(self): # Generate raw random data size = random.randint(1, max_size) with open(temp_dat.name, 'wb') as f: - f.write(bytes([random.randint(0, 255) for x in range(max_size)])) + f.write(bytes([random.randint(0, 255) for x in range(size)])) # Generate the fuzz testcase from the random data + the initial # contents. @@ -158,4 +158,3 @@ def get(what, line): return ret return get('(param', line), get('(result', line) - From 2e2d0289decde41d529ee6579e071954558c50c7 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 28 Apr 2026 16:37:16 -0700 Subject: [PATCH 36/47] lint --- test/unit/test_fuzz_preserve.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index 953c99e98ee..bfc9b77f8ba 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -90,6 +90,9 @@ def test_against_js(self): # Given the types we saw for params or results, look in detail for the # things we expect to see. def found_expected(self, data): + # The many returns here seem to be the best way to write this code. + # ruff: noqa: PLR0911 + # Look for significant variety. if len(data) < 5: return False From 1f7e875958495f3d0e49bcba1623206d173388ea Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 08:34:15 -0700 Subject: [PATCH 37/47] fix --- src/tools/fuzzing/fuzzing.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 41a9ea622ee..d56e9549704 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2490,7 +2490,13 @@ void TranslateToFuzzReader::mutateJSBoundary() { // TODO: once getExactness() is fixed (see there), use that newExactness = oneIn(2) ? Exact : Inexact; } - if (newHeapType.isBasic()) { + // We can only be exact if we are using the new heap type: that type is + // exactly what is sent here, and no intermediate heap type would be valid. + // For example, given $A :> $B :> $C, then maybeRefine($A, exact $C) can + // return exact $C, but cannot return exact $B. + // + // Also, basic heap types cannot be exact. + if (newHeapType != new_.getHeapType() || newHeapType.isBasic()) { newExactness = Inexact; } From 06357e5f69e578c634bddeff5d129c225bc678ed Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 10:03:05 -0700 Subject: [PATCH 38/47] least restrictions when unreachable --- src/tools/fuzzing/fuzzing.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index d56e9549704..8dc47cc44c2 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2513,9 +2513,13 @@ void TranslateToFuzzReader::mutateJSBoundary() { for (Index i = 0; i < call->operands.size(); i++) { auto type = call->operands[i]->type; if (type == Type::unreachable) { - // Nothing sent here, so use the declared type - what we refine to - // must still validate even though this call is unreachable. + // Nothing sent here. What we refine to must still validate, even + // though this call is unreachable. Using the non-nullable bottom type + // is valid, and has the fewest restrictions. type = declaredParams[i]; + if (type.isRef()) { + type = Type(type.getHeapType().getBottom(), NonNullable); + } } sent.push_back(type); } From a079f2e222fa8d000c4927a9292d13e0d095c86c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 10:04:46 -0700 Subject: [PATCH 39/47] wrapper TODO --- src/tools/fuzzing/fuzzing.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 8dc47cc44c2..30ae7fa38af 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2531,6 +2531,8 @@ void TranslateToFuzzReader::mutateJSBoundary() { if (!func->imported()) { continue; } + // TODO: In the reffed cast, we could consider using import/export wrappers + // and refining just there. if (map[func->name].reffed) { continue; } From a7b5125547399b05d779e05d4ea0a6f53bd8840d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 10:06:38 -0700 Subject: [PATCH 40/47] avoid typos in comments --- test/unit/input/fuzz.wat | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/input/fuzz.wat b/test/unit/input/fuzz.wat index 147589e8723..031770ea561 100644 --- a/test/unit/input/fuzz.wat +++ b/test/unit/input/fuzz.wat @@ -8,11 +8,11 @@ (type $B2 (sub $B(struct))) ) - ;; Two imports, one which will be reffed. + ;; Two imports, one which will be referenced. (import "module" "base" (func $import (param i32 anyref) (result eqref))) (import "module" "base" (func $import-reffed (param i32 anyref) (result eqref))) - ;; Two exports, one which will be reffed. + ;; Two exports, one which will be referenced. (func $export (export "export") (param $0 i32) (param $1 anyref) (result eqref) ;; Add the refs. From 149f30093d88f561308dc6549ceeb062100a0845 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 11:54:00 -0700 Subject: [PATCH 41/47] typo --- src/tools/fuzzing/fuzzing.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 30ae7fa38af..3bc933b2cbc 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2531,8 +2531,8 @@ void TranslateToFuzzReader::mutateJSBoundary() { if (!func->imported()) { continue; } - // TODO: In the reffed cast, we could consider using import/export wrappers - // and refining just there. + // TODO: In the referenced case, we could consider using import/export + // wrappers and refining just there. if (map[func->name].reffed) { continue; } From 130029f98450b652537ba9f96dd000ed788662e3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 15:52:48 -0700 Subject: [PATCH 42/47] Refactor --- test/unit/test_fuzz_preserve.py | 140 +++++++++++++++++++------------- 1 file changed, 85 insertions(+), 55 deletions(-) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index bfc9b77f8ba..2ca44b3dfc4 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -8,84 +8,108 @@ from . import utils -class PreserveFuzzTest(utils.BinaryenTestCase): - def test_against_js(self): - # When --fuzz-against-js is used, the wasm is only going to be fuzzed - # against JS, so the fuzzer mutates the boundary in valid ways, even if - # --fuzz-preserve-imports-exports is set. - # - # Testing this deterministically is too hard (as the fuzzer evolves, it - # will handle random data differently, and the test would constantly get - # out of date). Instead, test randomly, but in a way that the chance of - # a flake is unrealistic. - max_size = 1024 +# Runs the fuzzer many times and allows checking for specific variety in the +# output. Calls hooks: +# +# self.found_variety() - checks if we found what we are looking for +# self.process_wat(wat) - receives the current fuzz wat +# +class FuzzerVarietyTester: + # Run until we find what we want. Stop only if we reached a max number + # of iterations and a timeout. + max_time = 60 + min_iters = 200 + + # The maximum size of the wasm-generating input + max_size = 1024 + + def __init__(self, initial): + self.initial = initial + + def test(self): temp_dat = tempfile.NamedTemporaryFile(suffix='.dat') - initial = self.input_path('fuzz.wat') - - # The set of all params we see, for the import that is refinable. Ditto - # for export results. - import_params = set() - export_results = set() - # Run until we find what we want. Stop only if we reached a max number - # of iterations and a timeout. - min_iters = 200 start_time = time.time() - # Locally this succeeds in less than 1 second. Give it a very wide - # margin of error to avoid flakes. - max_time = start_time + 60 + stop_time = start_time + self.max_time i = 0 while True: i += 1 - if self.found_expected(import_params) and self.found_expected(export_results): + # Stop early if we found what we are looking for. + if self.found_variety(): print(f"{i} iterations {round(time.time() - start_time, 2)} seconds)") - print(f'proper import_params : {import_params}') - print(f'proper export_results: {export_results}') + print(f'proper import_params : {self.import_params}') + print(f'proper export_results: {self.export_results}') return - if i > min_iters and time.time() > max_time: + if i > self.min_iters and time.time() > stop_time: raise Exception('looked too long and still failed') # Generate raw random data - size = random.randint(1, max_size) + size = random.randint(1, self.max_size) with open(temp_dat.name, 'wb') as f: f.write(bytes([random.randint(0, 255) for x in range(size)])) # Generate the fuzz testcase from the random data + the initial # contents. - args = ['-ttf', temp_dat.name, '--initial-fuzz=' + initial, '-all'] - args += ['--fuzz-preserve-imports-exports', '--fuzz-against-js'] + args = ['-ttf', temp_dat.name, '--initial-fuzz=' + self.initial, '-all'] + args += self.ttf_args args += ['--print'] wat = shared.run_process(shared.WASM_OPT + args, stdout=subprocess.PIPE).stdout - # The things that begin reffed might end up not reffed, if mutation - # removes the refs. Check for that. - import_reffed_is_reffed = '(ref.func $import-reffed)' in wat - export_reffed_is_reffed = '(ref.func $export-reffed)' in wat - - # Find the params/results that might be refined. - for line in wat.splitlines(): - if line.startswith(' (import "module" "base" (func $import '): - params, results = self.parse_params_results(line) - import_params.add(params) - assert results == '(result eqref)', 'cannot refine import result' - elif line.startswith(' (import "module" "base" (func $import-reffed '): - params, results = self.parse_params_results(line) - if import_reffed_is_reffed: - assert params == '(param i32 anyref)', 'cannot refine reffed stuff' - assert results == '(result eqref)', 'cannot refine import result' - if line.startswith(' (func $export '): - params, results = self.parse_params_results(line) - assert params == '(param $0 i32) (param $1 anyref)', 'cannot refine export params' - export_results.add(results) - if line.startswith(' (func $export-reffed '): - params, results = self.parse_params_results(line) - assert params == '(param $0 i32) (param $1 anyref)', 'cannot refine export params' - if export_reffed_is_reffed: - assert results == '(result eqref)', 'cannot refine reffed stuff' + self.process_wat(wat) + + +class FuzzAgainstJSVarietyTester(FuzzerVarietyTester): + # When --fuzz-against-js is used, the wasm is only going to be fuzzed + # against JS, so the fuzzer mutates the boundary in valid ways, even if + # --fuzz-preserve-imports-exports is set. + # + # Testing this deterministically is too hard (as the fuzzer evolves, it + # will handle random data differently, and the test would constantly get + # out of date). Instead, test randomly, but in a way that the chance of + # a flake is unrealistic. + ttf_args = ['--fuzz-preserve-imports-exports', '--fuzz-against-js'] + + def __init__(self, initial): + super().__init__(initial) + + # The set of all params we see, for the import that is refinable. Ditto + # for export results. + self.import_params = set() + self.export_results = set() + + def found_variety(self): + return self.found_expected(self.import_params) and self.found_expected(self.export_results) + + def process_wat(self, wat): + # The things that begin reffed might end up not reffed, if mutation + # removes the refs. Check for that. + import_reffed_is_reffed = '(ref.func $import-reffed)' in wat + export_reffed_is_reffed = '(ref.func $export-reffed)' in wat + + # Find the params/results that might be refined. + for line in wat.splitlines(): + if line.startswith(' (import "module" "base" (func $import '): + params, results = self.parse_params_results(line) + self.import_params.add(params) + assert results == '(result eqref)', 'cannot refine import result' + elif line.startswith(' (import "module" "base" (func $import-reffed '): + params, results = self.parse_params_results(line) + if import_reffed_is_reffed: + assert params == '(param i32 anyref)', 'cannot refine reffed stuff' + assert results == '(result eqref)', 'cannot refine import result' + if line.startswith(' (func $export '): + params, results = self.parse_params_results(line) + assert params == '(param $0 i32) (param $1 anyref)', 'cannot refine export params' + self.export_results.add(results) + if line.startswith(' (func $export-reffed '): + params, results = self.parse_params_results(line) + assert params == '(param $0 i32) (param $1 anyref)', 'cannot refine export params' + if export_reffed_is_reffed: + assert results == '(result eqref)', 'cannot refine reffed stuff' # Given the types we saw for params or results, look in detail for the # things we expect to see. @@ -161,3 +185,9 @@ def get(what, line): return ret return get('(param', line), get('(result', line) + + +class PreserveFuzzTest(utils.BinaryenTestCase): + def test_against_js(self): + FuzzAgainstJSVarietyTester(self.input_path('fuzz.wat')).test() + From ca52c2d3f778b3eb0db009ff8bbc75d5f9080f8d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 29 Apr 2026 16:37:44 -0700 Subject: [PATCH 43/47] linkt --- test/unit/test_fuzz_preserve.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/unit/test_fuzz_preserve.py b/test/unit/test_fuzz_preserve.py index 2ca44b3dfc4..9f8590eff49 100644 --- a/test/unit/test_fuzz_preserve.py +++ b/test/unit/test_fuzz_preserve.py @@ -190,4 +190,3 @@ def get(what, line): class PreserveFuzzTest(utils.BinaryenTestCase): def test_against_js(self): FuzzAgainstJSVarietyTester(self.input_path('fuzz.wat')).test() - From ada15912e3d8e377d3f161935943329134ff262f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 5 May 2026 13:46:06 -0700 Subject: [PATCH 44/47] use interestingHeapSubTypes for bottom types --- src/tools/fuzzing/fuzzing.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 22166c341c3..9a13ef5f58f 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2461,10 +2461,12 @@ void TranslateToFuzzReader::mutateJSBoundary() { while (1) { options.push_back(newHeapType); // We cannot look at a bottom type's supers (there can be many, and the - // getSuperType() API doesn't return them). - // TODO: handle all possible supers. + // getSuperType() API doesn't return them), but can use + // interestingHeapSubTypes on the top. if (newHeapType.isBottom()) { - options.push_back(oldHeapType); + for (auto type : interestingHeapSubTypes[newHeapType.getTop()]) { + options.push_back(type); + } break; } // Continue until we reach the old type. From 3ddc4feaab5c66bcb007dc4c3e514366a0eea09f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 5 May 2026 13:59:19 -0700 Subject: [PATCH 45/47] refine unreachable code to the bottom --- src/tools/fuzzing/fuzzing.cpp | 43 +++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 9a13ef5f58f..e2234ae1877 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2446,15 +2446,28 @@ void TranslateToFuzzReader::mutateJSBoundary() { // are not changing the arity, which JS might notice). Each place we may // refine, we are given the maximum refinement and pick a random type between // it and the old type. - auto maybeRefine = [&](Type old, Type new_) { - if (!new_.isRef()) { + // + // We receive the new type, computed as a LUBFinder, and the index in that + // LUB. + auto maybeRefine = [&](Type old, LUBFinder newLUB, Index lubIndex) { + if (!old.isRef()) { // A non-reference like i32, or unreachable (no values reach this place), // so it does not matter. return old; } + auto oldHeapType = old.getHeapType(); + + Type new_; + if (newLUB.noted()) { + new_ = newLUB.getLUB()[lubIndex]; + assert(new_.isRef()); + } else { + // Nothing was noted, so this is unreachable code. We can still refine to + // the bottom. + new_ = Type(oldHeapType.getBottom(), NonNullable); + } // Find all heap types between the old and new, starting from new. - auto oldHeapType = old.getHeapType(); auto newHeapType = new_.getHeapType(); assert(HeapType::isSubType(newHeapType, oldHeapType)); std::vector options; @@ -2528,19 +2541,16 @@ void TranslateToFuzzReader::mutateJSBoundary() { continue; } - // Find the LUB, which is the most we can refine. - auto lub = paramLUBs[func->name]; - if (!lub.noted()) { - continue; - } - // Refine. + auto lub = paramLUBs[func->name]; auto oldParams = func->getParams(); auto lubType = lub.getLUB(); - assert(oldParams.size() == lubType.size()); + // Either the LUB has the right data shape, or nothing was noted (this is + // unreachable). + assert(oldParams.size() == lubType.size() || !lub.noted()); std::vector newParams; for (Index i = 0; i < lubType.size(); i++) { - newParams.push_back(maybeRefine(oldParams[i], lubType[i])); + newParams.push_back(maybeRefine(oldParams[i], lub, i)); } func->setParams(Type(newParams)); } @@ -2555,20 +2565,15 @@ void TranslateToFuzzReader::mutateJSBoundary() { continue; } - // Find the LUB. + // Refine. auto* func = wasm.getFunction(name); auto lub = LUB::getResultsLUB(func, wasm); - if (!lub.noted()) { - continue; - } - - // Refine. auto oldResults = func->getResults(); auto lubType = lub.getLUB(); - assert(oldResults.size() == lubType.size()); + assert(oldResults.size() == lubType.size() || !lub.noted()); std::vector newResults; for (Index i = 0; i < lubType.size(); i++) { - newResults.push_back(maybeRefine(oldResults[i], lubType[i])); + newResults.push_back(maybeRefine(oldResults[i], lub, i)); } func->setResults(Type(newResults)); } From cd47cb63b7f60dce6a3b460e9e09696dd07b4e52 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 5 May 2026 14:02:47 -0700 Subject: [PATCH 46/47] save a bit --- src/tools/fuzzing/fuzzing.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index e2234ae1877..f94498d7482 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2502,12 +2502,11 @@ void TranslateToFuzzReader::mutateJSBoundary() { // Pick the exactness. auto oldExactness = old.getExactness(); auto newExactness = new_.getExactness(); - if (newExactness != oldExactness) { - // TODO: once getExactness() is fixed (see there), use that - newExactness = oneIn(2) ? Exact : Inexact; - } if (newHeapType.isBasic()) { newExactness = Inexact; + } else if (newExactness != oldExactness) { + // TODO: once getExactness() is fixed (see there), use that + newExactness = oneIn(2) ? Exact : Inexact; } return Type(newHeapType, newNullability, newExactness); From 2f67d1d5553b9d51f555589b0d0bcb97c79638dd Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 6 May 2026 10:51:26 -0700 Subject: [PATCH 47/47] refactor to localize LUB-handling code, leaving the main maybeRefine() operating purely on Types --- src/tools/fuzzing/fuzzing.cpp | 40 ++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/tools/fuzzing/fuzzing.cpp b/src/tools/fuzzing/fuzzing.cpp index 535f5311576..1ee1165e2c0 100644 --- a/src/tools/fuzzing/fuzzing.cpp +++ b/src/tools/fuzzing/fuzzing.cpp @@ -2445,28 +2445,13 @@ void TranslateToFuzzReader::mutateJSBoundary() { // are not changing the arity, which JS might notice). Each place we may // refine, we are given the maximum refinement and pick a random type between // it and the old type. - // - // We receive the new type, computed as a LUBFinder, and the index in that - // LUB. - auto maybeRefine = [&](Type old, LUBFinder newLUB, Index lubIndex) { + auto maybeRefine = [&](Type old, Type new_) { if (!old.isRef()) { - // A non-reference like i32, or unreachable (no values reach this place), - // so it does not matter. return old; } - auto oldHeapType = old.getHeapType(); - - Type new_; - if (newLUB.noted()) { - new_ = newLUB.getLUB()[lubIndex]; - assert(new_.isRef()); - } else { - // Nothing was noted, so this is unreachable code. We can still refine to - // the bottom. - new_ = Type(oldHeapType.getBottom(), NonNullable); - } // Find all heap types between the old and new, starting from new. + auto oldHeapType = old.getHeapType(); auto newHeapType = new_.getHeapType(); assert(HeapType::isSubType(newHeapType, oldHeapType)); std::vector options; @@ -2517,6 +2502,23 @@ void TranslateToFuzzReader::mutateJSBoundary() { return Type(newHeapType, newNullability, newExactness); }; + // Given a set of types (all params or all results), and an index among them, + // refine that index if we can. It is possible that no new types exist at all, + // if the code was unreachable and we noted nothing. + auto maybeRefineIndex = [&](Type oldTypes, LUBFinder newLUB, Index index) { + auto old = oldTypes[index]; + if (newLUB.noted()) { + return maybeRefine(old, newLUB.getLUB()[index]); + } + + // Nothing was noted, so this is unreachable code. We can still refine to + // the bottom in some cases. + if (!old.isRef()) { + return old; + } + return maybeRefine(old, Type(old.getHeapType().getBottom(), NonNullable)); + }; + // First, refine params sent to imports. Gather the LUB sent to each import, // and then refine. std::unordered_map paramLUBs; @@ -2566,7 +2568,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { assert(oldParams.size() == lubType.size() || !lub.noted()); std::vector newParams; for (Index i = 0; i < lubType.size(); i++) { - newParams.push_back(maybeRefine(oldParams[i], lub, i)); + newParams.push_back(maybeRefineIndex(oldParams, lub, i)); } func->setParams(Type(newParams)); } @@ -2589,7 +2591,7 @@ void TranslateToFuzzReader::mutateJSBoundary() { assert(oldResults.size() == lubType.size() || !lub.noted()); std::vector newResults; for (Index i = 0; i < lubType.size(); i++) { - newResults.push_back(maybeRefine(oldResults[i], lub, i)); + newResults.push_back(maybeRefineIndex(oldResults, lub, i)); } func->setResults(Type(newResults)); }