Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
91c30a0
start
kripken Apr 23, 2026
65c2a31
work
kripken Apr 23, 2026
d08c29f
work
kripken Apr 23, 2026
f3a07c8
work
kripken Apr 23, 2026
823c665
work
kripken Apr 23, 2026
767c8b4
work
kripken Apr 23, 2026
c7d7f1b
work
kripken Apr 23, 2026
fc981c7
work
kripken Apr 23, 2026
ebb1106
work
kripken Apr 23, 2026
fa2da6e
work
kripken Apr 23, 2026
2c3c192
help
kripken Apr 27, 2026
53b1438
work
kripken Apr 27, 2026
255250d
work
kripken Apr 27, 2026
169f4c8
work
kripken Apr 27, 2026
2fe14e0
work
kripken Apr 27, 2026
300c953
work
kripken Apr 27, 2026
39f57ee
work
kripken Apr 27, 2026
cffacbc
work
kripken Apr 27, 2026
41cd320
work
kripken Apr 27, 2026
063b415
work
kripken Apr 27, 2026
12d35ef
work
kripken Apr 27, 2026
e5d4ea5
work
kripken Apr 27, 2026
d968315
work
kripken Apr 27, 2026
45ce1dc
work
kripken Apr 28, 2026
c1fccda
work
kripken Apr 28, 2026
a7b5bfd
work
kripken Apr 28, 2026
35c498f
work
kripken Apr 28, 2026
98e7b09
clean
kripken Apr 28, 2026
26d3f3e
go
kripken Apr 28, 2026
c0c6dae
Merge remote-tracking branch 'origin/main' into fuzz.against.js
kripken Apr 28, 2026
90c18af
cleanup
kripken Apr 28, 2026
f3d11a6
cleanup
kripken Apr 28, 2026
44fe786
fix
kripken Apr 28, 2026
79e65d7
fix
kripken Apr 28, 2026
98bd0ac
go
kripken Apr 28, 2026
4179d31
fix lint
kripken Apr 28, 2026
2e2d028
lint
kripken Apr 28, 2026
a7bf270
Merge remote-tracking branch 'origin/main' into fuzz.against.js
kripken Apr 28, 2026
1f7e875
fix
kripken Apr 29, 2026
06357e5
least restrictions when unreachable
kripken Apr 29, 2026
a079f2e
wrapper TODO
kripken Apr 29, 2026
a7b5125
avoid typos in comments
kripken Apr 29, 2026
149f300
typo
kripken Apr 29, 2026
130029f
Refactor
kripken Apr 29, 2026
ca52c2d
lint
kripken Apr 29, 2026
c59b29b
Merge remote-tracking branch 'origin/main' into fuzz.against.js
kripken May 5, 2026
ada1591
use interestingHeapSubTypes for bottom types
kripken May 5, 2026
3ddc4fe
refine unreachable code to the bottom
kripken May 5, 2026
cd47cb6
save a bit
kripken May 5, 2026
59b11d3
merge
kripken May 5, 2026
1b857b1
Merge remote-tracking branch 'origin/main' into fuzz.against.js
kripken May 6, 2026
ebe62e9
Merge remote-tracking branch 'origin/main' into fuzz.against.js
kripken May 6, 2026
2f67d1d
refactor to localize LUB-handling code, leaving the main maybeRefine(…
kripken May 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions scripts/fuzz_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2202,6 +2202,7 @@ def do_handle_pair(self, input, before_wasm, after_wasm, opts):
input,
'-ttf',
'--fuzz-preserve-imports-exports',
'--fuzz-against-js',
'--initial-fuzz=' + wat_file,
'-o', pre_wasm,
'-g',
Expand Down
10 changes: 10 additions & 0 deletions src/tools/fuzzing.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ class TranslateToFuzzReader {
void setPreserveImportsAndExports(bool preserveImportsAndExports_) {
preserveImportsAndExports = preserveImportsAndExports_;
}
void setAgainstJS(bool againstJS_) { againstJS = againstJS_; }
void setImportedModule(std::string importedModuleName);

void build();
Expand Down Expand Up @@ -159,6 +160,11 @@ class TranslateToFuzzReader {
// existing testcase (using initial-content).
bool preserveImportsAndExports = false;

// Whether the wasm will be used from JS and in no other way. This lets us
// modify the wasm in ways that keep it valid from JS's point of view, but
// which might cause issues when linked against wasm or used otherwise.
bool againstJS = false;

// An optional module to import from.
std::optional<Module> importedModule;

Expand Down Expand Up @@ -409,6 +415,10 @@ class TranslateToFuzzReader {
void fixAfterChanges(Function* func);
void modifyInitialFunctions();

// Mutate the JS boundary, that is, make changes on the wasm side that JS
// would not be broken by (JS does not care about types).
void mutateJSBoundary();

// Note a global for use during code generation.
void useGlobalLater(Global* global);

Expand Down
228 changes: 228 additions & 0 deletions src/tools/fuzzing/fuzzing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "ir/glbs.h"
#include "ir/iteration.h"
#include "ir/local-structural-dominance.h"
#include "ir/lubs.h"
#include "ir/module-utils.h"
#include "ir/names.h"
#include "ir/subtype-exprs.h"
Expand Down Expand Up @@ -413,6 +414,10 @@ void TranslateToFuzzReader::build() {
PassRunner runner(&wasm);
ReFinalize().run(&runner, &wasm);
ReFinalize().walkModuleCode(&wasm);

if (againstJS) {
mutateJSBoundary();
}
}

void TranslateToFuzzReader::setupMemory() {
Expand Down Expand Up @@ -2389,6 +2394,229 @@ void TranslateToFuzzReader::modifyInitialFunctions() {
}
}

void TranslateToFuzzReader::mutateJSBoundary() {
assert(againstJS);

// Scan to find functions whose address is taken. We cannot modify their
// signatures at all.

struct FunctionInfo {
// Whether there are references to this function itself.
bool reffed = false;

// Calls to imports from this function.
std::vector<Call*> callImports;
};

using NameInfoMap = std::unordered_map<Name, FunctionInfo>;

struct FunctionInfoScanner
: public WalkerPass<PostWalker<FunctionInfoScanner>> {
// Not parallel for simplicity, see the map update below.

bool modifiesBinaryenIR() override { return false; }

NameInfoMap& map;

FunctionInfoScanner(NameInfoMap& map) : map(map) {}

std::unique_ptr<Pass> create() override {
return std::make_unique<FunctionInfoScanner>(map);
}

void visitCall(Call* curr) {
if (getModule()->getFunction(curr->target)->imported()) {
map[curr->target].callImports.push_back(curr);
}
}

void visitRefFunc(RefFunc* curr) { map[curr->func].reffed = true; }
};

NameInfoMap map;
FunctionInfoScanner scanner(map);
PassRunner runner(&wasm);
scanner.setModule(&wasm);
scanner.run(&runner, &wasm);
scanner.walkModuleCode(&wasm);

// If a function does not have its address taken, we can refine types. This is
// safe because we will still send and receive the right number of values (we
// are not changing the arity, which JS might notice). Each place we may
// refine, we are given the maximum refinement and pick a random type between
// it and the old type.
auto maybeRefine = [&](Type old, Type new_) {
if (!old.isRef()) {
return old;
}

// Find all heap types between the old and new, starting from new.
auto oldHeapType = old.getHeapType();
auto newHeapType = new_.getHeapType();
assert(HeapType::isSubType(newHeapType, oldHeapType));
std::vector<HeapType> options;
while (1) {
options.push_back(newHeapType);
// We cannot look at a bottom type's supers (there can be many, and the
// getSuperType() API doesn't return them), but can use
// interestingHeapSubTypes on the top.
if (newHeapType.isBottom()) {
for (auto type : interestingHeapSubTypes[newHeapType.getTop()]) {
options.push_back(type);
}
break;
}
// Continue until we reach the old type.
if (newHeapType == oldHeapType) {
break;
}
auto next = newHeapType.getSuperType();
assert(next);
newHeapType = *next;
}
newHeapType = pick(options);

// Pick the nullability.
auto oldNullability = old.getNullability();
auto newNullability = new_.getNullability();
if (newNullability != oldNullability) {
newNullability = getNullability();
}

// Pick the exactness.
auto oldExactness = old.getExactness();
auto newExactness = new_.getExactness();
// We can only be exact if we are using the new heap type: that type is
// exactly what is sent here, and no intermediate heap type would be valid.
// For example, given $A :> $B :> $C, then maybeRefine($A, exact $C) can
// return exact $C, but cannot return exact $B.
//
// Also, basic heap types cannot be exact.
if (newHeapType != new_.getHeapType() || newHeapType.isBasic()) {
newExactness = Inexact;
} else if (newExactness != oldExactness) {
// TODO: once getExactness() is fixed (see there), use that
newExactness = oneIn(2) ? Exact : Inexact;
}
Comment on lines +2495 to 2500
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might as well check these conditions before burning a bit to generate a new exactness above.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bump.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, saved that bit.


return Type(newHeapType, newNullability, newExactness);
};

// Given a set of types (all params or all results), and an index among them,
// refine that index if we can. It is possible that no new types exist at all,
// if the code was unreachable and we noted nothing.
auto maybeRefineIndex = [&](Type oldTypes, LUBFinder newLUB, Index index) {
auto old = oldTypes[index];
if (newLUB.noted()) {
return maybeRefine(old, newLUB.getLUB()[index]);
}

// Nothing was noted, so this is unreachable code. We can still refine to
// the bottom in some cases.
if (!old.isRef()) {
return old;
}
return maybeRefine(old, Type(old.getHeapType().getBottom(), NonNullable));
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this could all be replaced with the following:

auto lub = newLUB.noted() ? newLUB.getLUB()[index] : Type(Type::unreachable);
return maybeRefine(oldTypes[index], lub);

if we also added this to maybeRefine:

if (new_ == Type::unreachable) {
  new_ = Type(oldHeapType.getBottom(), NonNullable);
}

But I see that the noted and non-noted cases requiring different indexing is the source of the extra work. We could fix that by allowing arbitrary indexing into Type::unreachable (and having it return Type::unreachable at every index), but it's probably not worth making that change to simplify just this code.

Whatever way of structuring this you think is best SGTM.

};

// First, refine params sent to imports. Gather the LUB sent to each import,
// and then refine.
std::unordered_map<Name, LUBFinder> paramLUBs;
for (auto& [_, info] : map) {
for (auto* call : info.callImports) {
auto declaredParams = wasm.getFunction(call->target)->getParams();
std::vector<Type> sent;
for (Index i = 0; i < call->operands.size(); i++) {
auto type = call->operands[i]->type;
if (type == Type::unreachable) {
// Nothing sent here. What we refine to must still validate, even
// though this call is unreachable. Using the non-nullable bottom type
// is valid, and has the fewest restrictions.
type = declaredParams[i];
if (type.isRef()) {
type = Type(type.getHeapType().getBottom(), NonNullable);
}
}
Comment thread
tlively marked this conversation as resolved.
sent.push_back(type);
}
paramLUBs[call->target].note(Type(sent));
}
}

for (auto& func : wasm.functions) {
if (!func->imported()) {
continue;
}
// TODO: In the referenced case, we could consider using import/export
// wrappers and refining just there.
if (map[func->name].reffed) {
continue;
}
Comment thread
tlively marked this conversation as resolved.
// Do not alter the signature of configureAll or other VM builtins. Changing
// these to something the VM does not expect will just cause it to
// immediately reject the module by trapping.
if (func->module.startsWith("wasm:")) {
continue;
}

// Refine.
auto lub = paramLUBs[func->name];
auto oldParams = func->getParams();
auto lubType = lub.getLUB();
// Either the LUB has the right data shape, or nothing was noted (this is
// unreachable).
assert(oldParams.size() == lubType.size() || !lub.noted());
std::vector<Type> newParams;
for (Index i = 0; i < lubType.size(); i++) {
newParams.push_back(maybeRefineIndex(oldParams, lub, i));
}
func->setParams(Type(newParams));
}

// Second, refine results sent from exports.
for (auto& exp : wasm.exports) {
if (exp->kind != ExternalKind::Function) {
continue;
}
auto name = *exp->getInternalName();
if (map[name].reffed) {
continue;
}

// Refine.
auto* func = wasm.getFunction(name);
auto lub = LUB::getResultsLUB(func, wasm);
auto oldResults = func->getResults();
auto lubType = lub.getLUB();
assert(oldResults.size() == lubType.size() || !lub.noted());
std::vector<Type> newResults;
for (Index i = 0; i < lubType.size(); i++) {
newResults.push_back(maybeRefineIndex(oldResults, lub, i));
}
func->setResults(Type(newResults));
}

// Update return types from calls to exports whose results we refined.
struct CallUpdater : public WalkerPass<PostWalker<CallUpdater>> {
bool isFunctionParallel() override { return true; }

std::unique_ptr<Pass> create() override {
return std::make_unique<CallUpdater>();
}

void visitCall(Call* curr) {
if (curr->type != Type::unreachable) {
curr->type = getModule()->getFunction(curr->target)->getResults();
}
}
} updater;
updater.setModule(&wasm);
updater.run(&runner, &wasm);

// Propagate after our changes.
ReFinalize().run(&runner, &wasm);
}

void TranslateToFuzzReader::dropToLog(Function* func) {
// Don't always do this.
if (oneIn(2)) {
Expand Down
9 changes: 9 additions & 0 deletions src/tools/wasm-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ int main(int argc, const char* argv[]) {
bool fuzzMemory = true;
bool fuzzOOB = true;
bool fuzzPreserveImportsAndExports = false;
bool fuzzAgainstJS = false;
std::string fuzzImport;
std::string emitSpecWrapper;
std::string emitWasm2CWrapper;
Expand Down Expand Up @@ -212,6 +213,13 @@ For more on how to optimize effectively, see
[&](Options* o, const std::string& arguments) {
fuzzPreserveImportsAndExports = true;
})
.add(
"--fuzz-against-js",
"",
"modify the wasm in valid ways that assume it is used only from JS",
WasmOptOption,
Options::Arguments::Zero,
[&](Options* o, const std::string& arguments) { fuzzAgainstJS = true; })
.add(
"--fuzz-import",
"",
Expand Down Expand Up @@ -349,6 +357,7 @@ For more on how to optimize effectively, see
reader.setAllowMemory(fuzzMemory);
reader.setAllowOOB(fuzzOOB);
reader.setPreserveImportsAndExports(fuzzPreserveImportsAndExports);
reader.setAgainstJS(fuzzAgainstJS);
if (!fuzzImport.empty()) {
reader.setImportedModule(fuzzImport);
}
Expand Down
4 changes: 4 additions & 0 deletions test/lit/help/wasm-opt.test
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@
;; CHECK-NEXT: --fuzz-preserve-imports-exports don't add imports and exports in
;; CHECK-NEXT: -ttf mode, and keep the start
;; CHECK-NEXT:
;; CHECK-NEXT: --fuzz-against-js modify the wasm in valid ways
;; CHECK-NEXT: that assume it is used only from
;; CHECK-NEXT: JS
;; CHECK-NEXT:
;; CHECK-NEXT: --fuzz-import a module to use as an import in
;; CHECK-NEXT: -ttf mode
;; CHECK-NEXT:
Expand Down
48 changes: 48 additions & 0 deletions test/unit/input/fuzz.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
(module
  ;; Two structs, A and B, each of which has a subtype.
  (rec
    (type $A (sub (struct)))
    (type $A2 (sub $A (struct)))

    (type $B (sub (struct)))
    (type $B2 (sub $B (struct)))
  )

  ;; Two imports, one which will be referenced.
  (import "module" "base" (func $import (param i32 anyref) (result eqref)))
  (import "module" "base" (func $import-reffed (param i32 anyref) (result eqref)))

  ;; Two exports, one which will be referenced.

  (func $export (export "export") (param $0 i32) (param $1 anyref) (result eqref)
    ;; Add the refs.
    (drop
      (ref.func $import-reffed)
    )
    (drop
      (ref.func $export-reffed)
    )

    ;; Call the imports.
    (drop
      (call $import
        (i32.const 10)
        ;; Send $A. We can refine the anyref to $A or $A2 (but not $B or $B2).
        (struct.new $A)
      )
    )
    (drop
      (call $import-reffed
        (i32.const 20)
        (struct.new $A)
      )
    )

    ;; Return $B. We can refine the eqref to $B or $B2 (but not $A or $A2).
    (struct.new $B)
  )

  (func $export-reffed (export "export-reffed") (param $0 i32) (param $1 anyref) (result eqref)
    (struct.new $A)
  )
)
Loading
Loading