-
Notifications
You must be signed in to change notification settings - Fork 855
New fuzzer mode: Fuzz against JavaScript #8655
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
91c30a0
65c2a31
d08c29f
f3a07c8
823c665
767c8b4
c7d7f1b
fc981c7
ebb1106
fa2da6e
2c3c192
53b1438
255250d
169f4c8
2fe14e0
300c953
39f57ee
cffacbc
41cd320
063b415
12d35ef
e5d4ea5
d968315
45ce1dc
c1fccda
a7b5bfd
35c498f
98e7b09
26d3f3e
c0c6dae
90c18af
f3d11a6
44fe786
79e65d7
98bd0ac
4179d31
2e2d028
a7bf270
1f7e875
06357e5
a079f2e
a7b5125
149f300
130029f
ca52c2d
c59b29b
ada1591
3ddc4fe
cd47cb6
59b11d3
1b857b1
ebe62e9
2f67d1d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ | |
| #include "ir/glbs.h" | ||
| #include "ir/iteration.h" | ||
| #include "ir/local-structural-dominance.h" | ||
| #include "ir/lubs.h" | ||
| #include "ir/module-utils.h" | ||
| #include "ir/names.h" | ||
| #include "ir/subtype-exprs.h" | ||
|
|
@@ -413,6 +414,10 @@ void TranslateToFuzzReader::build() { | |
| PassRunner runner(&wasm); | ||
| ReFinalize().run(&runner, &wasm); | ||
| ReFinalize().walkModuleCode(&wasm); | ||
|
|
||
| if (againstJS) { | ||
| mutateJSBoundary(); | ||
| } | ||
| } | ||
|
|
||
| void TranslateToFuzzReader::setupMemory() { | ||
|
|
@@ -2389,6 +2394,229 @@ void TranslateToFuzzReader::modifyInitialFunctions() { | |
| } | ||
| } | ||
|
|
||
| void TranslateToFuzzReader::mutateJSBoundary() { | ||
| assert(againstJS); | ||
|
|
||
| // Scan to find functions whose address is taken. We cannot modify their | ||
| // signatures at all. | ||
|
|
||
| struct FunctionInfo { | ||
| // Whether there are references to this function itself. | ||
| bool reffed = false; | ||
|
|
||
| // Calls to this function, if it is an import (keyed by call target). | ||
| std::vector<Call*> callImports; | ||
| }; | ||
|
|
||
| using NameInfoMap = std::unordered_map<Name, FunctionInfo>; | ||
|
|
||
| struct FunctionInfoScanner | ||
| : public WalkerPass<PostWalker<FunctionInfoScanner>> { | ||
| // Not parallel for simplicity, see the map update below. | ||
|
|
||
| bool modifiesBinaryenIR() override { return false; } | ||
|
|
||
| NameInfoMap& map; | ||
|
|
||
| FunctionInfoScanner(NameInfoMap& map) : map(map) {} | ||
|
|
||
| std::unique_ptr<Pass> create() override { | ||
| return std::make_unique<FunctionInfoScanner>(map); | ||
| } | ||
|
|
||
| void visitCall(Call* curr) { | ||
| if (getModule()->getFunction(curr->target)->imported()) { | ||
| map[curr->target].callImports.push_back(curr); | ||
| } | ||
| } | ||
|
|
||
| void visitRefFunc(RefFunc* curr) { map[curr->func].reffed = true; } | ||
| }; | ||
|
|
||
| NameInfoMap map; | ||
| FunctionInfoScanner scanner(map); | ||
| PassRunner runner(&wasm); | ||
| scanner.setModule(&wasm); | ||
| scanner.run(&runner, &wasm); | ||
| scanner.walkModuleCode(&wasm); | ||
|
|
||
| // If a function does not have its address taken, we can refine types. This is | ||
| // safe because we will still send and receive the right number of values (we | ||
| // are not changing the arity, which JS might notice). Each place we may | ||
| // refine, we are given the maximum refinement and pick a random type between | ||
| // it and the old type. | ||
| auto maybeRefine = [&](Type old, Type new_) { | ||
| if (!old.isRef()) { | ||
| return old; | ||
| } | ||
|
|
||
| // Find all heap types between the old and new, starting from new. | ||
| auto oldHeapType = old.getHeapType(); | ||
| auto newHeapType = new_.getHeapType(); | ||
| assert(HeapType::isSubType(newHeapType, oldHeapType)); | ||
| std::vector<HeapType> options; | ||
| while (1) { | ||
| options.push_back(newHeapType); | ||
| // We cannot look at a bottom type's supers (there can be many, and the | ||
| // getSuperType() API doesn't return them), but can use | ||
| // interestingHeapSubTypes on the top. | ||
| if (newHeapType.isBottom()) { | ||
| for (auto type : interestingHeapSubTypes[newHeapType.getTop()]) { | ||
| options.push_back(type); | ||
| } | ||
| break; | ||
| } | ||
| // Continue until we reach the old type. | ||
| if (newHeapType == oldHeapType) { | ||
| break; | ||
| } | ||
| auto next = newHeapType.getSuperType(); | ||
| assert(next); | ||
| newHeapType = *next; | ||
| } | ||
| newHeapType = pick(options); | ||
|
|
||
| // Pick the nullability. | ||
| auto oldNullability = old.getNullability(); | ||
| auto newNullability = new_.getNullability(); | ||
| if (newNullability != oldNullability) { | ||
| newNullability = getNullability(); | ||
| } | ||
|
|
||
| // Pick the exactness. | ||
| auto oldExactness = old.getExactness(); | ||
| auto newExactness = new_.getExactness(); | ||
| // We can only be exact if we are using the new heap type: that type is | ||
| // exactly what is sent here, and no intermediate heap type would be valid. | ||
| // For example, given $A :> $B :> $C, then maybeRefine($A, exact $C) can | ||
| // return exact $C, but cannot return exact $B. | ||
| // | ||
| // Also, basic heap types cannot be exact. | ||
| if (newHeapType != new_.getHeapType() || newHeapType.isBasic()) { | ||
| newExactness = Inexact; | ||
| } else if (newExactness != oldExactness) { | ||
| // TODO: once getExactness() is fixed (see there), use that | ||
| newExactness = oneIn(2) ? Exact : Inexact; | ||
| } | ||
|
|
||
| return Type(newHeapType, newNullability, newExactness); | ||
| }; | ||
|
|
||
| // Given a set of types (all params or all results), and an index among them, | ||
| // refine that index if we can. It is possible that no new types exist at all, | ||
| // if the code was unreachable and we noted nothing. | ||
| auto maybeRefineIndex = [&](Type oldTypes, LUBFinder newLUB, Index index) { | ||
| auto old = oldTypes[index]; | ||
| if (newLUB.noted()) { | ||
| return maybeRefine(old, newLUB.getLUB()[index]); | ||
| } | ||
|
|
||
| // Nothing was noted, so this is unreachable code. We can still refine to | ||
| // the bottom in some cases. | ||
| if (!old.isRef()) { | ||
| return old; | ||
| } | ||
| return maybeRefine(old, Type(old.getHeapType().getBottom(), NonNullable)); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this could all be replaced with the following: [code snippet lost in extraction] if we also added this to [identifier lost in extraction]. But I see that the noted and non-noted cases requiring different indexing is the source of the extra work. We could fix that by allowing arbitrary indexing into [identifier lost in extraction]. Whatever way of structuring this you think is best SGTM. |
||
| }; | ||
|
|
||
| // First, refine params sent to imports. Gather the LUB sent to each import, | ||
| // and then refine. | ||
| std::unordered_map<Name, LUBFinder> paramLUBs; | ||
| for (auto& [_, info] : map) { | ||
| for (auto* call : info.callImports) { | ||
| auto declaredParams = wasm.getFunction(call->target)->getParams(); | ||
| std::vector<Type> sent; | ||
| for (Index i = 0; i < call->operands.size(); i++) { | ||
| auto type = call->operands[i]->type; | ||
| if (type == Type::unreachable) { | ||
| // Nothing sent here. What we refine to must still validate, even | ||
| // though this call is unreachable. Using the non-nullable bottom type | ||
| // is valid, and has the fewest restrictions. | ||
| type = declaredParams[i]; | ||
| if (type.isRef()) { | ||
| type = Type(type.getHeapType().getBottom(), NonNullable); | ||
| } | ||
| } | ||
|
tlively marked this conversation as resolved.
|
||
| sent.push_back(type); | ||
| } | ||
| paramLUBs[call->target].note(Type(sent)); | ||
| } | ||
| } | ||
|
|
||
| for (auto& func : wasm.functions) { | ||
| if (!func->imported()) { | ||
| continue; | ||
| } | ||
| // TODO: In the referenced case, we could consider using import/export | ||
| // wrappers and refining just there. | ||
| if (map[func->name].reffed) { | ||
| continue; | ||
| } | ||
|
tlively marked this conversation as resolved.
|
||
| // Do not alter the signature of configureAll or other VM builtins. Changing | ||
| // these to something the VM does not expect will just cause it to | ||
| // immediately reject the module by trapping. | ||
| if (func->module.startsWith("wasm:")) { | ||
| continue; | ||
| } | ||
|
|
||
| // Refine. | ||
| auto lub = paramLUBs[func->name]; | ||
| auto oldParams = func->getParams(); | ||
| auto lubType = lub.getLUB(); | ||
| // Either the LUB has the right data shape, or nothing was noted (this is | ||
| // unreachable). | ||
| assert(oldParams.size() == lubType.size() || !lub.noted()); | ||
| std::vector<Type> newParams; | ||
| for (Index i = 0; i < lubType.size(); i++) { | ||
| newParams.push_back(maybeRefineIndex(oldParams, lub, i)); | ||
| } | ||
| func->setParams(Type(newParams)); | ||
| } | ||
|
|
||
| // Second, refine results sent from exports. | ||
| for (auto& exp : wasm.exports) { | ||
| if (exp->kind != ExternalKind::Function) { | ||
| continue; | ||
| } | ||
| auto name = *exp->getInternalName(); | ||
| if (map[name].reffed) { | ||
| continue; | ||
| } | ||
|
|
||
| // Refine. | ||
| auto* func = wasm.getFunction(name); | ||
| auto lub = LUB::getResultsLUB(func, wasm); | ||
| auto oldResults = func->getResults(); | ||
| auto lubType = lub.getLUB(); | ||
| assert(oldResults.size() == lubType.size() || !lub.noted()); | ||
| std::vector<Type> newResults; | ||
| for (Index i = 0; i < lubType.size(); i++) { | ||
| newResults.push_back(maybeRefineIndex(oldResults, lub, i)); | ||
| } | ||
| func->setResults(Type(newResults)); | ||
| } | ||
|
|
||
| // Update return types from calls to exports whose results we refined. | ||
| struct CallUpdater : public WalkerPass<PostWalker<CallUpdater>> { | ||
| bool isFunctionParallel() override { return true; } | ||
|
|
||
| std::unique_ptr<Pass> create() override { | ||
| return std::make_unique<CallUpdater>(); | ||
| } | ||
|
|
||
| void visitCall(Call* curr) { | ||
| if (curr->type != Type::unreachable) { | ||
| curr->type = getModule()->getFunction(curr->target)->getResults(); | ||
| } | ||
| } | ||
| } updater; | ||
| updater.setModule(&wasm); | ||
| updater.run(&runner, &wasm); | ||
|
|
||
| // Propagate after our changes. | ||
| ReFinalize().run(&runner, &wasm); | ||
| } | ||
|
|
||
| void TranslateToFuzzReader::dropToLog(Function* func) { | ||
| // Don't always do this. | ||
| if (oneIn(2)) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| (module | ||
| ;; Two structs, A and B, each of which has a subtype. | ||
| (rec | ||
| (type $A (sub (struct))) | ||
| (type $A2 (sub $A (struct))) | ||
|
|
||
| (type $B (sub (struct))) | ||
| (type $B2 (sub $B(struct))) | ||
| ) | ||
|
|
||
| ;; Two imports, one of which will be referenced. | ||
| (import "module" "base" (func $import (param i32 anyref) (result eqref))) | ||
| (import "module" "base" (func $import-reffed (param i32 anyref) (result eqref))) | ||
|
|
||
| ;; Two exports, one of which will be referenced. | ||
|
|
||
| (func $export (export "export") (param $0 i32) (param $1 anyref) (result eqref) | ||
| ;; Add the refs. | ||
| (drop | ||
| (ref.func $import-reffed) | ||
| ) | ||
| (drop | ||
| (ref.func $export-reffed) | ||
| ) | ||
|
|
||
| ;; Call the imports. | ||
| (drop | ||
| (call $import | ||
| (i32.const 10) | ||
| ;; Send $A. We can refine the anyref to $A or any type between them (but not $A2, $B, or $B2). | ||
| (struct.new $A) | ||
| ) | ||
| ) | ||
| (drop | ||
| (call $import-reffed | ||
| (i32.const 20) | ||
| (struct.new $A) | ||
| ) | ||
| ) | ||
|
|
||
| ;; Return $B. We can refine the eqref to $B or any type between them (but not $B2, $A, or $A2). | ||
| (struct.new $B) | ||
| ) | ||
|
|
||
| (func $export-reffed (export "export-reffed") (param $0 i32) (param $1 anyref) (result eqref) | ||
| (struct.new $A) | ||
| ) | ||
| ) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might as well check these conditions before burning a bit to generate a new exactness above.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bump.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, saved that bit.