Skip to content

Commit 16dae77

Browse files
authored
(wasm) New approach for parameterized rules (#519)
Implement a new approach for parameterized rules:

- Analyze the grammar and create specialized versions of parameterized rules — one for every possible set of actual parameters.
- The specialized rules are effectively closures: they capture a particular set of concrete parameters, so they can be invoked without arguments. This also means we can memoize them.
- This allows us to remove the runtime closure creation code; it allows us to support arbitrary numbers of parameters; and it eliminates the runtime functions for evaluating rules with fixed numbers of parameters (`evalApply1`, etc.).
1 parent 6acae8e commit 16dae77

File tree

6 files changed

+486
-245
lines changed

6 files changed

+486
-245
lines changed

packages/miniohm-js/index.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* global TextDecoder, TextEncoder, WebAssembly */
1+
/* global process, TextDecoder, TextEncoder, WebAssembly */
22

33
const WASM_PAGE_SIZE = 64 * 1024;
44
const INPUT_BUFFER_OFFSET = WASM_PAGE_SIZE;
@@ -88,6 +88,7 @@ export class WasmMatcher {
8888
}
8989

9090
match() {
91+
if (process.env.OHM_DEBUG === '1') debugger; // eslint-disable-line no-debugger
9192
return this._instance.exports.match(0);
9293
}
9394

packages/wasm/TODO.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
- [x] NonterminalNodes should keep track of the rule
77
- [ ] When iteration contains a sequence, the children are flattened into the iter node.
88
- [x] Basic parameterized rules
9-
- [ ] Parameterized rules with >3 params
9+
- [x] Parameterized rules with >3 params
1010
- [x] Parameters that aren't terminals
11-
- [ ] Memoization for parameterized rules
11+
- [x] Memoization for parameterized rules
12+
- [ ] Avoid unnecessary dispatch in generalized rules
13+
- [ ] Avoid duplicate lifted rules.
1214
- [x] Support direct left recursion.
1315
- [ ] Handle left recursion detection at grammar parse time.
1416
- [x] Separate API for _creating_ the Wasm module from the WasmMatcher interface.
@@ -26,4 +28,3 @@
2628

2729
- How to deal with matchLength in lookahead. In regular Ohm, lookahead _does_ bind things. But that is hard to square with the current CST representation, that stores only the matchLength. Because somehow the things inside a lookahead must consume nothing — but if you have `&("a" "b")`, the only way to make them consume nothing (in the current representation) is to rewrite the matchLength of the two terminal nodes.
2830
- Could we introduce a pseudo-node for lookahead? It could get transparently unpacked when walking the tree.
29-
- Memoization of parameterized rules: Alex suggested assigning memoization keys statically to unique applications

packages/wasm/runtime/ohmRuntime.ts

Lines changed: 9 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,15 @@ export function match(startRuleId: i32): Result {
101101
return call_indirect<Result>(ruleId);
102102
}
103103

104+
export function evalApplyGeneralized(ruleId: i32, caseIdx: i32): Result {
105+
const origPos = pos;
106+
const origNumBindings = bindings.length;
107+
if (call_indirect<Result>(ruleId, caseIdx)) {
108+
return newNonterminalNode(origPos, pos, ruleId, origNumBindings);
109+
}
110+
return 0;
111+
}
112+
104113
export function evalApplyNoMemo0(ruleId: i32): Result {
105114
const origPos = pos;
106115
const origNumBindings = bindings.length;
@@ -111,25 +120,6 @@ export function evalApplyNoMemo0(ruleId: i32): Result {
111120
}
112121

113122
export function evalApply0(ruleId: i32): Result {
114-
// Handle closures, which are an pointer w/ a flag in the high bit.
115-
// TODO: Find a cleaner way of doing this.
116-
if (ruleId & 0x80000000) {
117-
const ptr = ruleId & 0x7fffffff;
118-
ruleId = load<i32>(ptr, 0);
119-
const argCount = load<i32>(ptr, 4);
120-
const args: i32[] = [];
121-
for (let i = 0; i < argCount; i++) {
122-
args.push(load<i32>(ptr + i * 4, 8));
123-
}
124-
switch (argCount) {
125-
case 0: return evalApply0(ruleId);
126-
case 1: return evalApply1(ruleId, args[0]);
127-
case 2: return evalApply2(ruleId, args[0], args[1]);
128-
case 3: return evalApply3(ruleId, args[0], args[1], args[2]);
129-
}
130-
assert(false);
131-
}
132-
133123
let result = memoTableGet(pos, ruleId);
134124
if (result !== 0) {
135125
return useMemoizedResult(ruleId, result);
@@ -177,54 +167,6 @@ export function handleLeftRecursion(origPos: usize, ruleId: i32, origNumBindings
177167
return succeeded;
178168
}
179169

180-
export function evalApply1(ruleId: i32, arg0: i32): Result {
181-
// if (hasMemoizedResult(ruleId)) {
182-
// return useMemoizedResult(ruleId);
183-
// }
184-
const origPos = pos;
185-
const origNumBindings = bindings.length;
186-
let result: Result = FAIL;
187-
const succeeded = call_indirect<Result>(ruleId, arg0);
188-
if (succeeded) {
189-
const numChildren = bindings.length - origNumBindings;
190-
result = newNonterminalNode(origPos, pos, ruleId, origNumBindings);
191-
}
192-
// memoizeResult(origPos, ruleId, result);
193-
return succeeded;
194-
}
195-
196-
export function evalApply2(ruleId: i32, arg0: i32, arg1: i32): Result {
197-
// if (hasMemoizedResult(ruleId)) {
198-
// return useMemoizedResult(ruleId);
199-
// }
200-
const origPos = pos;
201-
const origNumBindings = bindings.length;
202-
let result: Result = FAIL;
203-
const succeeded = call_indirect<Result>(ruleId, arg0, arg1);
204-
if (succeeded) {
205-
const numChildren = bindings.length - origNumBindings;
206-
result = newNonterminalNode(origPos, pos, ruleId, origNumBindings);
207-
}
208-
// memoizeResult(origPos, ruleId, result);
209-
return succeeded;
210-
}
211-
212-
export function evalApply3(ruleId: i32, arg0: i32, arg1: i32, arg2: i32): Result {
213-
// if (hasMemoizedResult(ruleId)) {
214-
// return useMemoizedResult(ruleId);
215-
// }
216-
const origPos = pos;
217-
const origNumBindings = bindings.length;
218-
let result: Result = FAIL;
219-
const succeeded = call_indirect<Result>(ruleId, arg0, arg1, arg2);
220-
if (succeeded) {
221-
const numChildren = bindings.length - origNumBindings;
222-
result = newNonterminalNode(origPos, pos, ruleId, origNumBindings);
223-
}
224-
// memoizeResult(origPos, ruleId, result);
225-
return succeeded;
226-
}
227-
228170
export function newTerminalNode(startIdx: i32, endIdx: i32): usize {
229171
const ptr = heap.alloc(CST_NODE_OVERHEAD);
230172
cstSetCount(ptr, 0);

packages/wasm/scripts/bench.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const inputs = {
1515
featuredProduct: readFileSync(join(datadir, '_featured-product.liquid'), 'utf-8'),
1616
footer: readFileSync(join(datadir, '_footer.liquid'), 'utf-8'),
1717
html5shiv: readFileSync(join(datadir, '_html5shiv-3.7.3.js'), 'utf-8'),
18-
underscore: readFileSync(join(datadir, '_underscore-1.8.3.js'), 'utf-8')
18+
underscore: readFileSync(join(datadir, '_underscore-1.8.3.js'), 'utf-8'),
1919
};
2020

2121
function checkOk(val) {
@@ -37,7 +37,7 @@ function benchWithSetup(name, setupFn, benchFn) {
3737
bench(name, function* () {
3838
yield {
3939
[0]: setupFn,
40-
bench: benchFn
40+
bench: benchFn,
4141
};
4242
});
4343
}
@@ -87,4 +87,4 @@ group('LiquidHTML: footer.liquid', () => {
8787
});
8888
});
8989

90-
await run();
90+
(async () => await run())();

0 commit comments

Comments
 (0)