Skip to content

Commit 2eedbdc

Browse files
author
Brian Mock
authored
Merge pull request #160 from /issues/157
Fixes #157; adds createLanguage, thru, node
2 parents 81fa365 + 9e6bbb8 commit 2eedbdc

File tree

5 files changed

+233
-19
lines changed

5 files changed

+233
-19
lines changed

API.md

+97-18
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,41 @@ A parser is said to *consume* the text that it parses, leaving only the unconsum
1616

1717
These are either parsers or functions that return new parsers. These are the building blocks of parsers. They are all contained in the `Parsimmon` object.
1818

19+
## Parsimmon.createLanguage(parsers)
20+
21+
`createLanguage` is the best starting point for building a language parser in Parsimmon. It organizes all of your parsers, collects them into a single namespace, and removes the need to worry about using `Parsimmon.lazy`.
22+
23+
Each function passed to `createLanguage` receives as its only parameter the entire language of parsers as an object. This is used for referring to other rules from within your current rule.
24+
25+
Example:
26+
27+
```js
28+
var Lang = Parsimmon.createLanguage({
29+
Value: function(r) {
30+
return Parsimmon.alt(
31+
r.Number,
32+
r.Symbol,
33+
r.List
34+
);
35+
},
36+
Number: function() {
37+
return Parsimmon.regexp(/[0-9]+/).map(Number);
38+
},
39+
Symbol: function() {
40+
return Parsimmon.regexp(/[a-z]+/);
41+
},
42+
List: function(r) {
43+
return Parsimmon.string('(')
44+
.then(Parsimmon.sepBy(r.Value, r._))
45+
.skip(Parsimmon.string(')'));
46+
},
47+
_: function() {
48+
return Parsimmon.optWhitespace;
49+
}
50+
});
51+
Lang.Value.tryParse('(list 1 2 foo (list nice 3 56 989 asdasdas))');
52+
```
53+
1954
## Parsimmon(fn)
2055

2156
**NOTE:** You probably will never need to use this function. Most parsing can be accomplished using `Parsimmon.regexp` in combination with `Parsimmon.seq` and `Parsimmon.alt`.
@@ -182,6 +217,8 @@ This is the same as `Parsimmon.sepBy`, but matches the `content` parser **at lea
182217

183218
## Parsimmon.lazy(fn)
184219

220+
**NOTE:** This is not needed if you're using `createLanguage`.
221+
185222
Accepts a function that returns a parser, which is evaluated the first time the parser is used. This is useful for referencing parsers that haven't yet been defined, and for implementing recursive parsers. Example:
186223

187224
```javascript
@@ -523,29 +560,71 @@ Expects `parser` at most `n` times. Yields an array of the results.
523560

524561
Expects `parser` at least `n` times. Yields an array of the results.
525562

563+
## parser.node(name)
564+
565+
Yields an object with `name`, `value`, `start`, and `end` keys, where `value` is the original value yielded by the parser, `name` is the argument passed in, and `start` and `end` are objects with a 0-based `offset` and 1-based `line` and `column` properties that represent the position in the input that contained the parsed text.
566+
567+
Example:
568+
569+
```javascript
570+
var Identifier =
571+
Parsimmon.regexp(/[a-z]+/).node('Identifier');
572+
573+
Identifier.tryParse('hey');
574+
// => { name: 'Identifier',
575+
// value: 'hey',
576+
// start: { offset: 0, line: 1, column: 1 },
577+
// end: { offset: 3, line: 1, column: 4 } }
578+
```
579+
526580
## parser.mark()
527581

528-
Yields an object with `start`, `value`, and `end` keys,
529-
where `value` is the original value yielded by the parser, and `start` and
530-
`end` are are objects with a 0-based `offset` and 1-based `line` and
531-
`column` properties that represent the position in the input that
532-
contained the parsed text. Works like this function:
582+
Yields an object with `start`, `value`, and `end` keys, where `value` is the original value yielded by the parser, and `start` and `end` are objects with a 0-based `offset` and 1-based `line` and `column` properties that represent the position in the input that contained the parsed text. Works like this function:
533583

534584
```javascript
535-
function mark(parser) {
536-
return Parsimmon.seqMap(
537-
Parsimmon.index,
538-
parser,
539-
Parsimmon.index,
540-
function(start, value, end) {
541-
return {
542-
start: start,
543-
value: value,
544-
end: end
545-
};
546-
}
547-
);
585+
var Identifier =
586+
Parsimmon.regexp(/[a-z]+/).mark();
587+
588+
Identifier.tryParse('hey');
589+
// => { start: { offset: 0, line: 1, column: 1 },
590+
// value: 'hey',
591+
// end: { offset: 3, line: 1, column: 4 } }
592+
```
593+
594+
## parser.thru(wrapper)
595+
596+
Calls `wrapper` with the current parser and returns the result, i.e. `parser.thru(wrapper)` is the same as `wrapper(parser)`. Useful for applying custom functions that wrap your parsers while keeping Parsimmon's chaining style.
597+
598+
Example:
599+
600+
```js
601+
function makeNode(name) {
602+
return function(parser) {
603+
return Parsimmon.seqMap(
604+
Parsimmon.index,
605+
parser,
606+
Parsimmon.index,
607+
function(start, value, end) {
608+
return Object.freeze({
609+
type: 'myLanguage.' + name,
610+
value: value,
611+
start: start,
612+
end: end
613+
});
614+
}
615+
);
616+
};
548617
}
618+
619+
var Identifier =
620+
Parsimmon.regexp(/[a-z]+/)
621+
.thru(makeNode('Identifier'));
622+
623+
Identifier.tryParse('hey');
624+
// => { type: 'myLanguage.Identifier',
625+
// value: 'hey',
626+
// start: { offset: 0, line: 1, column: 1 },
627+
// end: { offset: 3, line: 1, column: 4 } }
549628
```
550629

551630
## parser.desc(description)

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
## version 1.4.0 (2017-06-05)
2+
* Adds `Parsimmon.createLanguage(parsers)`
3+
* Adds `parser.thru(wrapper)`
4+
* Adds `parser.node(name)`
5+
16
## version 1.3.0 (2017-05-28)
27

38
* Adds `Parsimmon.notFollowedBy(parser)`

README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,13 @@ Note: If you prefer throwing an error when the parse failed, call [`.tryParse(st
4848

4949
## Common Functions
5050

51+
- [`.createLanguage(parsers)`](API.md#parsimmoncreatelanguageparsers)
5152
- [`.string(string)`](API.md#parsimmonstringstring)
5253
- [`.regexp(regexp)`](API.md#parsimmonregexpregexp)
5354
- [`.seq(p1, p2, ...pn)`](API.md#parsimmonseqp1-p2-pn)
5455
- [`.sepBy(content, separator)`](API.md#parsimmonsepbycontent)
5556
- [`.alt(p1, p2, ...pn)`](API.md#parsimmonaltp1-p2-pn)
56-
- [`.lazy(fn)`](API.md#parsimmonlazyfn)
57+
- [`parser.node(name)`](API.md#parsernodename)
5758
- [`.whitespace`](API.md#parsimmonwhitespace)
5859
- [`.index`](API.md#parsimmonindex)
5960
- [`parser.map(fn)`](API.md#parsermapfn)

src/parsimmon.js

+32
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,22 @@
238238
});
239239
}
240240

241+
// Revisit this with Object.keys and .bind when we drop ES3 support.
242+
function createLanguage(parsers) {
243+
var language = {};
244+
for (var key in parsers) {
245+
if ({}.hasOwnProperty.call(parsers, key)) {
246+
(function(key) {
247+
var func = function() {
248+
return parsers[key](language);
249+
};
250+
language[key] = lazy(func);
251+
}(key));
252+
}
253+
}
254+
return language;
255+
}
256+
241257
/**
242258
* Allows to add custom primitive parsers
243259
*/
@@ -285,6 +301,10 @@
285301
return alt(this, alternative);
286302
};
287303

304+
_.thru = function(wrapper) {
305+
return wrapper(this);
306+
};
307+
288308
_.then = function(next) {
289309
if (typeof next === 'function') {
290310
throw new Error('chaining features of .then are no longer supported, use .chain instead');
@@ -425,6 +445,17 @@
425445
});
426446
};
427447

448+
_.node = function(name) {
449+
return seqMap(index, this, index, function(start, value, end) {
450+
return {
451+
name: name,
452+
value: value,
453+
start: start,
454+
end: end
455+
};
456+
});
457+
};
458+
428459
_.lookahead = function(x) {
429460
return this.skip(lookahead(x));
430461
};
@@ -667,6 +698,7 @@
667698
var optWhitespace = regexp(/\s*/).desc('optional whitespace');
668699
var whitespace = regexp(/\s+/).desc('whitespace');
669700

701+
Parsimmon.createLanguage = createLanguage;
670702
Parsimmon.all = all;
671703
Parsimmon.alt = alt;
672704
Parsimmon.any = any;

test/parsimmon.test.js

+97
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,71 @@ suite('parser', function() {
153153
});
154154
});
155155

156+
suite('Parsimmon.createLanguage', function() {
157+
test('should return an object of parsers', function() {
158+
var lang = Parsimmon.createLanguage({
159+
a: function() {
160+
return Parsimmon.string('a');
161+
},
162+
b: function() {
163+
return Parsimmon.string('b');
164+
}
165+
});
166+
assert.ok(Parsimmon.isParser(lang.a));
167+
assert.ok(Parsimmon.isParser(lang.b));
168+
});
169+
test('should allow direct recursion in parsers', function() {
170+
var lang = Parsimmon.createLanguage({
171+
Parentheses: function(r) {
172+
return Parsimmon.alt(
173+
Parsimmon.string('()'),
174+
Parsimmon.string('(')
175+
.then(r.Parentheses)
176+
.skip(Parsimmon.string(')'))
177+
);
178+
}
179+
});
180+
lang.Parentheses.tryParse('(((())))');
181+
});
182+
test('should allow indirect recursion in parsers', function() {
183+
var lang = Parsimmon.createLanguage({
184+
Value: function(r) {
185+
return Parsimmon.alt(
186+
r.Number,
187+
r.Symbol,
188+
r.List
189+
);
190+
},
191+
Number: function() {
192+
return Parsimmon.regexp(/[0-9]+/).map(Number);
193+
},
194+
Symbol: function() {
195+
return Parsimmon.regexp(/[a-z]+/);
196+
},
197+
List: function(r) {
198+
return Parsimmon.string('(')
199+
.then(Parsimmon.sepBy(r.Value, r._))
200+
.skip(Parsimmon.string(')'));
201+
},
202+
_: function() {
203+
return Parsimmon.optWhitespace;
204+
}
205+
});
206+
lang.Value.tryParse('(list 1 2 foo (list nice 3 56 989 asdasdas))');
207+
});
208+
});
209+
210+
suite('parser.thru', function() {
211+
test('should return wrapper(this)', function() {
212+
function arrayify(x) {
213+
return [x];
214+
}
215+
var parser = Parsimmon.string('');
216+
var array = parser.thru(arrayify);
217+
assert.strictEqual(array[0], parser);
218+
});
219+
});
220+
156221
suite('Parsimmon.lookahead', function() {
157222
test('should handle a string', function() {
158223
lookahead('');
@@ -1086,6 +1151,38 @@ suite('parser', function() {
10861151
);
10871152
});
10881153

1154+
test('.node(name)', function() {
1155+
var ys = regex(/^y*/).node('Y');
1156+
var parser = optWhitespace.then(ys).skip(optWhitespace);
1157+
assert.deepEqual(
1158+
parser.parse('').value,
1159+
{
1160+
name: 'Y',
1161+
value: '',
1162+
start: {offset: 0, line: 1, column: 1},
1163+
end: {offset: 0, line: 1, column: 1}
1164+
}
1165+
);
1166+
assert.deepEqual(
1167+
parser.parse(' yy ').value,
1168+
{
1169+
name: 'Y',
1170+
value: 'yy',
1171+
start: {offset: 1, line: 1, column: 2},
1172+
end: {offset: 3, line: 1, column: 4}
1173+
}
1174+
);
1175+
assert.deepEqual(
1176+
parser.parse('\nyy ').value,
1177+
{
1178+
name: 'Y',
1179+
value: 'yy',
1180+
start: {offset: 1, line: 2, column: 1},
1181+
end: {offset: 3, line: 2, column: 3}
1182+
}
1183+
);
1184+
});
1185+
10891186
suite('smart error messages', function() {
10901187
// this is mainly about .or(), .many(), and .times(), but not about
10911188
// their core functionality, so it's in its own test suite

0 commit comments

Comments
 (0)