Skip to content

Commit 382d82a

Browse files
committed
Enable the possibility to switch to unicode decoding.
1 parent ce951cc commit 382d82a

22 files changed

+2607
-2506
lines changed

lib/src/parser/character/any.dart

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ import '../../core/parser.dart';
44
import '../predicate/character.dart';
55
import 'predicate/constant.dart';
66

7-
/// Returns a parser that accepts any input element.
7+
/// Returns a parser that accepts any character.
88
///
99
/// For example, `any()` succeeds and consumes any given letter. It only
1010
/// fails for an empty input.
1111
@useResult
12-
Parser<String> any({String message = 'input expected'}) =>
13-
CharacterParser(ConstantCharPredicate.any, message);
12+
Parser<String> any({String message = 'input expected', bool unicode = false}) =>
13+
CharacterParser(ConstantCharPredicate.any, message, unicode: unicode);

lib/src/parser/character/any_of.dart

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,15 @@ import '../predicate/character.dart';
55
import 'utils/code.dart';
66
import 'utils/optimize.dart';
77

8-
/// Returns a parser that accepts any of the specified characters.
8+
/// Returns a parser that accepts any of the specified characters in [value].
99
@useResult
10-
Parser<String> anyOf(String chars, {String? message}) => CharacterParser(
11-
optimizedString(chars),
12-
message ?? 'any of "${toReadableString(chars)}" expected');
10+
Parser<String> anyOf(String value,
11+
{String? message, bool ignoreCase = false, bool unicode = false}) {
12+
final predicate = ignoreCase
13+
? optimizedString('${value.toLowerCase()}${value.toUpperCase()}',
14+
unicode: unicode)
15+
: optimizedString(value, unicode: unicode);
16+
message ??= 'any of "${toReadableString(value, unicode: unicode)}"'
17+
'${ignoreCase ? ' (case-insensitive)' : ''} expected';
18+
return CharacterParser(predicate, message, unicode: unicode);
19+
}

lib/src/parser/character/char.dart

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,30 @@ import 'package:meta/meta.dart';
33
import '../../core/parser.dart';
44
import '../predicate/character.dart';
55
import 'predicate/char.dart';
6-
import 'predicate/range.dart';
76
import 'utils/code.dart';
87
import 'utils/optimize.dart';
98

10-
/// Returns a parser that accepts a specific character only.
9+
/// Returns a parser that accepts a specific character [value].
10+
///
11+
/// - [message] defines a custom error message.
12+
/// - If [ignoreCase] is `true`, the character is matched in a case-insensitive
13+
/// manner.
14+
/// - If [unicode] is `true`, the character is matched using full unicode
15+
/// character parsing (as opposed to UTF-16 code units).
1116
@useResult
12-
Parser<String> char(String char, {String? message}) => CharacterParser(
13-
SingleCharPredicate(toCharCode(char)),
14-
message ?? '"${toReadableString(char)}" expected');
17+
Parser<String> char(String value,
18+
{String? message, bool ignoreCase = false, bool unicode = false}) {
19+
final charCode = toCharCode(value, unicode: unicode);
20+
final predicate = ignoreCase
21+
? optimizedString('${value.toLowerCase()}${value.toUpperCase()}',
22+
unicode: unicode)
23+
: SingleCharPredicate(charCode);
24+
message ??= '"${toReadableString(value, unicode: unicode)}"'
25+
'${ignoreCase ? ' (case-insensitive)' : ''} expected';
26+
return CharacterParser(predicate, message, unicode: unicode);
27+
}
1528

16-
/// Returns a parser that accepts a case-insensitive specific character only.
1729
@useResult
18-
Parser<String> charIgnoringCase(String char, {String? message}) {
19-
final lowerCase = toCharCode(char.toLowerCase());
20-
final upperCase = toCharCode(char.toUpperCase());
21-
return CharacterParser(
22-
optimizedRanges([
23-
RangeCharPredicate(lowerCase, lowerCase),
24-
RangeCharPredicate(upperCase, upperCase),
25-
]),
26-
message ?? '"${toReadableString(char)}" (case-insensitive) expected');
27-
}
30+
@Deprecated('Use `char(value, message: message, ignoreCase: true)` instead')
31+
Parser<String> charIgnoringCase(String value, [String? message]) =>
32+
char(value, message: message, ignoreCase: true);

lib/src/parser/character/none_of.dart

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,15 @@ import 'predicate/not.dart';
66
import 'utils/code.dart';
77
import 'utils/optimize.dart';
88

9-
/// Returns a parser that accepts none of the specified characters.
9+
/// Returns a parser that accepts none of the specified characters in [value].
1010
@useResult
11-
Parser<String> noneOf(String chars, {String? message}) => CharacterParser(
12-
NotCharacterPredicate(optimizedString(chars)),
13-
message ?? 'none of "${toReadableString(chars)}" expected');
11+
Parser<String> noneOf(String value,
12+
{String? message, bool ignoreCase = false, bool unicode = false}) {
13+
final predicate = NotCharPredicate(ignoreCase
14+
? optimizedString('${value.toLowerCase()}${value.toUpperCase()}',
15+
unicode: unicode)
16+
: optimizedString(value, unicode: unicode));
17+
message ??= 'none of "${toReadableString(value, unicode: unicode)}"'
18+
'${ignoreCase ? ' (case-insensitive)' : ''} expected';
19+
return CharacterParser(predicate, message, unicode: unicode);
20+
}

lib/src/parser/character/pattern.dart

Lines changed: 45 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ import 'package:meta/meta.dart';
33
import '../../core/parser.dart';
44
import '../action/map.dart';
55
import '../combinator/choice.dart';
6-
import '../combinator/optional.dart';
76
import '../combinator/sequence.dart';
7+
import '../misc/eof.dart';
88
import '../predicate/character.dart';
99
import '../repeater/possessive.dart';
1010
import 'any.dart';
@@ -16,7 +16,7 @@ import 'utils/code.dart';
1616
import 'utils/optimize.dart';
1717

1818
/// Returns a parser that accepts a single character of a given character set
19-
/// provided as a string.
19+
/// [pattern] provided as a string.
2020
///
2121
/// Characters match themselves. A dash `-` between two characters matches the
2222
/// range of those characters. A caret `^` at the beginning negates the pattern.
@@ -26,59 +26,54 @@ import 'utils/optimize.dart';
2626
/// either '1', '2', or '3'; and fails for any other character. The parser
2727
/// `pattern('^aou')` accepts any character, but fails for the characters 'a',
2828
/// 'o', or 'u'.
29-
@useResult
30-
Parser<String> pattern(String element, {String? message}) => CharacterParser(
31-
_pattern.parse(element).value,
32-
message ?? '[${toReadableString(element)}] expected');
33-
34-
/// Returns a parser that accepts a single character of a given case-insensitive
35-
/// character set provided as a string.
36-
///
37-
/// Characters match themselves. A dash `-` between two characters matches the
38-
/// range of those characters. A caret `^` at the beginning negates the pattern.
3929
///
40-
/// For example, the parser `patternIgnoreCase('aoU')` accepts the character
41-
/// 'a', 'o', 'u' and 'A', 'O', 'U', and fails for any other input. The parser
42-
/// `patternIgnoreCase('a-c')` accepts 'a', 'b', 'c' and 'A', 'B', 'C'; and
43-
/// fails for any other character. The parser `patternIgnoreCase('^A') accepts
44-
/// any character, but fails for the characters 'a' or 'A'.
30+
/// If [ignoreCase] is set to `true` the pattern accepts lower and uppercase
31+
/// variations of its characters. If [unicode] is set to `true` unicode
32+
/// surrogate pairs are extracted and matched against the predicate.
4533
@useResult
46-
Parser<String> patternIgnoreCase(String element, {String? message}) {
47-
var normalized = element;
48-
final isNegated = normalized.startsWith('^');
34+
Parser<String> pattern(String pattern,
35+
{String? message, bool ignoreCase = false, bool unicode = false}) {
36+
var input = pattern;
37+
final isNegated = input.startsWith('^');
38+
if (isNegated) input = input.substring(1);
39+
final inputs =
40+
ignoreCase ? [input.toLowerCase(), input.toUpperCase()] : [input];
41+
final parser = unicode ? _patternUnicodeParser : _patternParser;
42+
var predicate = optimizedRanges(
43+
inputs.expand((each) => parser.parse(each).value),
44+
unicode: unicode);
4945
if (isNegated) {
50-
normalized = normalized.substring(1);
46+
predicate = predicate is ConstantCharPredicate
47+
? ConstantCharPredicate(!predicate.constant)
48+
: NotCharPredicate(predicate);
5149
}
52-
final isDashed = normalized.endsWith('-');
53-
if (isDashed) {
54-
normalized = normalized.substring(0, normalized.length - 1);
55-
}
56-
return pattern(
57-
'${isNegated ? '^' : ''}'
58-
'${normalized.toLowerCase()}${normalized.toUpperCase()}'
59-
'${isDashed ? '-' : ''}',
60-
message: message ??
61-
'[${toReadableString(element)}] (case-insensitive) expected');
50+
message ??= '[${toReadableString(pattern, unicode: unicode)}]'
51+
'${ignoreCase ? ' (case-insensitive)' : ''} expected';
52+
return CharacterParser(predicate, message, unicode: unicode);
6253
}
6354

64-
/// Parser that reads a single character.
65-
final _single = any().map(
66-
(element) => RangeCharPredicate(toCharCode(element), toCharCode(element)));
67-
68-
/// Parser that reads a character range.
69-
final _range = (any(), char('-'), any()).toSequenceParser().map3(
70-
(start, _, stop) =>
71-
RangeCharPredicate(toCharCode(start), toCharCode(stop)));
55+
@useResult
56+
@Deprecated('Use `pattern(value, message: message, ignoreCase: true)` instead')
57+
Parser<String> patternIgnoreCase(String value, [String? message]) =>
58+
pattern(value, message: message, ignoreCase: true);
7259

73-
/// Parser that reads a sequence of single characters or ranges.
74-
final _sequence =
75-
[_range, _single].toChoiceParser().star().map(optimizedRanges);
60+
Parser<List<RangeCharPredicate>> _createParser({required bool unicode}) {
61+
// Parser that consumes a single character.
62+
final character = any(unicode: unicode);
63+
// Parser that reads a single character.
64+
final single = character.map((element) => RangeCharPredicate(
65+
toCharCode(element, unicode: unicode),
66+
toCharCode(element, unicode: unicode)));
67+
// Parser that reads a character range.
68+
final range = (
69+
character,
70+
char('-'),
71+
character
72+
).toSequenceParser().map3((start, _, stop) => RangeCharPredicate(
73+
toCharCode(start, unicode: unicode), toCharCode(stop, unicode: unicode)));
74+
// Parser that reads a sequence of single characters or ranges.
75+
return [range, single].toChoiceParser().star().end();
76+
}
7677

77-
/// Parser that reads a possibly negated sequence of predicates.
78-
final _pattern = (char('^').optional(), _sequence)
79-
.toSequenceParser()
80-
.map2((negation, sequence) => negation == null
81-
? sequence
82-
: sequence is ConstantCharPredicate
83-
? ConstantCharPredicate(!sequence.constant)
84-
: NotCharacterPredicate(sequence));
78+
final _patternParser = _createParser(unicode: false);
79+
final _patternUnicodeParser = _createParser(unicode: true);

lib/src/parser/character/predicate/lookup.dart

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,10 @@ import '../predicate.dart';
77
import 'range.dart';
88

99
class LookupCharPredicate extends CharacterPredicate {
10-
LookupCharPredicate(List<RangeCharPredicate> ranges)
10+
LookupCharPredicate.fromRanges(Iterable<RangeCharPredicate> ranges)
1111
: start = ranges.first.start,
1212
stop = ranges.last.stop,
13-
bits = Uint32List(
14-
(ranges.last.stop - ranges.first.start + 1 + _offset) >> _shift) {
13+
bits = Uint32List(size(ranges)) {
1514
for (final range in ranges) {
1615
for (var index = range.start - start;
1716
index <= range.stop - start;
@@ -21,6 +20,8 @@ class LookupCharPredicate extends CharacterPredicate {
2120
}
2221
}
2322

23+
const LookupCharPredicate(this.start, this.stop, this.bits);
24+
2425
final int start;
2526
final int stop;
2627
final Uint32List bits;
@@ -43,6 +44,9 @@ class LookupCharPredicate extends CharacterPredicate {
4344

4445
@override
4546
String toString() => '${super.toString()}($start, $stop, $bits)';
47+
48+
static int size(Iterable<RangeCharPredicate> ranges) =>
49+
(ranges.last.stop - ranges.first.start + _offset + 1) >> _shift;
4650
}
4751

4852
const _listEquality = ListEquality<int>();

lib/src/parser/character/predicate/not.dart

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import '../predicate.dart';
22

3-
class NotCharacterPredicate extends CharacterPredicate {
4-
const NotCharacterPredicate(this.predicate);
3+
class NotCharPredicate extends CharacterPredicate {
4+
const NotCharPredicate(this.predicate);
55

66
final CharacterPredicate predicate;
77

@@ -10,7 +10,7 @@ class NotCharacterPredicate extends CharacterPredicate {
1010

1111
@override
1212
bool isEqualTo(CharacterPredicate other) =>
13-
other is NotCharacterPredicate && predicate.isEqualTo(other.predicate);
13+
other is NotCharPredicate && predicate.isEqualTo(other.predicate);
1414

1515
@override
1616
String toString() => '${super.toString()}($predicate)';
Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,52 @@
1+
import 'dart:typed_data';
2+
13
import 'package:collection/collection.dart' show ListEquality;
24

5+
import '../../../shared/pragma.dart';
36
import '../predicate.dart';
7+
import 'range.dart';
48

59
class RangesCharPredicate extends CharacterPredicate {
6-
const RangesCharPredicate(this.length, this.starts, this.stops);
10+
RangesCharPredicate.fromRanges(Iterable<RangeCharPredicate> ranges)
11+
: ranges = Uint32List(size(ranges)) {
12+
var i = 0;
13+
for (final range in ranges) {
14+
this.ranges[i++] = range.start;
15+
this.ranges[i++] = range.stop;
16+
}
17+
}
718

8-
final int length;
9-
final List<int> starts;
10-
final List<int> stops;
19+
const RangesCharPredicate(this.ranges);
20+
21+
final Uint32List ranges;
1122

1223
@override
24+
@noBoundsChecks
1325
bool test(int charCode) {
1426
var min = 0;
15-
var max = length;
16-
while (min < max) {
17-
final mid = min + ((max - min) >> 1);
18-
final comp = starts[mid] - charCode;
19-
if (comp == 0) {
27+
var max = ranges.length - 2;
28+
while (min <= max) {
29+
final mid = (min + ((max - min) >> 1)) & ~1;
30+
if (ranges[mid] <= charCode && charCode <= ranges[mid + 1]) {
2031
return true;
21-
} else if (comp < 0) {
22-
min = mid + 1;
32+
} else if (charCode < ranges[mid]) {
33+
max = mid - 2;
2334
} else {
24-
max = mid;
35+
min = mid + 2;
2536
}
2637
}
27-
return 0 < min && charCode <= stops[min - 1];
38+
return false;
2839
}
2940

3041
@override
3142
bool isEqualTo(CharacterPredicate other) =>
3243
other is RangesCharPredicate &&
33-
length == other.length &&
34-
_listEquality.equals(starts, other.starts) &&
35-
_listEquality.equals(stops, other.stops);
44+
_listEquality.equals(ranges, other.ranges);
3645

3746
@override
38-
String toString() => '${super.toString()}($length, $starts, $stops)';
47+
String toString() => '${super.toString()}($ranges)';
48+
49+
static int size(Iterable<RangeCharPredicate> ranges) => 2 * ranges.length;
3950
}
4051

4152
const _listEquality = ListEquality<int>();

lib/src/parser/character/range.dart

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,12 @@ import 'utils/code.dart';
88
/// Returns a parser that accepts any character in the range
99
/// between [start] and [stop].
1010
@useResult
11-
Parser<String> range(String start, String stop, {String? message}) =>
11+
Parser<String> range(String start, String stop,
12+
{String? message, bool unicode = false}) =>
1213
CharacterParser(
13-
RangeCharPredicate(toCharCode(start), toCharCode(stop)),
14+
RangeCharPredicate(toCharCode(start, unicode: unicode),
15+
toCharCode(stop, unicode: unicode)),
1416
message ??
15-
'[${toReadableString(start)}-${toReadableString(stop)}] expected');
17+
'[${toReadableString(start, unicode: unicode)}-'
18+
'${toReadableString(stop, unicode: unicode)}] expected',
19+
unicode: unicode);

0 commit comments

Comments
 (0)