Skip to content

Commit 8be0116

Browse files
authored
Improve whitespace handling in Time::Format (#15890)
* Adds support for `%n` and `%t`. They are exactly equivalent to `\n` and `\t`, as in POSIX C `strptime` (when `_XOPEN_SOURCE` is defined) and Ruby `Time.strptime`.
* All ASCII whitespace characters in the pattern now consume zero or more ASCII whitespace characters when parsing. This includes `%n`, `%t`, and the embedded spaces in `%c` and `%r` too.
* The YAML timestamp specification only allows the space character and the tab character (`[ \t]+`); any other whitespace is now disallowed.
1 parent 4581c8e commit 8be0116

File tree

6 files changed

+57
-9
lines changed

6 files changed

+57
-9
lines changed

spec/std/time/format_spec.cr

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,8 @@ describe Time::Format do
137137
# TODO %U
138138
# TODO %W
139139
# TODO %s
140-
# TODO %n
141-
# TODO %t
140+
assert_prints t.to_s("%n"), "\n"
141+
assert_prints t.to_s("%t"), "\t"
142142
# TODO %%
143143

144144
assert_prints t.to_s("%%"), "%"
@@ -565,8 +565,26 @@ describe Time::Format do
565565
# TODO %U
566566
# TODO %W
567567
# TODO %s
568-
# TODO %n
569-
# TODO %t
568+
569+
it "parses whitespace" do
570+
[" ", "\t", "\n", "\v", "\f", "\r", "%n", "%t"].each do |space|
571+
parse_time("20250530", "%Y#{space}%m#{space}%d").should eq(Time.utc(2025, 5, 30))
572+
parse_time("2025 05 30", "%Y#{space}%m#{space}%d").should eq(Time.utc(2025, 5, 30))
573+
parse_time("2025 \t\n\v\f\r05 \t\n\v\f\r30", "%Y#{space}%m#{space}%d").should eq(Time.utc(2025, 5, 30))
574+
end
575+
576+
parse_time("20250530", "%Y \t\n\v\f\r%n%t%m \t\n\v\f\r%n%t%d").should eq(Time.utc(2025, 5, 30))
577+
parse_time("2025 05 30", "%Y \t\n\v\f\r%n%t%m \t\n\v\f\r%n%t%d").should eq(Time.utc(2025, 5, 30))
578+
parse_time("2025 \t\n\v\f\r05 \t\n\v\f\r30", "%Y \t\n\v\f\r%n%t%m \t\n\v\f\r%n%t%d").should eq(Time.utc(2025, 5, 30))
579+
580+
parse_time("Fri Oct 31 23:00:24 2014", "%c").should eq(Time.utc(2014, 10, 31, 23, 0, 24))
581+
parse_time("Fri\tOct\n31\v23:00:24\f\r 2014", "%c").should eq(Time.utc(2014, 10, 31, 23, 0, 24))
582+
583+
parse_time("11:14:01PM", "%r").should eq(Time.utc(1, 1, 1, 23, 14, 1))
584+
parse_time("11:14:01 PM", "%r").should eq(Time.utc(1, 1, 1, 23, 14, 1))
585+
parse_time("11:14:01 \t\n\v\f\rPM", "%r").should eq(Time.utc(1, 1, 1, 23, 14, 1))
586+
end
587+
570588
# TODO %%
571589
# TODO %v
572590

spec/std/yaml/serialization_spec.cr

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,17 @@ describe "YAML serialization" do
384384

385385
it "deserializes time" do
386386
Time.from_yaml("2010-11-12").should eq(Time.utc(2010, 11, 12))
387+
388+
t = Time.local(2001, 12, 14, 21, 59, 43, nanosecond: 100000000, location: Time::Location.fixed(-18000))
389+
Time.from_yaml("2001-12-14t21:59:43.10-05:00").should eq(t)
390+
Time.from_yaml("2001-12-14 21:59:43.10 -5").should eq(t)
391+
Time.from_yaml("2001-12-14 21:59:43.10\t\t -5").should eq(t)
392+
Time.from_yaml(%(!!timestamp "2001-12-14 21:59:43.10\t\\t -5")).should eq(t)
393+
394+
expect_raises(YAML::ParseException) { Time.from_yaml(%(!!timestamp "2001-12-14\\f21:59:43.10 -5")) }
395+
expect_raises(YAML::ParseException) { Time.from_yaml(%(!!timestamp "2001-12-14\\n21:59:43.10 -5")) }
396+
expect_raises(YAML::ParseException) { Time.from_yaml(%(!!timestamp "2001-12-14\\r21:59:43.10 -5")) }
397+
expect_raises(YAML::ParseException) { Time.from_yaml(%(!!timestamp "2001-12-14\\v21:59:43.10 -5")) }
387398
end
388399

389400
it "deserializes bytes" do

src/time/format.cr

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ require "./format/parser"
77
# begin with a percent (`%`) character. Any text not listed as a directive
88
# will be passed/parsed through the output/input string.
99
#
10+
# ASCII whitespace characters in the pattern string are written verbatim when formatting,
11+
# and consume any number of ASCII whitespace characters on parsing.
12+
#
1013
# The directives are:
1114
#
1215
# * **%a**: short day name (Sun, Mon, Tue, ...)
@@ -40,13 +43,15 @@ require "./format/parser"
4043
# * **%3N**: milliseconds, zero padded (000, 001, ..., 999) (same as **%L**)
4144
# * **%6N**: microseconds, zero padded (000000, 000001, ..., 999999)
4245
# * **%9N**: nanoseconds, zero padded (000000000, 000000001, ..., 999999999)
46+
# * **%n**: same as the newline character (`\n`)
4347
# * **%N**: second fraction, zero padded. (Same as `%9N` but may consume more than 9 digits while parsing)
4448
# * **%p**: am-pm (lowercase)
4549
# * **%P**: AM-PM (uppercase)
4650
# * **%r**: 12-hour time (03:04:05 AM)
4751
# * **%R**: 24-hour time (13:04)
4852
# * **%s**: seconds since unix epoch (see `Time#to_unix`)
4953
# * **%S**: seconds, zero padded (00, 01, ..., 59)
54+
# * **%t**: same as the tab character (`\t`)
5055
# * **%T**: 24-hour time (13:04:05)
5156
# * **%u**: day of week (Monday is 1, 1..7)
5257
# * **%V**: ISO calendar week number of the week-based year (01..53)

src/time/format/custom/yaml_date.cr

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@ struct Time::Format
44
# can consist of just the date part, and following it any number of spaces,
55
# or 't', or 'T' can follow, with many optional components. So, we implement
66
# this in a more efficient way to avoid parsing the same string with many
7-
# possible formats (there's also no way to specify any number of spaces
8-
# with Time::Format, or an "or" like in a Regex).
7+
# possible formats (there's also no way to specify an "or" like in a Regex).
98
#
109
# As an additional note, Ruby's Psych YAML parser also implements a
1110
# custom time parser, probably for this same reason.
@@ -63,8 +62,8 @@ struct Time::Format
6362
when 'T', 't'
6463
next_char
6564
return yaml_time?
66-
when .ascii_whitespace?
67-
skip_spaces
65+
when ' ', '\t'
66+
skip_whitespaces_and_tabs
6867

6968
if @reader.has_next?
7069
return yaml_time?
@@ -99,7 +98,7 @@ struct Time::Format
9998

10099
second_fraction?
101100

102-
skip_spaces
101+
skip_whitespaces_and_tabs
103102

104103
if @reader.has_next?
105104
begin
@@ -113,6 +112,12 @@ struct Time::Format
113112

114113
true
115114
end
115+
116+
private def skip_whitespaces_and_tabs
117+
while current_char.in?(' ', '\t')
118+
next_char
119+
end
120+
end
116121
end
117122

118123
struct Formatter

src/time/format/parser.cr

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,11 @@ struct Time::Format
502502
end
503503

504504
def char(char, *alternatives)
505+
if char.ascii_whitespace?
506+
skip_spaces
507+
return
508+
end
509+
505510
unless @reader.has_next?
506511
if alternatives.empty?
507512
raise "Expected #{char.inspect} but the end of the input was reached"

src/time/format/pattern.cr

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ struct Time::Format
5454
month_zero_padded
5555
when 'M'
5656
minute
57+
when 'n'
58+
char '\n'
5759
when 'N'
5860
second_fraction
5961
when 'p'
@@ -68,6 +70,8 @@ struct Time::Format
6870
unix_seconds
6971
when 'S'
7072
second
73+
when 't'
74+
char '\t'
7175
when 'T', 'X'
7276
twenty_four_hour_time_with_seconds
7377
when 'u'

0 commit comments

Comments
 (0)