Skip to content

Commit 3e16921

Browse files
committed
BasicStringLexer support for parsing hexadecimal numbers
1 parent f9c7552 commit 3e16921

File tree

3 files changed

+169
-28
lines changed

3 files changed

+169
-28
lines changed

fly/types/string/detail/string_formatter_types.hpp

+7-3
Original file line number | Diff line number | Diff line change
@@ -920,7 +920,7 @@ BasicFormatString<StringType, ParameterTypes...>::parse_position(FormatSpecifier
920920
{
921921
if (auto position = m_lexer.consume_number(); position)
922922
{
923-
specifier.m_position = position.value();
923+
specifier.m_position = static_cast<std::size_t>(position.value());
924924
m_expect_all_positions_specified = true;
925925
}
926926
else
@@ -1001,7 +1001,9 @@ BasicFormatString<StringType, ParameterTypes...>::parse_width(FormatSpecifier &s
10011001
{
10021002
if (auto width = m_lexer.consume_number(); width)
10031003
{
1004-
specifier.set_width(FormatSpecifier::SizeOrPosition::Type::Size, *width);
1004+
specifier.set_width(
1005+
FormatSpecifier::SizeOrPosition::Type::Size,
1006+
static_cast<std::size_t>(*width));
10051007
}
10061008
else if (m_lexer.consume_if(s_left_brace))
10071009
{
@@ -1023,7 +1025,9 @@ BasicFormatString<StringType, ParameterTypes...>::parse_precision(FormatSpecifie
10231025
{
10241026
if (auto precision = m_lexer.consume_number(); precision)
10251027
{
1026-
specifier.set_precision(FormatSpecifier::SizeOrPosition::Type::Size, *precision);
1028+
specifier.set_precision(
1029+
FormatSpecifier::SizeOrPosition::Type::Size,
1030+
static_cast<std::size_t>(*precision));
10271031
}
10281032
else if (m_lexer.consume_if(s_left_brace))
10291033
{

fly/types/string/string_lexer.hpp

+61-18
Original file line number | Diff line number | Diff line change
@@ -83,14 +83,22 @@ class BasicStringLexer
8383
constexpr bool consume_if(char_type ch);
8484

8585
/**
86-
* Beginning with the current position, retrieve characters from the current position in the
87-
* C-string literal and advance the current position to the next character until a character is
88-
* either not available or not a decimal digit. Convert the retrieved characters to an unsigned
89-
* number.
86+
* Beginning with the current position, retrieve characters from the C-string literal and
87+
* advance the current position to the next character until a character is either not available
88+
* or not a decimal digit. Convert the retrieved characters to an unsigned number.
9089
*
91-
* @return If consumed, the parsed unsigned number. Otherwise, an uninitialized value.
90+
* @return If consumed, the parsed decimal number. Otherwise, an uninitialized value.
9291
*/
93-
constexpr std::optional<std::size_t> consume_number();
92+
constexpr std::optional<std::uintmax_t> consume_number();
93+
94+
/**
95+
* Beginning with the current position, retrieve characters from the C-string literal and
96+
* advance the current position to the next character until a character is either not available
97+
* or not a hexadecimal digit. Convert the retrieved characters to an unsigned number.
98+
*
99+
* @return If consumed, the parsed hexadecimal number. Otherwise, an uninitialized value.
100+
*/
101+
constexpr std::optional<std::uintmax_t> consume_hex_number();
94102

95103
private:
96104
/**
@@ -107,6 +115,12 @@ class BasicStringLexer
107115
template <typename Condition>
108116
constexpr std::optional<char_type> consume_if(Condition condition);
109117

118+
static constexpr const auto s_zero = FLY_CHR(char_type, '0');
119+
static constexpr const auto s_upper_a = FLY_CHR(char_type, 'A');
120+
static constexpr const auto s_upper_f = FLY_CHR(char_type, 'F');
121+
static constexpr const auto s_lower_a = FLY_CHR(char_type, 'a');
122+
static constexpr const auto s_lower_f = FLY_CHR(char_type, 'f');
123+
110124
const std::size_t m_size;
111125
const view_type m_view;
112126

@@ -183,34 +197,63 @@ constexpr bool BasicStringLexer<StringType>::consume_if(char_type ch)
183197

184198
//==================================================================================================
185199
template <typename StringType>
186-
template <typename Condition>
187-
constexpr auto BasicStringLexer<StringType>::consume_if(Condition condition)
188-
-> std::optional<char_type>
200+
constexpr std::optional<std::uintmax_t> BasicStringLexer<StringType>::consume_number()
189201
{
190-
if (auto next = peek(); next && condition(next.value()))
202+
bool parsed_number = false;
203+
std::uintmax_t number = 0;
204+
205+
while (auto ch = consume_if(classifier::is_digit))
191206
{
192-
return consume();
207+
parsed_number = true;
208+
209+
number *= 10;
210+
number += static_cast<std::uintmax_t>(ch.value() - s_zero);
193211
}
194212

195-
return std::nullopt;
213+
return parsed_number ? std::optional<std::uintmax_t>(number) : std::nullopt;
196214
}
197215

198216
//==================================================================================================
199217
template <typename StringType>
200-
constexpr std::optional<std::size_t> BasicStringLexer<StringType>::consume_number()
218+
constexpr std::optional<std::uintmax_t> BasicStringLexer<StringType>::consume_hex_number()
201219
{
202220
bool parsed_number = false;
203-
std::size_t number = 0;
221+
std::uintmax_t number = 0;
204222

205-
while (auto ch = consume_if(classifier::is_digit))
223+
while (auto ch = consume_if(classifier::is_x_digit))
206224
{
207225
parsed_number = true;
226+
number *= 16;
227+
228+
if ((ch.value() >= s_upper_a) && (ch.value() <= s_upper_f))
229+
{
230+
number += static_cast<std::uintmax_t>(ch.value()) - s_upper_a + 0xA;
231+
}
232+
else if ((ch.value() >= s_lower_a) && (ch.value() <= s_lower_f))
233+
{
234+
number += static_cast<std::uintmax_t>(ch.value()) - s_lower_a + 0xa;
235+
}
236+
else
237+
{
238+
number += static_cast<std::uintmax_t>(ch.value()) - s_zero;
239+
}
240+
}
208241

209-
number *= 10;
210-
number += static_cast<std::size_t>(ch.value() - FLY_CHR(char_type, '0'));
242+
return parsed_number ? std::optional<std::uintmax_t>(number) : std::nullopt;
243+
}
244+
245+
//==================================================================================================
246+
template <typename StringType>
247+
template <typename Condition>
248+
constexpr auto BasicStringLexer<StringType>::consume_if(Condition condition)
249+
-> std::optional<char_type>
250+
{
251+
if (auto next = peek(); next && condition(next.value()))
252+
{
253+
return consume();
211254
}
212255

213-
return parsed_number ? std::optional<std::size_t>(number) : std::nullopt;
256+
return std::nullopt;
214257
}
215258

216259
} // namespace fly

test/types/string/string_lexer.cpp

+101-7
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ CATCH_TEMPLATE_TEST_CASE(
2828
CATCH_CHECK_FALSE(lexer.consume());
2929
CATCH_CHECK_FALSE(lexer.consume_if(FLY_CHR(char_type, '\0')));
3030
CATCH_CHECK_FALSE(lexer.consume_number());
31+
CATCH_CHECK_FALSE(lexer.consume_hex_number());
3132
CATCH_CHECK(lexer.position() == 0);
3233
}
3334

@@ -196,7 +197,7 @@ CATCH_TEMPLATE_TEST_CASE(
196197
CATCH_CHECK(lexer.position() == 2);
197198
}
198199

199-
CATCH_SECTION("Cannot consume number if no number exists")
200+
CATCH_SECTION("Cannot consume decimal number if no number exists")
200201
{
201202
Lexer lexer(FLY_ARR(char_type, "ab"));
202203
CATCH_CHECK(lexer.position() == 0);
@@ -205,7 +206,7 @@ CATCH_TEMPLATE_TEST_CASE(
205206
CATCH_CHECK(lexer.position() == 0);
206207
}
207208

208-
CATCH_SECTION("Cannot consume number past end of lexer")
209+
CATCH_SECTION("Cannot consume decimal number past end of lexer")
209210
{
210211
Lexer lexer(FLY_ARR(char_type, "1"));
211212
CATCH_CHECK(lexer.position() == 0);
@@ -219,7 +220,7 @@ CATCH_TEMPLATE_TEST_CASE(
219220
CATCH_CHECK(lexer.position() == 1);
220221
}
221222

222-
CATCH_SECTION("Cannot consume number if number exists past internal pointer")
223+
CATCH_SECTION("Cannot consume decimal number if number exists past internal pointer")
223224
{
224225
Lexer lexer(FLY_ARR(char_type, "ab1"));
225226
CATCH_CHECK(lexer.position() == 0);
@@ -228,7 +229,7 @@ CATCH_TEMPLATE_TEST_CASE(
228229
CATCH_CHECK(lexer.position() == 0);
229230
}
230231

231-
CATCH_SECTION("Number consumption stops at first non-digit character")
232+
CATCH_SECTION("Decimal number consumption stops at first non-digit character")
232233
{
233234
Lexer lexer(FLY_ARR(char_type, "1ab"));
234235
CATCH_CHECK(lexer.position() == 0);
@@ -244,7 +245,7 @@ CATCH_TEMPLATE_TEST_CASE(
244245
CATCH_CHECK(lexer.position() == 1);
245246
}
246247

247-
CATCH_SECTION("Number consumption stops at end of lexer")
248+
CATCH_SECTION("Decimal number consumption stops at end of lexer")
248249
{
249250
Lexer lexer(FLY_ARR(char_type, "1"));
250251
CATCH_CHECK(lexer.position() == 0);
@@ -257,7 +258,7 @@ CATCH_TEMPLATE_TEST_CASE(
257258
CATCH_CHECK_FALSE(lexer.peek());
258259
}
259260

260-
CATCH_SECTION("Number consumption consumes all digits in a row")
261+
CATCH_SECTION("Decimal number consumption consumes all digits in a row")
261262
{
262263
Lexer lexer(FLY_ARR(char_type, "123"));
263264
CATCH_CHECK(lexer.position() == 0);
@@ -270,7 +271,7 @@ CATCH_TEMPLATE_TEST_CASE(
270271
CATCH_CHECK_FALSE(lexer.peek());
271272
}
272273

273-
CATCH_SECTION("Number consumption can succeed multiple times per lexer if separated")
274+
CATCH_SECTION("Decimal number consumption can succeed multiple times per lexer if separated")
274275
{
275276
Lexer lexer(FLY_ARR(char_type, "123a456"));
276277
CATCH_CHECK(lexer.position() == 0);
@@ -288,4 +289,97 @@ CATCH_TEMPLATE_TEST_CASE(
288289
CATCH_CHECK(n2.value() == 456);
289290
CATCH_CHECK(lexer.position() == 7);
290291
}
292+
293+
CATCH_SECTION("Cannot consume hex number if no number exists")
294+
{
295+
Lexer lexer(FLY_ARR(char_type, "xy"));
296+
CATCH_CHECK(lexer.position() == 0);
297+
298+
CATCH_CHECK_FALSE(lexer.consume_hex_number());
299+
CATCH_CHECK(lexer.position() == 0);
300+
}
301+
302+
CATCH_SECTION("Cannot consume hex number past end of lexer")
303+
{
304+
Lexer lexer(FLY_ARR(char_type, "1"));
305+
CATCH_CHECK(lexer.position() == 0);
306+
307+
auto n1 = lexer.consume_hex_number();
308+
CATCH_REQUIRE(n1.has_value());
309+
CATCH_CHECK(n1.value() == 1);
310+
CATCH_CHECK(lexer.position() == 1);
311+
312+
CATCH_CHECK_FALSE(lexer.consume_hex_number());
313+
CATCH_CHECK(lexer.position() == 1);
314+
}
315+
316+
CATCH_SECTION("Cannot consume hex number if number exists past internal pointer")
317+
{
318+
Lexer lexer(FLY_ARR(char_type, "xy1"));
319+
CATCH_CHECK(lexer.position() == 0);
320+
321+
CATCH_CHECK_FALSE(lexer.consume_hex_number());
322+
CATCH_CHECK(lexer.position() == 0);
323+
}
324+
325+
CATCH_SECTION("Hex number consumption stops at first non-digit character")
326+
{
327+
Lexer lexer(FLY_ARR(char_type, "1ax"));
328+
CATCH_CHECK(lexer.position() == 0);
329+
330+
auto n1 = lexer.consume_hex_number();
331+
CATCH_REQUIRE(n1.has_value());
332+
CATCH_CHECK(n1.value() == 0x1a);
333+
CATCH_CHECK(lexer.position() == 2);
334+
335+
auto p1 = lexer.peek();
336+
CATCH_REQUIRE(p1.has_value());
337+
CATCH_CHECK(p1.value() == FLY_CHR(char_type, 'x'));
338+
CATCH_CHECK(lexer.position() == 2);
339+
}
340+
341+
CATCH_SECTION("Hex number consumption stops at end of lexer")
342+
{
343+
Lexer lexer(FLY_ARR(char_type, "1a"));
344+
CATCH_CHECK(lexer.position() == 0);
345+
346+
auto n1 = lexer.consume_hex_number();
347+
CATCH_REQUIRE(n1.has_value());
348+
CATCH_CHECK(n1.value() == 0x1a);
349+
CATCH_CHECK(lexer.position() == 2);
350+
351+
CATCH_CHECK_FALSE(lexer.peek());
352+
}
353+
354+
CATCH_SECTION("Hex number consumption consumes all digits in a row")
355+
{
356+
Lexer lexer(FLY_ARR(char_type, "123a"));
357+
CATCH_CHECK(lexer.position() == 0);
358+
359+
auto n1 = lexer.consume_hex_number();
360+
CATCH_REQUIRE(n1.has_value());
361+
CATCH_CHECK(n1.value() == 0x123a);
362+
CATCH_CHECK(lexer.position() == 4);
363+
364+
CATCH_CHECK_FALSE(lexer.peek());
365+
}
366+
367+
CATCH_SECTION("Hex number consumption can succeed multiple times per lexer if separated")
368+
{
369+
Lexer lexer(FLY_ARR(char_type, "123ax456B"));
370+
CATCH_CHECK(lexer.position() == 0);
371+
372+
auto n1 = lexer.consume_hex_number();
373+
CATCH_REQUIRE(n1.has_value());
374+
CATCH_CHECK(n1.value() == 0x123a);
375+
CATCH_CHECK(lexer.position() == 4);
376+
377+
CATCH_CHECK(lexer.consume_if(FLY_CHR(char_type, 'x')));
378+
CATCH_CHECK(lexer.position() == 5);
379+
380+
auto n2 = lexer.consume_hex_number();
381+
CATCH_REQUIRE(n2.has_value());
382+
CATCH_CHECK(n2.value() == 0x456b);
383+
CATCH_CHECK(lexer.position() == 9);
384+
}
291385
}

0 commit comments

Comments
 (0)