Skip to content

Commit 723d2d3

Browse files
committed
Add safe variants of std::isalpha and std::isdigit
These STL methods result in undefined behavior if the given value cannot be represented by an unsigned char. Add methods to BasicString that do not have this restriction.
1 parent 2d84985 commit 723d2d3

File tree

8 files changed

+193
-0
lines changed

8 files changed

+193
-0
lines changed

build/win/libfly/libfly.vcxproj

+1
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@
219219
<ClInclude Include="..\..\..\fly\types\numeric\detail\literal_parser.hpp" />
220220
<ClInclude Include="..\..\..\fly\types\numeric\endian.hpp" />
221221
<ClInclude Include="..\..\..\fly\types\numeric\literals.hpp" />
222+
<ClInclude Include="..\..\..\fly\types\string\detail\string_classifier.hpp" />
222223
<ClInclude Include="..\..\..\fly\types\string\detail\string_converter.hpp" />
223224
<ClInclude Include="..\..\..\fly\types\string\detail\string_formatter.hpp" />
224225
<ClInclude Include="..\..\..\fly\types\string\detail\string_streamer.hpp" />

build/win/libfly/libfly.vcxproj.filters

+3
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,9 @@
268268
<ClInclude Include="..\..\..\fly\types\string\string_literal.hpp">
269269
<Filter>types\string</Filter>
270270
</ClInclude>
271+
<ClInclude Include="..\..\..\fly\types\string\detail\string_classifier.hpp">
272+
<Filter>types\string\detail</Filter>
273+
</ClInclude>
271274
<ClInclude Include="..\..\..\fly\types\string\detail\string_converter.hpp">
272275
<Filter>types\string\detail</Filter>
273276
</ClInclude>

build/win/libfly_unit_tests/libfly_unit_tests.vcxproj

+1
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@
242242
<ClCompile Include="..\..\..\test\types\json_traits.cpp" />
243243
<ClCompile Include="..\..\..\test\types\literals.cpp" />
244244
<ClCompile Include="..\..\..\test\types\string.cpp" />
245+
<ClCompile Include="..\..\..\test\types\string_classifier.cpp" />
245246
<ClCompile Include="..\..\..\test\types\string_converter.cpp" />
246247
<ClCompile Include="..\..\..\test\types\string_formatter.cpp" />
247248
<ClCompile Include="..\..\..\test\types\string_traits.cpp" />

build/win/libfly_unit_tests/libfly_unit_tests.vcxproj.filters

+3
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@
127127
<ClCompile Include="..\..\..\test\types\string.cpp">
128128
<Filter>types</Filter>
129129
</ClCompile>
130+
<ClCompile Include="..\..\..\test\types\string_classifier.cpp">
131+
<Filter>types</Filter>
132+
</ClCompile>
130133
<ClCompile Include="..\..\..\test\types\string_converter.cpp">
131134
<Filter>types</Filter>
132135
</ClCompile>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#pragma once
2+
3+
#include "fly/types/string/detail/string_traits.hpp"
4+
#include "fly/types/string/string_literal.hpp"
5+
6+
namespace fly::detail {
7+
8+
/**
9+
* Helper class to provide safe alernatives to the STL's <cctype> methods.
10+
*
11+
* @author Timothy Flynn ([email protected])
12+
* @version January 3, 2021
13+
*/
14+
template <typename StringType>
15+
class BasicStringClassifier
16+
{
17+
using traits = detail::BasicStringTraits<StringType>;
18+
using char_type = typename traits::char_type;
19+
using int_type = typename traits::int_type;
20+
21+
public:
22+
/**
23+
* Checks if the given character is an alphabetic character as classified by the default C
24+
* locale.
25+
*
26+
* The STL's std::isalpha and std::iswalpha require that the provided character fits into an
27+
* unsigned char and unsigned wchar_t, respectively. Other values result in undefined behavior.
28+
* This method has no such restriction.
29+
*
30+
* @param ch The character to classify.
31+
*
32+
* @return True if the character is an alphabetic character.
33+
*/
34+
static bool is_alpha(char_type ch);
35+
36+
/**
37+
* Checks if the given character is a decimal digit character.
38+
*
39+
* The STL's std::isdigit and std::iswdigit require that the provided character fits into an
40+
* unsigned char and unsigned wchar_t, respectively. Other values result in undefined behavior.
41+
* This method has no such restriction.
42+
*
43+
* @param ch The character to classify.
44+
*
45+
* @return True if the character is a decimal digit character.
46+
*/
47+
static bool is_digit(char_type ch);
48+
49+
private:
50+
static constexpr const char_type s_zero = FLY_CHR(char_type, '0');
51+
static constexpr const char_type s_upper_a = FLY_CHR(char_type, 'A');
52+
static constexpr const char_type s_upper_z = FLY_CHR(char_type, 'Z');
53+
54+
static constexpr const int_type s_case_mask = static_cast<int_type>(~0x20);
55+
};
56+
57+
//==================================================================================================
58+
template <typename StringType>
59+
inline bool BasicStringClassifier<StringType>::is_alpha(char_type ch)
60+
{
61+
// Remove the 0x20 bit, converting the a-z range of characters to the A-Z range.
62+
ch &= s_case_mask;
63+
64+
return (ch >= s_upper_a) && (ch <= s_upper_z);
65+
}
66+
67+
//==================================================================================================
68+
template <typename StringType>
69+
inline bool BasicStringClassifier<StringType>::is_digit(char_type ch)
70+
{
71+
return (ch ^ s_zero) < 10;
72+
}
73+
74+
} // namespace fly::detail

fly/types/string/detail/string_traits.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ struct BasicStringTraits
101101
using size_type = typename StringType::size_type;
102102
using char_type = typename StringType::value_type;
103103
using view_type = std::basic_string_view<char_type>;
104+
using int_type = typename std::char_traits<char_type>::int_type;
104105

105106
using codepoint_type = std::uint32_t;
106107

fly/types/string/string.hpp

+44
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include "fly/traits/traits.hpp"
4+
#include "fly/types/string/detail/string_classifier.hpp"
45
#include "fly/types/string/detail/string_converter.hpp"
56
#include "fly/types/string/detail/string_formatter.hpp"
67
#include "fly/types/string/detail/string_streamer.hpp"
@@ -51,6 +52,7 @@ using String32Traits = detail::BasicStringTraits<std::u32string>;
5152
template <typename StringType>
5253
class BasicString
5354
{
55+
using classifier = detail::BasicStringClassifier<StringType>;
5456
using formatter = detail::BasicStringFormatter<StringType>;
5557
using streamer = detail::BasicStringStreamer<StringType>;
5658
using traits = detail::BasicStringTraits<StringType>;
@@ -61,6 +63,7 @@ class BasicString
6163
using size_type = typename traits::size_type;
6264
using char_type = typename traits::char_type;
6365
using view_type = typename traits::view_type;
66+
using int_type = typename traits::int_type;
6467
using codepoint_type = typename traits::codepoint_type;
6568
using ostream_type = typename traits::ostream_type;
6669
using streamed_type = typename traits::streamed_type;
@@ -78,6 +81,33 @@ class BasicString
7881
template <typename T, enable_if<detail::is_like_supported_string<T>> = 0>
7982
static size_type size(const T &value);
8083

84+
/**
85+
* Checks if the given character is an alphabetic character as classified by the default C
86+
* locale.
87+
*
88+
* The STL's std::isalpha and std::iswalpha require that the provided character fits into an
89+
* unsigned char and unsigned wchar_t, respectively. Other values result in undefined behavior.
90+
* This method has no such restriction.
91+
*
92+
* @param ch The character to classify.
93+
*
94+
* @return True if the character is an alphabetic character.
95+
*/
96+
static bool is_alpha(char_type ch);
97+
98+
/**
99+
* Checks if the given character is a decimal digit character.
100+
*
101+
* The STL's std::isdigit and std::iswdigit require that the provided character fits into an
102+
* unsigned char and unsigned wchar_t, respectively. Other values result in undefined behavior.
103+
* This method has no such restriction.
104+
*
105+
* @param ch The character to classify.
106+
*
107+
* @return True if the character is a decimal digit character.
108+
*/
109+
static bool is_digit(char_type ch);
110+
81111
/**
82112
* Split a string into a vector of strings.
83113
*
@@ -421,6 +451,20 @@ auto BasicString<StringType>::size(const T &value) -> size_type
421451
}
422452
}
423453

454+
//==================================================================================================
455+
template <typename StringType>
456+
inline bool BasicString<StringType>::is_alpha(char_type ch)
457+
{
458+
return classifier::is_alpha(ch);
459+
}
460+
461+
//==================================================================================================
462+
template <typename StringType>
463+
inline bool BasicString<StringType>::is_digit(char_type ch)
464+
{
465+
return classifier::is_digit(ch);
466+
}
467+
424468
//==================================================================================================
425469
template <typename StringType>
426470
std::vector<StringType> BasicString<StringType>::split(const StringType &input, char_type delimiter)

test/types/string_classifier.cpp

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#include "fly/types/string/string.hpp"
2+
3+
#include "catch2/catch.hpp"
4+
5+
#include <cctype>
6+
#include <string>
7+
8+
CATCH_TEMPLATE_TEST_CASE(
9+
"BasicStringClassifier",
10+
"[string]",
11+
std::string,
12+
std::wstring,
13+
std::u8string,
14+
std::u16string,
15+
std::u32string)
16+
{
17+
using StringType = TestType;
18+
using BasicString = fly::BasicString<StringType>;
19+
using char_type = typename BasicString::char_type;
20+
21+
CATCH_SECTION("Check if a character is an alphabetic character")
22+
{
23+
for (int i = 0; i < 0x80; ++i)
24+
{
25+
const auto ch = static_cast<char_type>(i);
26+
CATCH_CHECK(BasicString::is_alpha(ch) == static_cast<bool>(std::isalpha(i)));
27+
}
28+
29+
if constexpr (sizeof(char_type) > 1)
30+
{
31+
// Spot check some values that incorrectly result in std::isalpha returning true when
32+
// cast to unsigned char (which is how the spec suggests to avoid undefined behavior).
33+
for (int i = 0xaa41; i <= 0xaa5a; ++i)
34+
{
35+
CATCH_CHECK(std::isalpha(static_cast<unsigned char>(i)));
36+
CATCH_CHECK_FALSE(BasicString::is_alpha(static_cast<char_type>(i)));
37+
}
38+
39+
for (int i = 0xaa61; i <= 0xaa7a; ++i)
40+
{
41+
CATCH_CHECK(std::isalpha(static_cast<unsigned char>(i)));
42+
CATCH_CHECK_FALSE(BasicString::is_alpha(static_cast<char_type>(i)));
43+
}
44+
}
45+
}
46+
47+
CATCH_SECTION("Check if a character is a decimal digit character")
48+
{
49+
for (int i = 0; i < 0x80; ++i)
50+
{
51+
const auto ch = static_cast<char_type>(i);
52+
CATCH_CHECK(BasicString::is_digit(ch) == static_cast<bool>(std::isdigit(i)));
53+
}
54+
55+
if constexpr (sizeof(char_type) > 1)
56+
{
57+
// Spot check some values that incorrectly result in std::isdigit returning true when
58+
// cast to unsigned char (which is how the spec suggests to avoid undefined behavior).
59+
for (int i = 0xaa30; i <= 0xaa39; ++i)
60+
{
61+
CATCH_CHECK(std::isdigit(static_cast<unsigned char>(i)));
62+
CATCH_CHECK_FALSE(BasicString::is_digit(static_cast<char_type>(i)));
63+
}
64+
}
65+
}
66+
}

0 commit comments

Comments
 (0)