Skip to content

Commit cc3801c

Browse files
committed
Add safe variants of std::toupper and std::tolower
1 parent 791ccb1 commit cc3801c

File tree

3 files changed

+142
-1
lines changed

3 files changed

+142
-1
lines changed

fly/types/string/detail/string_classifier.hpp

+54-1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,34 @@ class BasicStringClassifier
6161
*/
6262
static constexpr bool is_lower(char_type ch);
6363

64+
/**
65+
* Converts the given character to an upper-case alphabetic character as classified by the
66+
* default C locale.
67+
*
68+
* The STL's std::toupper and std::towupper require that the provided character fits into an
69+
* unsigned char and unsigned wchar_t, respectively. Other values result in undefined behavior.
70+
* This method has no such restriction.
71+
*
72+
* @param ch The character to convert.
73+
*
74+
* @return The converted character.
75+
*/
76+
static constexpr char_type to_upper(char_type ch);
77+
78+
/**
79+
* Converts the given character to a lower-case alphabetic character as classified by the
80+
* default C locale.
81+
*
82+
* The STL's std::tolower and std::towlower require that the provided character fits into an
83+
* unsigned char and unsigned wchar_t, respectively. Other values result in undefined behavior.
84+
* This method has no such restriction.
85+
*
86+
* @param ch The character to convert.
87+
*
88+
* @return The converted character.
89+
*/
90+
static constexpr char_type to_lower(char_type ch);
91+
6492
/**
6593
* Checks if the given character is a decimal digit character.
6694
*
@@ -105,7 +133,8 @@ class BasicStringClassifier
105133
static constexpr const char_type s_lower_a = FLY_CHR(char_type, 'a');
106134
static constexpr const char_type s_lower_z = FLY_CHR(char_type, 'z');
107135

108-
static constexpr const int_type s_case_mask = static_cast<int_type>(~0x20);
136+
static constexpr const int_type s_case_bit = static_cast<int_type>(0x20);
137+
static constexpr const int_type s_case_mask = static_cast<int_type>(~s_case_bit);
109138
};
110139

111140
//==================================================================================================
@@ -129,6 +158,30 @@ constexpr inline bool BasicStringClassifier<StringType>::is_lower(char_type ch)
129158
return (ch >= s_lower_a) && (ch <= s_lower_z);
130159
}
131160

161+
//==================================================================================================
162+
template <typename StringType>
163+
constexpr inline auto BasicStringClassifier<StringType>::to_upper(char_type ch) -> char_type
164+
{
165+
if (is_lower(ch))
166+
{
167+
ch = static_cast<char_type>(static_cast<int_type>(ch) & s_case_mask);
168+
}
169+
170+
return ch;
171+
}
172+
173+
//==================================================================================================
174+
template <typename StringType>
175+
constexpr inline auto BasicStringClassifier<StringType>::to_lower(char_type ch) -> char_type
176+
{
177+
if (is_upper(ch))
178+
{
179+
ch = static_cast<char_type>(static_cast<int_type>(ch) | s_case_bit);
180+
}
181+
182+
return ch;
183+
}
184+
132185
//==================================================================================================
133186
template <typename StringType>
134187
constexpr inline bool BasicStringClassifier<StringType>::is_digit(char_type ch)

fly/types/string/string.hpp

+42
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,34 @@ class BasicString
126126
*/
127127
static constexpr bool is_lower(char_type ch);
128128

129+
/**
130+
* Converts the given character to an upper-case alphabetic character as classified by the
131+
* default C locale.
132+
*
133+
* The STL's std::toupper and std::towupper require that the provided character fits into an
134+
* unsigned char and unsigned wchar_t, respectively. Other values result in undefined behavior.
135+
* This method has no such restriction.
136+
*
137+
* @param ch The character to convert.
138+
*
139+
* @return The converted character.
140+
*/
141+
static constexpr char_type to_upper(char_type ch);
142+
143+
/**
144+
* Converts the given character to a lower-case alphabetic character as classified by the
145+
* default C locale.
146+
*
147+
* The STL's std::tolower and std::towlower require that the provided character fits into an
148+
* unsigned char and unsigned wchar_t, respectively. Other values result in undefined behavior.
149+
* This method has no such restriction.
150+
*
151+
* @param ch The character to convert.
152+
*
153+
* @return The converted character.
154+
*/
155+
static constexpr char_type to_lower(char_type ch);
156+
129157
/**
130158
* Checks if the given character is a decimal digit character.
131159
*
@@ -550,6 +578,20 @@ constexpr inline bool BasicString<StringType>::is_digit(char_type ch)
550578
return classifier::is_digit(ch);
551579
}
552580

581+
//==================================================================================================
582+
template <typename StringType>
583+
constexpr inline auto BasicString<StringType>::to_upper(char_type ch) -> char_type
584+
{
585+
return classifier::to_upper(ch);
586+
}
587+
588+
//==================================================================================================
589+
template <typename StringType>
590+
constexpr inline auto BasicString<StringType>::to_lower(char_type ch) -> char_type
591+
{
592+
return classifier::to_lower(ch);
593+
}
594+
553595
//==================================================================================================
554596
template <typename StringType>
555597
constexpr inline bool BasicString<StringType>::is_x_digit(char_type ch)

test/types/string/string_classifier.cpp

+46
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,52 @@ CATCH_TEMPLATE_TEST_CASE(
8686
}
8787
}
8888

89+
CATCH_SECTION("Convert a character to an upper-case alphabetic character")
90+
{
91+
for (char_type ch = 0; (ch >= 0) && (ch < 0x80); ++ch)
92+
{
93+
CATCH_CHECK(
94+
BasicString::to_upper(ch) ==
95+
static_cast<char_type>(std::toupper(static_cast<unsigned char>(ch))));
96+
}
97+
98+
if constexpr (sizeof(char_type) > 1)
99+
{
100+
// Spot check some values that incorrectly result in std::toupper returning an
101+
// upper-case character when cast to unsigned char (which is how the spec suggests to
102+
// avoid undefined behavior).
103+
for (char_type ch = 0xaa41; ch <= 0xaa5a; ++ch)
104+
{
105+
CATCH_CHECK(
106+
ch != static_cast<char_type>(std::toupper(static_cast<unsigned char>(ch))));
107+
CATCH_CHECK(ch == BasicString::to_upper(ch));
108+
}
109+
}
110+
}
111+
112+
CATCH_SECTION("Convert a character to a lower-case alphabetic character")
113+
{
114+
for (char_type ch = 0; (ch >= 0) && (ch < 0x80); ++ch)
115+
{
116+
CATCH_CHECK(
117+
BasicString::to_lower(ch) ==
118+
static_cast<char_type>(std::tolower(static_cast<unsigned char>(ch))));
119+
}
120+
121+
if constexpr (sizeof(char_type) > 1)
122+
{
123+
// Spot check some values that incorrectly result in std::tolower returning a lower-case
124+
// character when cast to unsigned char (which is how the spec suggests to avoid
125+
// undefined behavior).
126+
for (char_type ch = 0xaa61; ch <= 0xaa7a; ++ch)
127+
{
128+
CATCH_CHECK(
129+
ch != static_cast<char_type>(std::tolower(static_cast<unsigned char>(ch))));
130+
CATCH_CHECK(ch == BasicString::to_lower(ch));
131+
}
132+
}
133+
}
134+
89135
CATCH_SECTION("Check if a character is a decimal digit character")
90136
{
91137
for (char_type ch = 0; (ch >= 0) && (ch < 0x80); ++ch)

0 commit comments

Comments
 (0)