Skip to content

Commit a68f31d

Browse files
committed
Add safe variant of std::isspace
1 parent 78ff5fe commit a68f31d

File tree

3 files changed

+75
-2
lines changed

3 files changed

+75
-2
lines changed

fly/types/string/detail/string_classifier.hpp

+28
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,20 @@ class BasicStringClassifier
143143
*/
144144
static constexpr bool is_x_digit(char_type ch);
145145

146+
/**
147+
* Checks if the given character is a whitespace character as classified by the default C
148+
* locale.
149+
*
150+
* The STL's std::isspace and std::iswspace require that the provided character fits into an
151+
* unsigned char and a wchar_t, respectively. Other values result in undefined behavior.
152+
* This method has no such restriction.
153+
*
154+
* @param ch The character to classify.
155+
*
156+
* @return True if the character is a whitespace character.
157+
*/
158+
static constexpr bool is_space(char_type ch);
159+
146160
private:
147161
/**
148162
* Remove the 0x20 bit from the given character, effectively converting the a-z range of
@@ -161,6 +175,12 @@ class BasicStringClassifier
161175
static constexpr const auto s_upper_f = FLY_CHR(char_type, 'F');
162176
static constexpr const auto s_lower_a = FLY_CHR(char_type, 'a');
163177
static constexpr const auto s_lower_z = FLY_CHR(char_type, 'z');
178+
static constexpr const auto s_space = FLY_CHR(char_type, ' ');
179+
static constexpr const auto s_form_feed = FLY_CHR(char_type, '\f');
180+
static constexpr const auto s_line_feed = FLY_CHR(char_type, '\n');
181+
static constexpr const auto s_carriage_return = FLY_CHR(char_type, '\r');
182+
static constexpr const auto s_horizontal_tab = FLY_CHR(char_type, '\t');
183+
static constexpr const auto s_vertical_tab = FLY_CHR(char_type, '\v');
164184

165185
static constexpr const auto s_case_bit = static_cast<int_type>(0x20);
166186
static constexpr const auto s_case_mask = static_cast<int_type>(~s_case_bit);
@@ -251,6 +271,14 @@ constexpr inline bool BasicStringClassifier<StringType>::is_x_digit(char_type ch
251271
return is_digit(ch) || ((alpha >= s_upper_a) && (alpha <= s_upper_f));
252272
}
253273

274+
//==================================================================================================
275+
template <typename StringType>
276+
constexpr inline bool BasicStringClassifier<StringType>::is_space(char_type ch)
277+
{
278+
return (ch == s_space) || (ch == s_form_feed) || (ch == s_line_feed) ||
279+
(ch == s_carriage_return) || (ch == s_horizontal_tab) || (ch == s_vertical_tab);
280+
}
281+
254282
//==================================================================================================
255283
template <typename StringType>
256284
constexpr inline auto BasicStringClassifier<StringType>::unify_az_characters(char_type ch)

fly/types/string/string.hpp

+23-2
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,20 @@ class BasicString
175175
*/
176176
static constexpr bool is_x_digit(char_type ch);
177177

178+
/**
179+
* Checks if the given character is a whitespace character as classified by the default C
180+
* locale.
181+
*
182+
* The STL's std::isspace and std::iswspace require that the provided character fits into an
183+
* unsigned char and a wchar_t, respectively. Other values result in undefined behavior.
184+
* This method has no such restriction.
185+
*
186+
* @param ch The character to classify.
187+
*
188+
* @return True if the character is a whitespace character.
189+
*/
190+
static constexpr bool is_space(char_type ch);
191+
178192
/**
179193
* Split a string into a vector of strings.
180194
*
@@ -547,6 +561,13 @@ constexpr inline bool BasicString<StringType>::is_x_digit(char_type ch)
547561
return classifier::is_x_digit(ch);
548562
}
549563

564+
//==================================================================================================
565+
template <typename StringType>
566+
constexpr inline bool BasicString<StringType>::is_space(char_type ch)
567+
{
568+
return classifier::is_space(ch);
569+
}
570+
550571
//==================================================================================================
551572
template <typename StringType>
552573
std::vector<StringType> BasicString<StringType>::split(view_type input, char_type delimiter)
@@ -600,9 +621,9 @@ BasicString<StringType>::split(view_type input, char_type delimiter, size_type c
600621
template <typename StringType>
601622
void BasicString<StringType>::trim(StringType &target)
602623
{
603-
auto is_non_space = [](int ch)
624+
auto is_non_space = [](auto ch)
604625
{
605-
return !std::isspace(ch);
626+
return !is_space(ch);
606627
};
607628

608629
// Remove leading whitespace.

test/types/string/string_classifier.cpp

+24
Original file line numberDiff line numberDiff line change
@@ -197,4 +197,28 @@ CATCH_TEMPLATE_TEST_CASE(
197197
}
198198
}
199199
}
200+
201+
CATCH_SECTION("Check if a character is a whitespace character")
202+
{
203+
for (char_type ch = 0; (ch >= 0) && (ch < 0x80); ++ch)
204+
{
205+
CATCH_CHECK(
206+
BasicString::is_space(ch) ==
207+
static_cast<bool>(std::isspace(static_cast<unsigned char>(ch))));
208+
}
209+
210+
if constexpr (sizeof(char_type) > 1)
211+
{
212+
// Spot check some values that incorrectly result in std::isspace returning true when
213+
// cast to unsigned char (which is how the spec suggests to avoid undefined behavior).
214+
CATCH_CHECK(std::isspace(static_cast<unsigned char>(0xaa20)));
215+
CATCH_CHECK_FALSE(BasicString::is_space(0xaa20));
216+
217+
CATCH_CHECK(std::isspace(static_cast<unsigned char>(0xaa0a)));
218+
CATCH_CHECK_FALSE(BasicString::is_space(0xaa0a));
219+
220+
CATCH_CHECK(std::isspace(static_cast<unsigned char>(0xaa09)));
221+
CATCH_CHECK_FALSE(BasicString::is_space(0xaa09));
222+
}
223+
}
200224
}

0 commit comments

Comments
 (0)