6
6
7
7
#include < array>
8
8
#include < functional>
9
+ #include < iterator>
9
10
#include < optional>
10
11
#include < string>
11
12
@@ -75,6 +76,43 @@ class BasicStringUnicode
75
76
static std::optional<DesiredStringType>
76
77
convert_encoding (IteratorType &it, const IteratorType &end);
77
78
79
+ /* *
80
+ * Convert the Unicode encoding of a string to another encoding, inserting the result into the
81
+ * provided output iterator.
82
+ *
83
+ * @tparam DesiredStringType The type of string to convert to.
84
+ * @tparam OutputIteratorType The type of the output iterator to insert the result into.
85
+ * @tparam SourceStringType The type of string to convert.
86
+ *
87
+ * @param value The encoded Unicode string to convert. It is read through its cbegin() and
88
+ * cend() iterators.
89
+ * @param out The output iterator to insert the result into.
90
+ *
91
+ * @return Whether the conversion was successful.
92
+ */
93
+ template <
94
+ typename DesiredStringType,
95
+ typename OutputIteratorType,
96
+ typename SourceStringType = StringType>
97
+ static bool convert_encoding_into (SourceStringType &&value, OutputIteratorType out);
98
+
99
+ /* *
100
+ * Convert the Unicode encoding of a string to another encoding, inserting the result into the
101
+ * provided output iterator.
102
+ *
103
+ * @tparam DesiredStringType The type of string to convert to.
104
+ * @tparam IteratorType The type of the encoded Unicode string's iterator.
105
+ * @tparam OutputIteratorType The type of the output iterator to insert the result into.
106
+ *
107
+ * @param it Pointer to the beginning of the encoded Unicode string. Taken by reference and
108
+ * handed to decode_codepoint for each codepoint.
+ * @param end Pointer to the end of the encoded Unicode string.
+ * @param out The output iterator to insert the result into.
109
+ *
110
+ * @return Whether the conversion was successful. False is returned as soon as decoding a
+ * codepoint fails; anything already written through the output iterator is left in place.
+ *
+ * NOTE(review): the output iterator is taken by value and passed by value to
+ * codepoint_to_string once per codepoint, so the write position is only carried from one
+ * codepoint to the next when copies of the iterator share state (as with std::back_inserter).
+ * Confirm the behavior before using an iterator such as a raw pointer.
111
+ */
112
+ template <typename DesiredStringType, typename IteratorType, typename OutputIteratorType>
113
+ static bool
114
+ convert_encoding_into (IteratorType &it, const IteratorType &end, OutputIteratorType out);
115
+
78
116
/* *
79
117
* Decode a single Unicode codepoint, starting at the character pointed to by the provided
80
118
* iterator. If successful, after invoking this method, that iterator will point at the first
@@ -155,6 +193,12 @@ class BasicStringUnicode
155
193
static std::optional<StringType> unescape_codepoint (IteratorType &it, const IteratorType &end);
156
194
157
195
private:
196
+ friend BasicStringUnicode<std::string>;
197
+ friend BasicStringUnicode<std::wstring>;
198
+ friend BasicStringUnicode<std::u8string>;
199
+ friend BasicStringUnicode<std::u16string>;
200
+ friend BasicStringUnicode<std::u32string>;
201
+
158
202
/* *
159
203
* Escape a single Unicode codepoint.
160
204
*
@@ -234,32 +278,44 @@ class BasicStringUnicode
234
278
/* *
235
279
* Encode a Unicode codepoint into a UTF-8 string.
236
280
*
237
- * @param codepoint The codepoint to encode .
281
+ * @tparam OutputIteratorType The type of the output iterator to insert the result into.
238
282
*
239
- * @return A string containing the encoded Unicode codepoint.
283
+ * @param codepoint The codepoint to encode.
284
+ * @param out The output iterator to insert the result into.
240
285
*/
241
- template <typename CharType = char_type, enable_if<size_of_type_is<CharType, 1 >> = 0 >
242
- static StringType codepoint_to_string (codepoint_type codepoint);
286
+ template <
287
+ typename OutputIteratorType,
288
+ typename CharType = char_type,
289
+ enable_if<size_of_type_is<CharType, 1 >> = 0 >
290
+ static void codepoint_to_string (codepoint_type codepoint, OutputIteratorType out);
243
291
244
292
/* *
245
293
* Encode a Unicode codepoint into a UTF-16 string.
246
294
*
247
- * @param codepoint The codepoint to encode .
295
+ * @tparam OutputIteratorType The type of the output iterator to insert the result into.
248
296
*
249
- * @return A string containing the encoded Unicode codepoint.
297
+ * @param codepoint The codepoint to encode.
298
+ * @param out The output iterator to insert the result into.
250
299
*/
251
- template <typename CharType = char_type, enable_if<size_of_type_is<CharType, 2 >> = 0 >
252
- static StringType codepoint_to_string (codepoint_type codepoint);
300
+ template <
301
+ typename OutputIteratorType,
302
+ typename CharType = char_type,
303
+ enable_if<size_of_type_is<CharType, 2 >> = 0 >
304
+ static void codepoint_to_string (codepoint_type codepoint, OutputIteratorType out);
253
305
254
306
/* *
255
307
* Encode a Unicode codepoint into a UTF-32 string.
256
308
*
257
- * @param codepoint The codepoint to encode .
309
+ * @tparam OutputIteratorType The type of the output iterator to insert the result into.
258
310
*
259
- * @return A string containing the encoded Unicode codepoint.
311
+ * @param codepoint The codepoint to encode.
312
+ * @param out The output iterator to insert the result into.
260
313
*/
261
- template <typename CharType = char_type, enable_if<size_of_type_is<CharType, 4 >> = 0 >
262
- static StringType codepoint_to_string (codepoint_type codepoint);
314
+ template <
315
+ typename OutputIteratorType,
316
+ typename CharType = char_type,
317
+ enable_if<size_of_type_is<CharType, 4 >> = 0 >
318
+ static void codepoint_to_string (codepoint_type codepoint, OutputIteratorType out);
263
319
264
320
/* *
265
321
* Create a Unicode codepoint from either one complete codepoint or two surrogate halves. The
@@ -382,26 +438,50 @@ template <typename DesiredStringType, typename IteratorType>
382
438
std::optional<DesiredStringType>
383
439
BasicStringUnicode<StringType>::convert_encoding(IteratorType &it, const IteratorType &end)
384
440
{
385
- using DesiredUnicodeType = BasicStringUnicode<DesiredStringType>;
386
-
387
441
DesiredStringType result;
388
442
result.reserve (static_cast <typename StringType::size_type>(std::distance (it, end)));
389
443
444
+ if (convert_encoding_into<DesiredStringType>(it, end, std::back_inserter (result)))
445
+ {
446
+ return result;
447
+ }
448
+
449
+ return std::nullopt;
450
+ }
451
+
452
+ // ==================================================================================================
453
+ template <typename StringType>
454
+ template <typename DesiredStringType, typename OutputIteratorType, typename SourceStringType>
455
+ bool BasicStringUnicode<StringType>::convert_encoding_into(
456
+ SourceStringType &&value,
457
+ OutputIteratorType out)
458
+ {
459
+ auto it = value.cbegin ();
460
+ return convert_encoding_into<DesiredStringType>(it, value.cend (), out);
461
+ }
462
+
463
+ // ==================================================================================================
464
+ template <typename StringType>
465
+ template <typename DesiredStringType, typename IteratorType, typename OutputIteratorType>
466
+ bool BasicStringUnicode<StringType>::convert_encoding_into(
467
+ IteratorType &it,
468
+ const IteratorType &end,
469
+ OutputIteratorType out)
470
+ {
471
+ using DesiredUnicodeType = BasicStringUnicode<DesiredStringType>;
472
+
390
473
while (it != end)
391
474
{
392
475
if (auto codepoint = decode_codepoint (it, end); codepoint)
393
476
{
394
- if (auto encoded = DesiredUnicodeType::encode_codepoint (*codepoint); encoded)
395
- {
396
- result += *std::move (encoded);
397
- continue ;
398
- }
477
+ DesiredUnicodeType::codepoint_to_string (*codepoint, out);
478
+ continue ;
399
479
}
400
480
401
- return std::nullopt ;
481
+ return false ;
402
482
}
403
483
404
- return result ;
484
+ return true ;
405
485
}
406
486
407
487
// ==================================================================================================
@@ -426,7 +506,10 @@ std::optional<StringType> BasicStringUnicode<StringType>::encode_codepoint(codep
426
506
{
427
507
if (validate_codepoint (codepoint))
428
508
{
429
- return codepoint_to_string (codepoint);
509
+ StringType result;
510
+ codepoint_to_string (codepoint, std::back_inserter (result));
511
+
512
+ return result;
430
513
}
431
514
432
515
return std::nullopt;
@@ -663,64 +746,62 @@ auto BasicStringUnicode<StringType>::codepoint_from_string(
663
746
664
747
// ==================================================================================================
665
748
template <typename StringType>
666
- template <typename CharType, enable_if<size_of_type_is<CharType, 1 >>>
667
- StringType BasicStringUnicode<StringType>::codepoint_to_string(codepoint_type codepoint)
749
+ template <typename OutputIteratorType, typename CharType, enable_if<size_of_type_is<CharType, 1 >>>
750
+ void BasicStringUnicode<StringType>::codepoint_to_string(
751
+ codepoint_type codepoint,
752
+ OutputIteratorType out)
668
753
{
669
- StringType result;
670
-
671
754
if (codepoint < 0x80 )
672
755
{
673
- result + = static_cast <char_type>(codepoint);
756
+ *out++ = static_cast <char_type>(codepoint);
674
757
}
675
758
else if (codepoint < 0x800 )
676
759
{
677
- result + = static_cast <char_type>(0xc0 | (codepoint >> 6 ));
678
- result + = static_cast <char_type>(0x80 | (codepoint & 0x3f ));
760
+ *out++ = static_cast <char_type>(0xc0 | (codepoint >> 6 ));
761
+ *out++ = static_cast <char_type>(0x80 | (codepoint & 0x3f ));
679
762
}
680
763
else if (codepoint < 0x10000 )
681
764
{
682
- result + = static_cast <char_type>(0xe0 | (codepoint >> 12 ));
683
- result + = static_cast <char_type>(0x80 | ((codepoint >> 6 ) & 0x3f ));
684
- result + = static_cast <char_type>(0x80 | (codepoint & 0x3f ));
765
+ *out++ = static_cast <char_type>(0xe0 | (codepoint >> 12 ));
766
+ *out++ = static_cast <char_type>(0x80 | ((codepoint >> 6 ) & 0x3f ));
767
+ *out++ = static_cast <char_type>(0x80 | (codepoint & 0x3f ));
685
768
}
686
769
else
687
770
{
688
- result + = static_cast <char_type>(0xf0 | (codepoint >> 18 ));
689
- result + = static_cast <char_type>(0x80 | ((codepoint >> 12 ) & 0x3f ));
690
- result + = static_cast <char_type>(0x80 | ((codepoint >> 6 ) & 0x3f ));
691
- result + = static_cast <char_type>(0x80 | (codepoint & 0x3f ));
771
+ *out++ = static_cast <char_type>(0xf0 | (codepoint >> 18 ));
772
+ *out++ = static_cast <char_type>(0x80 | ((codepoint >> 12 ) & 0x3f ));
773
+ *out++ = static_cast <char_type>(0x80 | ((codepoint >> 6 ) & 0x3f ));
774
+ *out++ = static_cast <char_type>(0x80 | (codepoint & 0x3f ));
692
775
}
693
-
694
- return result;
695
776
}
696
777
697
778
// ==================================================================================================
698
779
template <typename StringType>
699
- template <typename CharType, enable_if<size_of_type_is<CharType, 2 >>>
700
- StringType BasicStringUnicode<StringType>::codepoint_to_string(codepoint_type codepoint)
780
+ template <typename OutputIteratorType, typename CharType, enable_if<size_of_type_is<CharType, 2 >>>
781
+ void BasicStringUnicode<StringType>::codepoint_to_string(
782
+ codepoint_type codepoint,
783
+ OutputIteratorType out)
701
784
{
702
- StringType result;
703
-
704
785
if (codepoint < 0x10000 )
705
786
{
706
- result + = static_cast <char_type>(codepoint);
787
+ *out++ = static_cast <char_type>(codepoint);
707
788
}
708
789
else
709
790
{
710
791
codepoint -= 0x10000 ;
711
- result + = static_cast <char_type>(s_high_surrogate_min | (codepoint >> 10 ));
712
- result + = static_cast <char_type>(s_low_surrogate_min | (codepoint & 0x3ff ));
792
+ *out++ = static_cast <char_type>(s_high_surrogate_min | (codepoint >> 10 ));
793
+ *out++ = static_cast <char_type>(s_low_surrogate_min | (codepoint & 0x3ff ));
713
794
}
714
-
715
- return result;
716
795
}
717
796
718
797
// ==================================================================================================
719
798
template <typename StringType>
720
- template <typename CharType, enable_if<size_of_type_is<CharType, 4 >>>
721
- StringType BasicStringUnicode<StringType>::codepoint_to_string(codepoint_type codepoint)
799
+ template <typename OutputIteratorType, typename CharType, enable_if<size_of_type_is<CharType, 4 >>>
800
+ void BasicStringUnicode<StringType>::codepoint_to_string(
801
+ codepoint_type codepoint,
802
+ OutputIteratorType out)
722
803
{
723
- return StringType ( 1 , static_cast <char_type>(codepoint) );
804
+ *out++ = static_cast <char_type>(codepoint);
724
805
}
725
806
726
807
// ==================================================================================================
0 commit comments