Skip to content

Commit aa54bf0

Browse files
committed
Fix code from tensorflow/models/research/syntaxnet/util/utf8
See tensorflow/models#7090. Signed-off-by: Stefan Weil <[email protected]>
1 parent 0702194 commit aa54bf0

File tree

1 file changed

+10
-10
lines changed

1 file changed

+10
-10
lines changed

unittest/util/utf8/unicodetext.cc

+10 -10
Original file line number | Diff line number | Diff line change
@@ -425,22 +425,22 @@ char32 UnicodeText::const_iterator::operator*() const {
425425
// for speed, we do the calculation ourselves.)
426426

427427
// Convert from UTF-8
428-
int byte1 = it_[0];
428+
unsigned char byte1 = it_[0];
429429
if (byte1 < 0x80)
430430
return byte1;
431431

432-
int byte2 = it_[1];
432+
unsigned char byte2 = it_[1];
433433
if (byte1 < 0xE0)
434434
return ((byte1 & 0x1F) << 6)
435435
| (byte2 & 0x3F);
436436

437-
int byte3 = it_[2];
437+
unsigned char byte3 = it_[2];
438438
if (byte1 < 0xF0)
439439
return ((byte1 & 0x0F) << 12)
440440
| ((byte2 & 0x3F) << 6)
441441
| (byte3 & 0x3F);
442442

443-
int byte4 = it_[3];
443+
unsigned char byte4 = it_[3];
444444
return ((byte1 & 0x07) << 18)
445445
| ((byte2 & 0x3F) << 12)
446446
| ((byte3 & 0x3F) << 6)
@@ -458,9 +458,9 @@ UnicodeText::const_iterator& UnicodeText::const_iterator::operator--() {
458458
}
459459

460460
int UnicodeText::const_iterator::get_utf8(char* utf8_output) const {
461-
utf8_output[0] = it_[0]; if (it_[0] < 0x80) return 1;
462-
utf8_output[1] = it_[1]; if (it_[0] < 0xE0) return 2;
463-
utf8_output[2] = it_[2]; if (it_[0] < 0xF0) return 3;
461+
utf8_output[0] = it_[0]; if ((it_[0] & 0xff) < 0x80) return 1;
462+
utf8_output[1] = it_[1]; if ((it_[0] & 0xff) < 0xE0) return 2;
463+
utf8_output[2] = it_[2]; if ((it_[0] & 0xff) < 0xF0) return 3;
464464
utf8_output[3] = it_[3];
465465
return 4;
466466
}
@@ -470,11 +470,11 @@ string UnicodeText::const_iterator::get_utf8_string() const {
470470
}
471471

472472
int UnicodeText::const_iterator::utf8_length() const {
473-
if (it_[0] < 0x80) {
473+
if ((it_[0] & 0xff) < 0x80) {
474474
return 1;
475-
} else if (it_[0] < 0xE0) {
475+
} else if ((it_[0] & 0xff) < 0xE0) {
476476
return 2;
477-
} else if (it_[0] < 0xF0) {
477+
} else if ((it_[0] & 0xff) < 0xF0) {
478478
return 3;
479479
} else {
480480
return 4;

0 commit comments

Comments
 (0)