Skip to content

Commit e67aa17

Browse files
Introduce new API grapheme_line_segmenter to replace scan API
Signed-off-by: Christian Parpart <[email protected]>
1 parent 8103f23 commit e67aa17

12 files changed

+1480
-876
lines changed

CMakeLists.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ option(LIBUNICODE_BENCHMARK "libunicode: Enables building of benchmark for libun
4747
option(LIBUNICODE_TOOLS "libunicode: Builds CLI tools [default: ${MASTER_PROJECT}]" ${MASTER_PROJECT})
4848
option(LIBUNICODE_BUILD_STATIC "libunicode: provide static library instead of dynamic [default: ${LIBUNICODE_BUILD_STATIC_DEFAULT}]" ${LIBUNICODE_BUILD_STATIC_DEFAULT})
4949
option(LIBUNICODE_USE_INTRINSICS "libunicode: Use SIMD extenstion during text read [default: ON]" ON)
50-
option(LIBUNICODE_USE_STD_SIMD "libunicode: Use std::simd as SIMD extenstion during text read (takes precedence over own intrinsics) [default: ON]" ${LIBUNICODE_USE_INTRINSICS})
50+
option(LIBUNICODE_USE_STD_SIMD "libunicode: Use std::simd as SIMD extenstion during text read (takes precedence over own intrinsics) [default: ON]" ON)
5151
option(LIBUNICODE_TABLEGEN_FASTBUILD "libunicode: Use fast table generation (takes more memory in final tables) [default: OFF]" OFF)
5252

5353
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Enable testing of the benchmark library." FORCE)

src/libunicode/CMakeLists.txt

+4-8
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,6 @@ add_library(unicode ${LIBUNICODE_LIB_MODE}
102102
codepoint_properties.cpp
103103
emoji_segmenter.cpp
104104
grapheme_segmenter.cpp
105-
scan.cpp
106105
script_segmenter.cpp
107106
utf8.cpp
108107
width.cpp
@@ -114,22 +113,22 @@ add_library(unicode ${LIBUNICODE_LIB_MODE}
114113
)
115114

116115
if(LIBUNICODE_USE_STD_SIMD)
117-
target_compile_definitions(unicode PRIVATE LIBUNICODE_USE_STD_SIMD)
116+
target_compile_definitions(unicode PUBLIC LIBUNICODE_USE_STD_SIMD)
118117
endif()
119118
if(LIBUNICODE_USE_INTRINSICS)
120-
target_compile_definitions(unicode PRIVATE USE_INTRINSICS)
119+
target_compile_definitions(unicode PUBLIC LIBUNICODE_USE_INTRINSICS)
121120
endif()
122121

123122
set(public_headers
124123
capi.h
125124
codepoint_properties.h
126125
convert.h
127126
emoji_segmenter.h
127+
grapheme_line_segmenter.h
128128
grapheme_segmenter.h
129129
intrinsics.h
130130
multistage_table_view.h
131131
run_segmenter.h
132-
scan.h
133132
script_segmenter.h
134133
support.h
135134
utf8.h
@@ -161,7 +160,6 @@ add_executable(unicode_tablegen tablegen.cpp)
161160
set_target_properties(unicode_tablegen PROPERTIES CMAKE_BUILD_TYPE Release)
162161
target_link_libraries(unicode_tablegen PRIVATE unicode::loader)
163162

164-
165163
# {{{ installation
166164
set(LIBUNICODE_CMAKE_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/libunicode" CACHE PATH "Installation directory for cmake files, a relative path that will be joined with ${CMAKE_INSTALL_PREFIX} or an absolute path.")
167165
set(LIBUNICODE_INSTALL_CMAKE_FILES ${MASTER_PROJECT} CACHE BOOL "Decides whether or not to install CMake config and -version files.")
@@ -220,9 +218,9 @@ if(LIBUNICODE_TESTING)
220218
capi_test.cpp
221219
convert_test.cpp
222220
emoji_segmenter_test.cpp
221+
grapheme_line_segmenter_test.cpp
223222
grapheme_segmenter_test.cpp
224223
run_segmenter_test.cpp
225-
scan_test.cpp
226224
script_segmenter_test.cpp
227225
test_main.cpp
228226
unicode_test.cpp
@@ -247,8 +245,6 @@ if(LIBUNICODE_TESTING)
247245
endif()
248246
# }}}
249247

250-
251-
252248
# {{{ unicode_test
253249
if(LIBUNICODE_BENCHMARK)
254250
if(NOT benchmark_FOUND)

src/libunicode/benchmark.cpp

+13-9
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
#include <libunicode/convert.h>
2-
#include <libunicode/scan.h>
2+
#include <libunicode/grapheme_line_segmenter.h>
33
#include <libunicode/utf8.h>
44

55
#include <string_view>
@@ -9,22 +9,26 @@
99
using std::string_view;
1010

1111
template <size_t L>
12-
static void benchmarkWithLength(benchmark::State& state)
12+
static void benchmarkWithLength(benchmark::State& benchmarkState)
1313
{
14-
auto TestText = std::string(L, 'a') + "\u00A9";
15-
for (auto _: state)
14+
auto const TestTextString = std::string(L, 'a') + "\u00A9";
15+
auto const TestText = std::string_view(TestTextString);
16+
for (auto _: benchmarkState)
1617
{
17-
benchmark::DoNotOptimize(unicode::detail::scan_for_text_ascii(TestText, L + 10));
18+
benchmark::DoNotOptimize(unicode::detail::process_only_ascii(std::string_view(TestText).substr(0, L + 10)));
1819
}
1920
}
2021

2122
template <size_t L>
22-
static void benchmarkWithOffset(benchmark::State& state)
23+
static void benchmarkWithOffset(benchmark::State& benchmarkState)
2324
{
24-
auto TestText = std::string(L, 'a') + "\u0001F600" + std::string(1000, 'a');
25-
for (auto _: state)
25+
auto const TestTextString = std::string(L, 'a') + "\u0001F600" + std::string(1000, 'a');
26+
auto const TestText = std::string_view(TestTextString);
27+
for (auto _: benchmarkState)
2628
{
27-
benchmark::DoNotOptimize(unicode::detail::scan_for_text_ascii(TestText, L + 10));
29+
auto state = unicode::detail::unicode_process_state {};
30+
auto eventHandler = unicode::detail::EventHandler{};
31+
benchmark::DoNotOptimize(unicode::detail::process_only_complex_unicode(eventHandler, state, TestText, L + 10));
2832
}
2933
}
3034

src/libunicode/capi.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ int u32_gc_width(u32_char_t const* codepoints, size_t size, int mode)
4949
while (segmenter.codepointsAvailable())
5050
{
5151
auto const cluster = *segmenter;
52-
int thisWidth = unicode::width(cluster.front());
52+
int thisWidth = static_cast<int>(unicode::width(cluster.front()));
5353
if (mode != GC_WIDTH_MODE_NON_MODIFIABLE)
5454
{
5555
for (size_t i = 1; i < size; ++i)
@@ -60,7 +60,7 @@ int u32_gc_width(u32_char_t const* codepoints, size_t size, int mode)
6060
{
6161
case 0xFE0E: return 1;
6262
case 0xFE0F: return 2;
63-
default: return unicode::width(codepoint);
63+
default: return static_cast<int>(unicode::width(codepoint));
6464
}
6565
}();
6666
if (width && width != thisWidth)

0 commit comments

Comments
 (0)