Skip to content

Commit 2da95d6

Browse files
committed
Add more avx2, sse4.1 flags. Add MSVC's AVX2 ICE workaround.
1 parent 4b6fefb commit 2da95d6

File tree

3 files changed

+73
-46
lines changed

3 files changed

+73
-46
lines changed

CMakeLists.txt

+53-41
Original file line number | Diff line number | Diff line change
@@ -204,21 +204,33 @@ set(tesseract_src ${tesseract_src}
204204
)
205205

206206
if (WIN32)
207-
set_source_files_properties(
208-
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
209-
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
210207
if (MSVC)
208+
set_source_files_properties(
209+
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
210+
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
211+
set_source_files_properties(
212+
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
213+
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
211214
set_source_files_properties(
212215
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
213216
PROPERTIES COMPILE_FLAGS "/arch:AVX")
217+
set_source_files_properties(
218+
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
219+
PROPERTIES COMPILE_FLAGS "/arch:AVX2")
214220
endif()
215221
else()
216222
set_source_files_properties(
217223
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
218224
PROPERTIES COMPILE_FLAGS "-msse4.1")
225+
set_source_files_properties(
226+
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
227+
PROPERTIES COMPILE_FLAGS "-msse4.1")
219228
set_source_files_properties(
220229
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
221230
PROPERTIES COMPILE_FLAGS "-mavx")
231+
set_source_files_properties(
232+
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
233+
PROPERTIES COMPILE_FLAGS "-mavx2")
222234
endif()
223235

224236
add_library (libtesseract ${LIBRARY_TYPE} ${tesseract_src} ${tesseract_hdr})
@@ -288,69 +300,69 @@ install(FILES
288300
install(FILES
289301
# from api/makefile.am
290302
api/apitypes.h
291-
api/baseapi.h
292-
api/capi.h
303+
api/baseapi.h
304+
api/capi.h
293305
api/renderer.h
294306

295307
#from arch/makefile.am
296308
arch/dotproductavx.h
297-
arch/dotproductsse.h
309+
arch/dotproductsse.h
298310
arch/simddetect.h
299311

300312
#from ccmain/makefile.am
301313
ccmain/thresholder.h
302-
ccmain/ltrresultiterator.h
303-
ccmain/pageiterator.h
304-
ccmain/resultiterator.h
314+
ccmain/ltrresultiterator.h
315+
ccmain/pageiterator.h
316+
ccmain/resultiterator.h
305317
ccmain/osdetect.h
306318

307319
#from ccstruct/makefile.am
308-
ccstruct/publictypes.h
320+
ccstruct/publictypes.h
309321

310322
#from ccutil/makefile.am
311-
ccutil/basedir.h
312-
ccutil/errcode.h
313-
ccutil/fileerr.h
314-
ccutil/genericvector.h
315-
ccutil/helpers.h
316-
ccutil/host.h
317-
ccutil/memry.h
323+
ccutil/basedir.h
324+
ccutil/errcode.h
325+
ccutil/fileerr.h
326+
ccutil/genericvector.h
327+
ccutil/helpers.h
328+
ccutil/host.h
329+
ccutil/memry.h
318330
ccutil/ndminx.h
319331
ccutil/params.h
320-
ccutil/ocrclass.h
321-
ccutil/platform.h
322-
ccutil/serialis.h
323-
ccutil/strngs.h
332+
ccutil/ocrclass.h
333+
ccutil/platform.h
334+
ccutil/serialis.h
335+
ccutil/strngs.h
324336
ccutil/tesscallback.h
325-
ccutil/unichar.h
326-
ccutil/unicharcompress.h
327-
ccutil/unicharmap.h
337+
ccutil/unichar.h
338+
ccutil/unicharcompress.h
339+
ccutil/unicharmap.h
328340
ccutil/unicharset.h
329341
ccutil/version.h
330342

331343
#from lstm/makefile.am
332344
lstm/convolve.h
333-
lstm/ctc.h
334-
lstm/fullyconnected.h
335-
lstm/functions.h
345+
lstm/ctc.h
346+
lstm/fullyconnected.h
347+
lstm/functions.h
336348
lstm/input.h
337-
lstm/lstm.h
338-
lstm/lstmrecognizer.h
339-
lstm/lstmtrainer.h
349+
lstm/lstm.h
350+
lstm/lstmrecognizer.h
351+
lstm/lstmtrainer.h
340352
lstm/maxpool.h
341-
lstm/networkbuilder.h
342-
lstm/network.h
343-
lstm/networkio.h
353+
lstm/networkbuilder.h
354+
lstm/network.h
355+
lstm/networkio.h
344356
lstm/networkscratch.h
345-
lstm/parallel.h
346-
lstm/plumbing.h
347-
lstm/recodebeam.h
348-
lstm/reconfig.h
357+
lstm/parallel.h
358+
lstm/plumbing.h
359+
lstm/recodebeam.h
360+
lstm/reconfig.h
349361
lstm/reversed.h
350-
lstm/series.h
351-
lstm/static_shape.h
352-
lstm/stridemap.h
353-
lstm/tfnetwork.h
362+
lstm/series.h
363+
lstm/static_shape.h
364+
lstm/stridemap.h
365+
lstm/tfnetwork.h
354366
lstm/weightmatrix.h
355367

356368
#${CMAKE_BINARY_DIR}/src/endianness.h

arch/intsimdmatrixavx2.cpp

+10-1
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
#ifdef __AVX2__
2222
#include <immintrin.h>
2323
#include <stdint.h>
24+
#include <algorithm>
2425
#include <vector>
2526

2627
namespace tesseract {
@@ -74,7 +75,15 @@ inline void ExtractResults(__m256i& result, __m256i& shift_id,
7475
const int8_t*& wi, const double*& scales,
7576
int num_out, double*& v) {
7677
for (int out = 0; out < num_out; ++out) {
77-
int32_t res = _mm256_extract_epi32(result, 0);
78+
int32_t res =
79+
#ifndef _MSC_VER
80+
_mm256_extract_epi32(result, 0)
81+
#else
82+
// Workaround MSVC's ICE
83+
// _mm256_extract_epi32(X, Y) == ((int32_t*)&X)[Y]
84+
((int32_t*)&result)[0]
85+
#endif
86+
;
7887
*v++ = (static_cast<double>(res) / MAX_INT8 + *wi++) * *scales++;
7988
// Rotate the results in int32_t units, so the next result is ready.
8089
result = _mm256_permutevar8x32_epi32(result, shift_id);

cppan.yml

+10-4
Original file line number | Diff line number | Diff line change
@@ -124,13 +124,19 @@ projects:
124124
125125
post_sources: |
126126
if (WIN32)
127-
set_source_files_properties(
128-
${SDIR}/arch/dotproductsse.cpp
129-
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
130127
if (MSVC)
131128
set_source_files_properties(
132-
${SDIR}/arch/dotproductavx.cpp
129+
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductsse.cpp
130+
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
131+
set_source_files_properties(
132+
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixsse.cpp
133+
PROPERTIES COMPILE_DEFINITIONS __SSE4_1__)
134+
set_source_files_properties(
135+
${CMAKE_CURRENT_SOURCE_DIR}/arch/dotproductavx.cpp
133136
PROPERTIES COMPILE_FLAGS "/arch:AVX")
137+
set_source_files_properties(
138+
${CMAKE_CURRENT_SOURCE_DIR}/arch/intsimdmatrixavx2.cpp
139+
PROPERTIES COMPILE_FLAGS "/arch:AVX2")
134140
endif()
135141
else()
136142
remove_src_dir(vs2010/port/*)

0 commit comments

Comments
 (0)