Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 8b174e2

Browse files
authored
Revert "Refactor LibraryInitializer so it's thread safe. Fixes random sporadical concurrency crashes. (#15762)"
This reverts commit bfd3bb8.
1 parent 8df9469 commit 8b174e2

File tree

10 files changed

+253
-359
lines changed

10 files changed

+253
-359
lines changed

CMakeLists.txt

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ mxnet_option(USE_OLDCMAKECUDA "Build with old cmake cuda" OFF)
2424
mxnet_option(USE_NCCL "Use NVidia NCCL with CUDA" OFF)
2525
mxnet_option(USE_OPENCV "Build with OpenCV support" ON)
2626
mxnet_option(USE_OPENMP "Build with Openmp support" ON)
27-
mxnet_option(USE_OPENMP_BUNDLED_LLVM "Build with bundled llvm openmp from 3rdparty" OFF)
2827
mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path
2928
mxnet_option(USE_SSE "Build with x86 SSE instruction support" ON IF NOT ARM)
3029
mxnet_option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON
@@ -434,11 +433,11 @@ if(USE_OPENMP)
434433
find_package(OpenMP REQUIRED)
435434
# This should build on Windows, but there's some problem and I don't have a Windows box, so
436435
# could a Windows user please fix?
437-
if(USE_OPENMP_BUNDLED_LLVM AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp/CMakeLists.txt
438-
AND SYSTEM_ARCHITECTURE STREQUAL "x86_64"
439-
AND NOT MSVC
440-
AND NOT CMAKE_CROSSCOMPILING)
441-
message("Using bundlded LLVM OpenMP")
436+
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp/CMakeLists.txt
437+
AND SYSTEM_ARCHITECTURE STREQUAL "x86_64"
438+
AND NOT MSVC
439+
AND NOT CMAKE_CROSSCOMPILING)
440+
442441
# Intel/llvm OpenMP: https://github.com/llvm-mirror/openmp
443442
set(OPENMP_STANDALONE_BUILD TRUE)
444443
set(LIBOMP_ENABLE_SHARED TRUE)
@@ -452,7 +451,6 @@ if(USE_OPENMP)
452451
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
453452
add_definitions(-DMXNET_USE_OPENMP=1)
454453
else()
455-
message("Using platform provided OpenMP")
456454
if(OPENMP_FOUND)
457455
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
458456
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")

docs/faq/env_var.md

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,6 @@ $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
3939

4040
## Set the Number of Threads
4141

42-
* MXNET_OMP_MAX_THREADS
43-
- Values: Int ```(default=Number of processors / Number of processors * 2 in X86)```
44-
- Maximum number of threads to use in individual operators through OpenMP. If not set, OMP_NUM_THREADS is considered after.
4542
* MXNET_GPU_WORKER_NTHREADS
4643
- Values: Int ```(default=2)```
4744
- The maximum number of threads to use on each GPU. This parameter is used to parallelize the computation within a single GPU card.
@@ -50,7 +47,7 @@ $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
5047
- The maximum number of concurrent threads that do the memory copy job on each GPU.
5148
* MXNET_CPU_WORKER_NTHREADS
5249
- Values: Int ```(default=1)```
53-
- The maximum number of scheduling threads on CPU. It specifies how many operators can be run in parallel. Note that most CPU operators are parallelized by OpenMP. To change the number of threads used by individual operators, please set `MXNET_OMP_MAX_THREADS` instead.
50+
- The maximum number of scheduling threads on CPU. It specifies how many operators can be run in parallel. Note that most CPU operators are parallelized by OpenMP. To change the number of threads used by individual operators, please set `OMP_NUM_THREADS` instead.
5451
* MXNET_CPU_PRIORITY_NTHREADS
5552
- Values: Int ```(default=4)```
5653
- The number of threads given to prioritized CPU jobs.
@@ -59,13 +56,10 @@ $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
5956
- The number of threads used for NNPACK. NNPACK package aims to provide high-performance implementations of some layers for multi-core CPUs. Checkout [NNPACK](http://mxnet.io/faq/nnpack.html) to know more about it.
6057
* MXNET_MP_WORKER_NTHREADS
6158
- Values: Int ```(default=1)```
62-
- The number of scheduling threads on CPU given to multiprocess workers (after fork). Enlarge this number allows more operators to run in parallel in individual workers but please consider reducing the overall `num_workers` to avoid thread contention (not available on Windows).
59+
- The number of scheduling threads on CPU given to multiprocess workers. Enlarging this number allows more operators to run in parallel in individual workers, but please consider reducing the overall `num_workers` to avoid thread contention (not available on Windows).
6360
* MXNET_MP_OPENCV_NUM_THREADS
6461
- Values: Int ```(default=0)```
6562
- The number of OpenCV execution threads given to multiprocess workers. OpenCV multithreading is disabled if `MXNET_MP_OPENCV_NUM_THREADS` < 1 (default). Enlarging this number may boost the performance of individual workers when executing underlying OpenCV functions, but please consider reducing the overall `num_workers` to avoid thread contention (not available on Windows).
66-
* MXNET_GPU_COPY_NTHREADS
67-
- Values:: Int ```(default=2)```
68-
- Number of threads for copying data from CPU to GPU.
6963

7064
## Memory Options
7165

src/c_api/c_api.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,12 @@
4646
#include "mxnet/libinfo.h"
4747
#include "mxnet/imperative.h"
4848
#include "mxnet/lib_api.h"
49-
#include "../initialize.h"
5049
#include "./c_api_common.h"
5150
#include "../operator/custom/custom-inl.h"
5251
#include "../operator/tensor/matrix_op-inl.h"
5352
#include "../operator/tvmop/op_module.h"
5453
#include "../common/utils.h"
54+
#include "../common/library.h"
5555

5656
using namespace mxnet;
5757

@@ -95,7 +95,7 @@ inline int MXAPIGetFunctionRegInfo(const FunRegType *e,
9595
// Loads library and initializes it
9696
int MXLoadLib(const char *path) {
9797
API_BEGIN();
98-
void *lib = LibraryInitializer::Get()->lib_load(path);
98+
void *lib = load_lib(path);
9999
if (!lib)
100100
LOG(FATAL) << "Unable to load library";
101101

src/common/library.cc

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
/*!
21+
* Copyright (c) 2015 by Contributors
22+
* \file library.cc
23+
* \brief Dynamically loading accelerator library
24+
* and accessing its functions
25+
*/
26+
27+
#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
28+
#include <windows.h>
29+
#else
30+
#include <dlfcn.h>
31+
#endif
32+
33+
#include <string>
34+
#include "library.h"
35+
36+
#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
/*!
 * \brief Retrieve the system error message for the last-error code
 * \param err receives a pointer to a message buffer allocated by the system
 *        (FORMAT_MESSAGE_ALLOCATE_BUFFER); the caller releases it with LocalFree.
 *        NOTE(review): presumably left unchanged when no message can be
 *        formatted, so callers should initialise *err to nullptr - confirm.
 */
void win_err(char **err) {
  // GetLastError returns a DWORD; read it before any other API call can overwrite it
  const DWORD dw = GetLastError();
  // Call the ANSI variant explicitly: the generic FormatMessage macro resolves to
  // FormatMessageW when UNICODE is defined, which does not accept a char buffer.
  FormatMessageA(
      FORMAT_MESSAGE_ALLOCATE_BUFFER |
      FORMAT_MESSAGE_FROM_SYSTEM |
      FORMAT_MESSAGE_IGNORE_INSERTS,
      nullptr,
      dw,
      MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
      // with FORMAT_MESSAGE_ALLOCATE_BUFFER the buffer argument is really a
      // pointer to the pointer that receives the allocation
      reinterpret_cast<LPSTR>(err),
      0, nullptr);
}
#endif
54+
55+
56+
/*!
57+
* \brief Loads the dynamic shared library file
58+
* \param path library file location
59+
* \return handle a pointer for the loaded library, nullptr if loading unsuccessful
60+
*/
61+
void* load_lib(const char* path) {
62+
void *handle = nullptr;
63+
std::string path_str(path);
64+
// check if library was already loaded
65+
if (loaded_libs.find(path_str) == loaded_libs.end()) {
66+
// if not, load it
67+
#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
68+
handle = LoadLibrary(path);
69+
if (!handle) {
70+
char *err_msg = nullptr;
71+
win_err(&err_msg);
72+
LOG(FATAL) << "Error loading library: '" << path << "'\n" << err_msg;
73+
LocalFree(err_msg);
74+
return nullptr;
75+
}
76+
#else
77+
handle = dlopen(path, RTLD_LAZY);
78+
if (!handle) {
79+
LOG(FATAL) << "Error loading library: '" << path << "'\n" << dlerror();
80+
return nullptr;
81+
}
82+
#endif // _WIN32 or _WIN64 or __WINDOWS__
83+
// then store the pointer to the library
84+
loaded_libs[path_str] = handle;
85+
} else {
86+
// otherwise just look up the pointer
87+
handle = loaded_libs[path_str];
88+
}
89+
return handle;
90+
}
91+
92+
/*!
 * \brief Closes the loaded dynamic shared library file
 * \param handle library file handle; a null handle is ignored
 *
 * NOTE(review): load_lib caches handles in loaded_libs and nothing here
 * removes them, so a later load_lib for the same path would hand back the
 * closed handle - confirm callers never reload after closing.
 */
void close_lib(void* handle) {
  // nothing to release; avoids handing a null handle to the platform loader
  if (handle == nullptr) {
    return;
  }
#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
  FreeLibrary(static_cast<HMODULE>(handle));
#else
  dlclose(handle);
#endif  // _WIN32 or _WIN64 or __WINDOWS__
}
103+
104+
/*!
105+
* \brief Obtains address of given function in the loaded library
106+
* \param handle pointer for the loaded library
107+
* \param func function pointer that gets output address
108+
* \param name function name to be fetched
109+
*/
110+
void get_sym(void* handle, void** func, char* name) {
111+
#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
112+
*func = GetProcAddress((HMODULE)handle, name);
113+
if (!(*func)) {
114+
char *err_msg = nullptr;
115+
win_err(&err_msg);
116+
LOG(FATAL) << "Error getting function '" << name << "' from library\n" << err_msg;
117+
LocalFree(err_msg);
118+
}
119+
#else
120+
*func = dlsym(handle, name);
121+
if (!(*func)) {
122+
LOG(FATAL) << "Error getting function '" << name << "' from library\n" << dlerror();
123+
}
124+
#endif // _WIN32 or _WIN64 or __WINDOWS__
125+
}

src/common/library.h

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
/*!
21+
* Copyright (c) 2015 by Contributors
22+
* \file library.h
23+
* \brief Defining library loading functions
24+
*/
25+
#ifndef MXNET_COMMON_LIBRARY_H_
26+
#define MXNET_COMMON_LIBRARY_H_
27+
28+
#include <iostream>
29+
#include <map>
30+
#include <string>
31+
#include "dmlc/io.h"
32+
33+
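// map of libraries loaded: library path -> handle stored by load_lib
// NOTE: declared static in a header, so every translation unit that
// includes this file gets its own separate copy of the map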
34+
static std::map<std::string, void*> loaded_libs;
35+
36+
void* load_lib(const char* path);
37+
void close_lib(void* handle);
38+
void get_sym(void* handle, void** func, char* name);
39+
40+
/*!
41+
* \brief a templated function that fetches from the library
42+
* a function pointer of any given datatype and name
43+
* \param T a template parameter for data type of function pointer
44+
* \param lib library handle
45+
* \param func_name function name to search for in the library
46+
* \return func a function pointer
47+
*/
48+
template<typename T>
49+
T get_func(void *lib, char *func_name) {
50+
T func;
51+
get_sym(lib, reinterpret_cast<void**>(&func), func_name);
52+
if (!func)
53+
LOG(FATAL) << "Unable to get function '" << func_name << "' from library";
54+
return func;
55+
}
56+
57+
#endif // MXNET_COMMON_LIBRARY_H_

src/common/utils.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,21 +50,9 @@
5050
#include "../operator/nn/mkldnn/mkldnn_base-inl.h"
5151
#endif
5252

53-
#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
54-
#include <windows.h>
55-
#else
56-
#include <unistd.h>
57-
#endif
58-
59-
6053
namespace mxnet {
6154
namespace common {
6255

63-
#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
64-
inline size_t current_process_id() { return ::GetCurrentProcessId(); }
65-
#else
66-
inline size_t current_process_id() { return getpid(); }
67-
#endif
6856
/*!
6957
* \brief IndPtr should be non-negative, in non-decreasing order, start with 0
7058
* and end with value equal with size of indices.

src/engine/threaded_engine_perdevice.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
#include <dmlc/parameter.h>
2929
#include <dmlc/concurrency.h>
3030
#include <dmlc/thread_group.h>
31-
#include "../initialize.h"
3231
#include "./threaded_engine.h"
3332
#include "./thread_pool.h"
3433
#include "../common/lazy_alloc_array.h"
@@ -77,8 +76,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
7776
void Start() override {
7877
if (is_worker_) return;
7978
gpu_worker_nthreads_ = common::GetNumThreadsPerGPU();
80-
// MXNET_CPU_WORKER_NTHREADS
81-
cpu_worker_nthreads_ = LibraryInitializer::Get()->cpu_worker_nthreads_;
79+
cpu_worker_nthreads_ = dmlc::GetEnv("MXNET_CPU_WORKER_NTHREADS", 1);
8280
gpu_copy_nthreads_ = dmlc::GetEnv("MXNET_GPU_COPY_NTHREADS", 2);
8381
// create CPU task
8482
int cpu_priority_nthreads = dmlc::GetEnv("MXNET_CPU_PRIORITY_NTHREADS", 4);

0 commit comments

Comments
 (0)