Skip to content
This repository was archived by the owner on Aug 5, 2022. It is now read-only.

Commit c6e2503

Browse files
committed
Merge remote-tracking branch 'remotes/internal/release_1.0.4'
2 parents 85749df + d9d52b7 commit c6e2503

34 files changed

+4877
-278
lines changed

Makefile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ ifeq ($(CAFFE_MLSL_SHUFFLE), 1)
8080
COMMON_FLAGS += -DCAFFE_MLSL_SHUFFLE
8181
endif
8282

83-
ifeq ($(FW_OVERLAP_OPT), 1)
83+
ifneq ($(FW_OVERLAP_OPT), 0)
8484
COMMON_FLAGS += -DFW_OVERLAP_OPT
8585
endif
8686
endif
@@ -547,6 +547,12 @@ LIBRARY_DIRS += $(LIB_BUILD_DIR)
547547
# Automatic dependency generation (nvcc is handled separately)
548548
CXXFLAGS += -MMD -MP
549549

550+
##########SGD FUSION#######################
551+
ifeq ($(ENABLE_SGD_FUSION), 1)
552+
COMMON_FLAGS += -DENABLE_SGD_FUSION
553+
endif
554+
###########################################
555+
#
550556
# Complete build flags.
551557
COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
552558
CXXFLAGS += -std=c++11 -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)

Makefile.config.example

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,5 +170,8 @@ DISTRIBUTE_DIR := distribute
170170
# The ID of the GPU that 'make runtest' will use to run unit tests.
171171
TEST_GPUID := 0
172172

173+
# Uncomment to enable SGD fusion
174+
# ENABLE_SGD_FUSION := 1
175+
173176
# enable pretty build (comment to see full commands)
174177
Q ?= @

Makefile.mkldnn

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
CAFFE_ROOTDIR := $(shell pwd)
2-
MKLDNN_ROOTDIR := external/mkldnn
2+
MKLDNN_ROOTDIR := $(CAFFE_ROOTDIR)/external/mkldnn
33
MKLDNN_TMPDIR := $(MKLDNN_ROOTDIR)/tmp
44
MKLDNN_SRCDIR := $(MKLDNN_ROOTDIR)/src
55
MKLDNN_BUILDDIR := $(MKLDNN_ROOTDIR)/build
@@ -22,7 +22,7 @@ ifneq (,$(findstring ccache,$(CC)))
2222
endif
2323

2424
MKLDNN_GITHUB := https://github.com/01org/mkl-dnn.git
25-
MKLDNN_CMAKE_FLAGS += $(MKLDNN_SRCDIR) -DCMAKE_INSTALL_PREFIX=$(CAFFE_ROOTDIR)/$(MKLDNN_INSTALLDIR) -DMKLROOT=${MKL_ROOTDIR} -B$(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) -DCMAKE_CXX_COMPILER="$(MKLDNN_CXX)" -DCMAKE_C_COMPILER="$(MKLDNN_CC)"
25+
MKLDNN_CMAKE_FLAGS += $(MKLDNN_SRCDIR) -DCMAKE_INSTALL_PREFIX=$(MKLDNN_INSTALLDIR) -DMKLROOT=${MKL_ROOTDIR} -B$(MKLDNN_BUILDDIR) -DCMAKE_CXX_COMPILER="$(MKLDNN_CXX)" -DCMAKE_C_COMPILER="$(MKLDNN_CC)"
2626

2727
ifeq ("$(wildcard $(MKLDNN_INSTALLDIR)/include/mkldnn.hpp)", "")
2828
mkldnn_download:
@@ -32,8 +32,8 @@ mkldnn_download:
3232

3333
mkldnn_build: mkldnn_download
3434
cmake $(MKLDNN_CMAKE_FLAGS)
35-
make -C $(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) -j$(shell cat /proc/cpuinfo |grep 'processor'|wc -l)
36-
make -C $(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) install
35+
make -C $(MKLDNN_BUILDDIR) -j$(shell cat /proc/cpuinfo |grep 'processor'|wc -l)
36+
make -C $(MKLDNN_BUILDDIR) install
3737
else
3838
mkldnn_download:
3939
mkldnn_build:

cmake/Dependencies.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ if(USE_MLSL)
122122
if(CAFFE_MLSL_SHUFFLE)
123123
add_definitions("-DCAFFE_MLSL_SHUFFLE")
124124
endif()
125-
if(FW_OVERLAP_OPT)
125+
if(FW_OVERLAP_OPT OR NOT DEFINED FW_OVERLAP_OPT)
126126
message(STATUS "Forward overlapping optimization is enabled!")
127127
add_definitions("-DFW_OVERLAP_OPT")
128128
endif()

examples/cpp_classification/batch_classification.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,10 @@ int main(int argc, char** argv) {
422422
cout<<"Use mean file: "<<FLAGS_mean_file<<endl;
423423
}
424424

425+
#ifdef USE_MLSL
426+
caffe::mn::init(&argc,&argv);
427+
#endif
428+
425429
Classifier classifier(FLAGS_model, FLAGS_weights, FLAGS_mean_file,
426430
FLAGS_mean_value, FLAGS_label_file, FLAGS_engine, FLAGS_batch_size);
427431

examples/cpp_classification/classification.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,10 @@ int main(int argc, char** argv) {
285285
engine = argv[6];
286286
}
287287

288+
#ifdef USE_MLSL
289+
caffe::mn::init(&argc,&argv);
290+
#endif
291+
288292
Classifier classifier(model_file, trained_file, mean_file, label_file, engine);
289293

290294

examples/pycaffe/tune_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def tuneModelDefinition(model_path, iteration):
2323
caffe_path = os.path.join(working_dir, "..", "..", "build", "tools", "caffe")
2424
if not os.path.exists(caffe_path):
2525
print "Caffe binary does not exist; please build Caffe binary first."
26-
sys,exit(1)
26+
sys.exit(1)
2727

2828
base_model_name = os.path.basename(model_path)
2929
model_dir = os.path.dirname(model_path)

include/caffe/blob.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ class Blob {
109109
return shape_[CanonicalAxisIndex(index)];
110110
}
111111
inline int num_axes() const { return shape_.size(); }
112-
inline int count() const { return count_; }
112+
inline long count() const { return count_; }
113113

114114
/**
115115
* @brief Compute the volume of a slice; i.e., the product of dimensions
@@ -332,8 +332,8 @@ class Blob {
332332
shared_ptr<SyncedMemory> shape_data_;
333333
#endif
334334
vector<int> shape_;
335-
int count_;
336-
int capacity_;
335+
long count_;
336+
long capacity_;
337337

338338
DISABLE_COPY_AND_ASSIGN(Blob);
339339
}; // class Blob

include/caffe/layer.hpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5555
#define LOG_BLOB(layer, blob, part, blob_id, description) \
5656
do \
5757
{ \
58-
int elems_to_log = std::min(MAX_ELEMS_TO_LOG, blob->count()); \
59-
for (int idx = 0; idx < elems_to_log; idx++) \
58+
long elems_to_log = std::min(static_cast<long>(MAX_ELEMS_TO_LOG), blob->count()); \
59+
for (long idx = 0; idx < elems_to_log; idx++) \
6060
{ \
6161
LOG_LAYER(layer) << description \
6262
<< ", blob_id " << blob_id \
@@ -68,8 +68,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6868
#define LOG_PARAM_BLOB(blob, part, blob_id, description) \
6969
do \
7070
{ \
71-
int elems_to_log = std::min(MAX_ELEMS_TO_LOG, blob->count()); \
72-
for (int idx = 0; idx < elems_to_log; idx++) \
71+
long elems_to_log = std::min(static_cast<long>(MAX_ELEMS_TO_LOG), blob->count()); \
72+
for (long idx = 0; idx < elems_to_log; idx++) \
7373
{ \
7474
DLOG(INFO) << description \
7575
<< ", blob_id " << blob_id \
@@ -521,7 +521,12 @@ class Layer {
521521
CHECK_EQ(top.size(), num_loss_weights) << "loss_weight must be "
522522
"unspecified or specified once per top blob.";
523523
for (int top_id = 0; top_id < top.size(); ++top_id) {
524+
#ifdef USE_MLSL
525+
const Dtype loss_weight = layer_param_.loss_weight(top_id) /
526+
GetDistribution().get_data_parts();
527+
#else
524528
const Dtype loss_weight = layer_param_.loss_weight(top_id);
529+
#endif
525530
if (loss_weight == Dtype(0)) { continue; }
526531
this->set_loss(top_id, loss_weight);
527532
const int count = top[top_id]->count();

include/caffe/layers/batch_norm_layer.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,19 @@ class BatchNormLayer : public Layer<Dtype> {
117117
const Dtype* data_to_be_replicated,
118118
FuncTy op_func);
119119

120+
void ForwardStatsBatch_cpu(const vector<Blob<Dtype>*>& bottom,
121+
const vector<Blob<Dtype>*>& top, int stats_batch_idx);
122+
void BackwardStatsBatch_cpu(const vector<Blob<Dtype>*>& top,
123+
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom,
124+
int stats_batch_idx);
125+
120126
Blob<Dtype> mean_, variance_, temp_, x_norm_;
121127
bool use_global_stats_;
122128
Dtype moving_average_fraction_;
123129
int channels_;
124130
Dtype eps_;
131+
int num_stats_batches_;
132+
int stats_batch_size_;
125133

126134
// extra temporary variables are used to carry out sums/broadcasting
127135
// using BLAS

0 commit comments

Comments
 (0)