Skip to content

Commit 26eda05

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents 79ca3c2 + 0d01459 commit 26eda05

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

110 files changed

+2794
-623
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ endif
187187
# git version of mlpack is used.)
188188
#cd methods/mlpack/src/ && ./build_scripts.sh
189189
# Compile the DLIBML scripts.
190+
g++ -O2 -std=c++11 methods/dlibml/src/SVM.cpp -o methods/dlibml/dlibml_svm -I"$(INCLUDEPATH)" -L"$(LIBPATH)" -ldlib -lmlpack -lboost_program_options -lblas -llapack
190191
g++ -O2 -std=c++11 methods/dlibml/src/ANN.cpp -o methods/dlibml/dlibml_ann -I"$(INCLUDEPATH)" -L"$(LIBPATH)" -ldlib -lmlpack -lboost_program_options -lblas -llapack
191192
g++ -O2 -std=c++11 methods/dlibml/src/ALLKNN.cpp -o methods/dlibml/dlibml_allknn -I"$(INCLUDEPATH)" -L"$(LIBPATH)" -ldlib -lmlpack -lboost_program_options -lblas -llapack
192193
g++ -O2 -std=c++11 methods/dlibml/src/KMEANS.cpp -o methods/dlibml/dlibml_kmeans -I"$(INCLUDEPATH)" -L"$(LIBPATH)" -ldlib -lmlpack -lboost_program_options -lblas -llapack

config.yaml

Lines changed: 180 additions & 28 deletions
Large diffs are not rendered by default.

libraries/dtimeout_install.sh

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
#
# Wrapper script to unpack and build dtimeout.
#
# Include files will be installed to ../include/.
# Library files will be installed to ../lib/.
#
# One dtimeout.tar.gz file should be located in this directory.
if [ ! -f dtimeout.tar.gz ]; then
  echo "No source dtimeout.tar.gz found in libraries/!"
  exit 1
fi

# Remove any old directory.
rm -rf dtimeout/
mkdir dtimeout/
tar -xzpf dtimeout.tar.gz --strip-components=1 -C dtimeout/ || exit 1

# Stop if the source directory is unusable; otherwise setup.py would be looked
# up in (and run from) the wrong directory.
cd dtimeout/ || exit 1
python3 setup.py build || exit 1

# Name of the versioned python directory, e.g. "python3.10". Use version_info
# instead of slicing sys.version, which truncates two-digit minor versions
# ("3.10" would become "3.1").
PYVER=$(python3 -c 'import sys; print("python%d.%d" % sys.version_info[:2])')

# The install target has to exist and be on PYTHONPATH for the install step.
mkdir -p "../lib/$PYVER/site-packages/"
PYTHONPATH="../lib/$PYVER/site-packages/" python3 setup.py install --prefix=../ -O2

libraries/install_all.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,9 @@ if [ "$?" -ne "0" ]; then
8585
echo "Error installing R!";
8686
exit 1;
8787
fi
88+
89+
# Build and install dtimeout; abort the whole installation if that fails.
./dtimeout_install.sh $1
if [ "$?" -ne "0" ]; then
  echo "Error installing dtimeout!";
  exit 1;
fi

libraries/install_r_packages.r

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Install the required R packages one after another, in a fixed order, from a
# single CRAN mirror.
cranMirror <- 'http://cran.us.r-project.org'
requiredPackages <- c('mlr', 'tictoc', 'LiblineaR', 'adabag', 'rpart',
                      'class', 'randomForest', 'e1071', 'penalized')

for (pkg in requiredPackages) {
  install.packages(pkg, repos=cranMirror)
}

libraries/package-urls.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
dtimeout https://github.com/pnpnpn/timeout-decorator/archive/master.tar.gz
12
ann https://www.cs.umd.edu/~mount/ANN/Files/1.1.2/ann_1.1.2.tar.gz
23
flann https://github.com/mariusmuja/flann/archive/1.9.1.tar.gz
34
HLearn https://github.com/mikeizbicki/HLearn/archive/2.0.0.0.tar.gz

libraries/r_install.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,5 @@ prefix_path="$(readlink -m ../)"
2424
./configure --prefix=$prefix_path --enable-R-shlib
2525
make
2626
make install
27+
cd ..
28+
bin/Rscript install_r_packages.r

methods/R/adaboost.py

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
'''
2+
@file adaboost.py
3+
Class to benchmark the R Adaboost method.
4+
'''
5+
6+
import os
7+
import sys
8+
import inspect
9+
10+
# Import the util path, this method even works if the path contains symlinks to
11+
# modules.
12+
cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
13+
os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
14+
if cmd_subfolder not in sys.path:
15+
sys.path.insert(0, cmd_subfolder)
16+
17+
#Import the metrics definitions path.
18+
metrics_folder = os.path.realpath(os.path.abspath(os.path.join(
19+
os.path.split(inspect.getfile(inspect.currentframe()))[0], "../metrics")))
20+
if metrics_folder not in sys.path:
21+
sys.path.insert(0, metrics_folder)
22+
23+
from log import *
24+
from profiler import *
25+
from definitions import *
26+
from misc import *
27+
28+
import shlex
29+
import subprocess
30+
import re
31+
import collections
32+
import numpy as np
33+
34+
'''
This class implements the adaboost benchmark.

The benchmark runs the adaboost.r script through Rscript, reads the elapsed
time from the output of the script and, if a file with the true labels is
given, scores the predictions found in predictions.csv.
'''
class ADABOOST(object):

  '''
  Create the adaboost benchmark instance.

  @param dataset - Input dataset to perform adaboost on: the training file, the
      test file and optionally a file that contains the true labels.
  @param timeout - The time until the timeout. Default no timeout.
  @param path - Path prefix of the adaboost.r script; the script name is
      appended to it as is. If not given, the R_PATH environment variable is
      used (a KeyError is raised if it is not set).
  @param verbose - Display informational messages.
  '''
  def __init__(self, dataset, timeout=0, path=None, verbose=True):
    self.verbose = verbose
    self.dataset = dataset
    # Resolve the default here instead of in the signature, so importing this
    # module does not require R_PATH to be set.
    self.path = os.environ["R_PATH"] if path is None else path
    self.timeout = timeout

  '''
  Remove the files a run leaves behind in the current working directory.
  '''
  def __del__(self):
    Log.Info("Clean up.", self.verbose)
    for f in ("predictions.csv", "log.txt"):
      if os.path.isfile(f):
        os.remove(f)

  '''
  Adaboost. If the method has been successfully completed return the collected
  metrics.

  @param options - Extra options for the method. Supported key:
      max_iterations (default 100). The recognized keys are removed from the
      given dictionary.
  @return - Dictionary that contains the 'Runtime' in seconds and, if a file
      with the true labels was given, 'ACC', 'MCC', 'Precision', 'Recall' and
      'MSE'. -2 if the run timed out, -1 if the method was not successful.
  @raise Exception - If options contains unknown parameters.
  '''
  def RunMetrics(self, options):
    Log.Info("Perform Adaboost.", self.verbose)

    opts = {}
    opts["max_iterations"] = int(options.pop("max_iterations", 100))

    if len(options) > 0:
      Log.Fatal("Unknown parameters: " + str(options))
      raise Exception("unknown parameters")

    if len(self.dataset) < 2:
      Log.Fatal("This method requires two or more datasets.")
      return -1

    # Build the argument list directly instead of splitting a concatenated
    # string, so paths that contain whitespace or quotes stay intact.
    cmd = ["libraries/bin/Rscript", self.path + "adaboost.r",
        "-t", self.dataset[0], "-T", self.dataset[1],
        "-m", str(opts["max_iterations"])]

    # Run command with the necessary arguments and return its output as a byte
    # string. We have untrusted input so we disable all shell based features.
    # A timeout of 0 means "no timeout"; subprocess expects None for that, a
    # literal 0 would expire immediately.
    try:
      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False,
          timeout=self.timeout or None)
    except subprocess.TimeoutExpired as e:
      Log.Warn(str(e))
      return -2
    except Exception:
      Log.Fatal("Could not execute command: " + str(cmd))
      return -1

    # Parse data: runtime.
    timer = self.parseTimer(str(s))
    if timer == -1:
      Log.Fatal("Could not parse the timer data.")
      return -1

    # Datastructure to store the results.
    metrics = {}
    metrics['Runtime'] = timer

    # The quality metrics need the true labels, which are optional.
    if len(self.dataset) >= 3:
      predictions = np.genfromtxt("predictions.csv", delimiter = ',')
      # Skip the first row; presumably the header line written by the R
      # script, which is not a prediction.
      predictions = predictions[1:]
      truelabels = np.genfromtxt(self.dataset[2], delimiter = ',')
      confusionMatrix = Metrics.ConfusionMatrix(truelabels, predictions)
      metrics['ACC'] = Metrics.AverageAccuracy(confusionMatrix)
      metrics['MCC'] = Metrics.MCCMultiClass(confusionMatrix)
      metrics['Precision'] = Metrics.AvgPrecision(confusionMatrix)
      metrics['Recall'] = Metrics.AvgRecall(confusionMatrix)
      metrics['MSE'] = Metrics.SimpleMeanSquaredError(truelabels, predictions)

    Log.Info(("total time: %fs" % (metrics['Runtime'])), self.verbose)

    return metrics

  '''
  Parse the timer data from a given string.

  @param data - String to parse timer data from.
  @return - The first elapsed time found in the string as a float or -1 if the
      string contains no timer data.
  '''
  def parseTimer(self, data):
    matches = re.findall(r"(\d+\.\d+). *sec elapsed", data)
    if not matches:
      return -1
    return float(matches[0])

methods/R/adaboost.r

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Train an AdaBoost classifier on the training file, predict the test file,
# print/log the elapsed time and write the predictions to predictions.csv.
#
# Expected invocation: adaboost.r -t <train file> -T <test file> -m <max iter>
library(mlr)
library(tictoc)

# Read the command line arguments in a vector. The values are taken by
# position (2nd, 4th and 6th argument); the flags in between are not inspected.
myArgs <- commandArgs(trailingOnly = TRUE)
if (length(myArgs) < 6) {
  stop("usage: adaboost.r -t <train file> -T <test file> -m <max iterations>")
}

trainFile <- myArgs[2]
testFile <- myArgs[4]
maxiter <- as.integer(myArgs[6])

trainData <- read.csv(trainFile, header = FALSE, sep = ",")
testData <- read.csv(testFile, header = FALSE, sep = ",")

# Column names V1..Vn, where n is the number of training columns.
colNames <- paste0("V", seq_len(ncol(trainData)))
names(trainData) <- colNames

# Fill the target column of the test data with random 0/1 placeholder labels so
# both data frames have the same columns (an existing column at that position
# is overwritten).
testData[, ncol(trainData)] <- sample(0:1, size = nrow(testData),
                                      replace = TRUE)
names(testData) <- colNames

# The last column is the classification target.
tar <- paste0("V", ncol(trainData))

tic()
trainTask <- makeClassifTask(data = trainData, target = tar)
testTask <- makeClassifTask(data = testData, target = tar)
adaboost.learner <- makeLearner("classif.boosting",
                                par.vals = list(mfinal = maxiter),
                                predict.type = "response")
fmodel <- train(adaboost.learner, trainTask)
fpmodel <- predict(fmodel, testTask)
toc(log = TRUE)

# Store the formatted timer log in log.txt as well.
out <- capture.output(tic.log(format = TRUE))
cat(out, file = "log.txt", append = FALSE)

# NOTE(review): if the response is a factor, as.numeric() yields the level
# indices rather than the original label values - confirm this is intended.
pred <- as.numeric(fpmodel$data$response)
write.csv(pred, "predictions.csv", row.names = FALSE)

methods/R/dtc.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
'''
2+
@file dtc.py
3+
Class to benchmark the R Decision Tree method.
4+
'''
5+
6+
import os
7+
import sys
8+
import inspect
9+
10+
# Import the util path, this method even works if the path contains symlinks to
11+
# modules.
12+
cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
13+
os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
14+
if cmd_subfolder not in sys.path:
15+
sys.path.insert(0, cmd_subfolder)
16+
17+
#Import the metrics definitions path.
18+
metrics_folder = os.path.realpath(os.path.abspath(os.path.join(
19+
os.path.split(inspect.getfile(inspect.currentframe()))[0], "../metrics")))
20+
if metrics_folder not in sys.path:
21+
sys.path.insert(0, metrics_folder)
22+
23+
from log import *
24+
from profiler import *
25+
from definitions import *
26+
from misc import *
27+
28+
import shlex
29+
import subprocess
30+
import re
31+
import collections
32+
import numpy as np
33+
34+
'''
This class implements the Decision Tree benchmark.

The benchmark runs the dtc.r script through Rscript, reads the elapsed time
from the output of the script and, if a file with the true labels is given,
scores the predictions found in predictions.csv.
'''
class DTC(object):

  '''
  Create the Decision Tree benchmark instance.

  @param dataset - Input dataset to perform DTC on: the training file, the test
      file and optionally a file that contains the true labels.
  @param timeout - The time until the timeout. Default no timeout.
  @param path - Path prefix of the dtc.r script; the script name is appended to
      it as is. If not given, the R_PATH environment variable is used (a
      KeyError is raised if it is not set).
  @param verbose - Display informational messages.
  '''
  def __init__(self, dataset, timeout=0, path=None, verbose=True):
    self.verbose = verbose
    self.dataset = dataset
    # Resolve the default here instead of in the signature, so importing this
    # module does not require R_PATH to be set.
    self.path = os.environ["R_PATH"] if path is None else path
    self.timeout = timeout
    self.build_opts = {}

  '''
  Remove the files a run leaves behind in the current working directory.
  '''
  def __del__(self):
    Log.Info("Clean up.", self.verbose)
    for f in ("predictions.csv", "log.txt"):
      if os.path.isfile(f):
        os.remove(f)

  '''
  DTC. If the method has been successfully completed return the collected
  metrics.

  @param options - Extra options for the method. Supported keys: max_depth
      (default 30) and minimum_samples_split (default 20). The recognized keys
      are removed from the given dictionary.
  @return - Dictionary that contains the 'Runtime' in seconds and, if a file
      with the true labels was given, 'ACC', 'MCC', 'Precision', 'Recall' and
      'MSE'. -2 if the run timed out, -1 if the method was not successful.
  @raise Exception - If options contains unknown parameters.
  '''
  def RunMetrics(self, options):
    Log.Info("Perform DTC.", self.verbose)

    # Get all the parameters.
    self.build_opts = {}
    self.build_opts["max_depth"] = int(options.pop("max_depth", 30))
    self.build_opts["min_samples_split"] = \
        int(options.pop("minimum_samples_split", 20))

    if len(options) > 0:
      Log.Fatal("Unknown parameters: " + str(options))
      raise Exception("unknown parameters")

    if len(self.dataset) < 2:
      Log.Fatal("This method requires two or more datasets.")
      return -1

    # Build the argument list directly instead of splitting a concatenated
    # string, so paths that contain whitespace or quotes stay intact.
    cmd = ["libraries/bin/Rscript", self.path + "dtc.r",
        "-t", self.dataset[0], "-T", self.dataset[1],
        "-md", str(self.build_opts["max_depth"]),
        "-ms", str(self.build_opts["min_samples_split"])]

    # Run command with the necessary arguments and return its output as a byte
    # string. We have untrusted input so we disable all shell based features.
    # A timeout of 0 means "no timeout"; subprocess expects None for that, a
    # literal 0 would expire immediately.
    try:
      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False,
          timeout=self.timeout or None)
    except subprocess.TimeoutExpired as e:
      Log.Warn(str(e))
      return -2
    except Exception:
      Log.Fatal("Could not execute command: " + str(cmd))
      return -1

    # Parse data: runtime.
    timer = self.parseTimer(str(s))
    if timer == -1:
      Log.Fatal("Could not parse the timer data.")
      return -1

    # Datastructure to store the results.
    metrics = {}
    metrics['Runtime'] = timer

    # The quality metrics need the true labels, which are optional.
    if len(self.dataset) >= 3:
      predictions = np.genfromtxt("predictions.csv", delimiter = ',')
      # Skip the first row; presumably the header line written by the R
      # script, which is not a prediction - verify against dtc.r.
      predictions = predictions[1:]
      truelabels = np.genfromtxt(self.dataset[2], delimiter = ',')
      confusionMatrix = Metrics.ConfusionMatrix(truelabels, predictions)
      metrics['ACC'] = Metrics.AverageAccuracy(confusionMatrix)
      metrics['MCC'] = Metrics.MCCMultiClass(confusionMatrix)
      metrics['Precision'] = Metrics.AvgPrecision(confusionMatrix)
      metrics['Recall'] = Metrics.AvgRecall(confusionMatrix)
      metrics['MSE'] = Metrics.SimpleMeanSquaredError(truelabels, predictions)

    Log.Info(("total time: %fs" % (metrics['Runtime'])), self.verbose)

    return metrics

  '''
  Parse the timer data from a given string.

  @param data - String to parse timer data from.
  @return - The first elapsed time found in the string as a float or -1 if the
      string contains no timer data.
  '''
  def parseTimer(self, data):
    matches = re.findall(r"(\d+\.\d+). *sec elapsed", data)
    if not matches:
      return -1
    return float(matches[0])

0 commit comments

Comments
 (0)