SVM_kernel_polynominal.py
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
# Encode categorical features as an integer array.
enc = OrdinalEncoder()
# Encode labels with value between 0 and n_classes-1.
lb_enc = LabelEncoder()
# Get data from CSV file
rawData = pd.read_csv('dataset4classfication.csv', header = 0)
# Replace NaN values with the per-column mean (numeric columns only);
# assign the result back, since fillna does not modify the DataFrame in place by default.
rawData = rawData.fillna(rawData.mean(numeric_only=True))
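# Optional sanity check: count the missing values that remain per column.
# Non-numeric columns are not touched by the mean imputation above, so any
# remaining NaNs reported here would still need separate handling.
print("Missing values per column:\n", rawData.isna().sum())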
# Split features and labels out of rawData
# Features: every row, columns at indices 0 through 12
trainingData = rawData.iloc[:, 0:13]
# The label is the last column in the dataset (column "Label", at index 14)
labels = rawData['Label']
# Encode the categorical training features as integers
enc.fit(trainingData)
trainingData = enc.transform(trainingData)
# Encode labels to number
lb_enc.fit(labels)
labels = lb_enc.transform(labels)
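# Optional: inspect which original label maps to which integer code.
# LabelEncoder assigns codes 0..n_classes-1 in sorted order of the class names.
print("Label classes (index = encoded value):", lb_enc.classes_)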
scaler = MinMaxScaler() # Default behavior is to scale to [0,1]
trainingData = scaler.fit_transform(trainingData)
# Split into training (80%) and test (20%) sets; rows are shuffled randomly before splitting.
X_train, X_test, y_train, y_test = train_test_split(trainingData, labels, test_size=0.20)
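# If the classes are imbalanced, a stratified split keeps the class proportions equal
# in the training and test sets. This optional variant (not in the original script)
# would replace the call above; random_state is fixed only to make the split reproducible.
# X_train, X_test, y_train, y_test = train_test_split(
#     trainingData, labels, test_size=0.20, stratify=labels, random_state=42)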
# Train an SVM with a degree-3 polynomial kernel; gamma scales the inner-product term inside the kernel
svclassifier = SVC(kernel='poly', gamma=0.5, degree=3)
svclassifier.fit(X_train, y_train)
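# Optional: report how many support vectors the fitted model keeps for each class
# (n_support_ is an attribute of a fitted sklearn SVC).
print("Support vectors per class:", svclassifier.n_support_)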
# Predict the response for the test dataset
y_pred = svclassifier.predict(X_test)
# Model Accuracy: how often is the classifier correct?
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
# Compute the precision
# The precision is the ratio tp / (tp + fp) where tp is the number of true positives and fp the number of false positives.
# average="micro" : Calculate metrics globally by counting the total true positives, false negatives and false positives.
print("Precision score: ", metrics.precision_score(y_test, y_pred, average="micro"))
# Compute the recall
# The recall is the ratio tp / (tp + fn) where tp is the number of true positives and fn the number of false negatives
# average="macro": Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account.
print("Recall score: ", metrics.recall_score(y_test, y_pred, average="macro"))
# Compute confusion matrix
print("Confusion Matrix: \n", metrics.confusion_matrix(y_test, y_pred))