17
17
import copy
18
18
# Defining abstract base classes
19
19
from abc import ABC , abstractmethod
20
+ # Data preprocessing tools
21
+ import sklearn .preprocessing as skpp
20
22
# Display messages
21
23
import ioput .info as info
22
24
# RVE response database
@@ -35,6 +37,7 @@ def set_clustering_data(dirs_dict, problem_dict, mat_dict, rg_dict, clst_dict):
35
37
n_voxels_dims = rg_dict ['n_voxels_dims' ]
36
38
# Get clustering data
37
39
clustering_solution_method = clst_dict ['clustering_solution_method' ]
40
+ standardization_method = clst_dict ['standardization_method' ]
38
41
clustering_scheme = clst_dict ['clustering_scheme' ]
39
42
# Compute total number of voxels
40
43
n_voxels = np .prod (n_voxels_dims )
@@ -65,6 +68,19 @@ def set_clustering_data(dirs_dict, problem_dict, mat_dict, rg_dict, clst_dict):
65
68
info .displayinfo ('5' , 'Computing cluster analysis global data matrix...' )
66
69
# Compute clustering global data matrix containing all clustering features
67
70
clustering_data .set_global_data_matrix (rve_elastic_database .rve_global_response )
71
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
72
+ info .displayinfo ('5' , 'Standardizing cluster analysis global data matrix...' )
73
+ # Instantiate standardization algorithm
74
+ if standardization_method == 1 :
75
+ standardizer = MinMaxScaler ()
76
+ elif standardization_method == 2 :
77
+ standardizer = StandardScaler ()
78
+ else :
79
+ raise RuntimeError ('Unknown standardization method.' )
80
+ # Standardize clustering global data matrix
81
+ clustering_data .global_data_matrix = \
82
+ standardizer .get_standardized_data_matrix (clustering_data .global_data_matrix )
83
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
68
84
# Store clustering global data matrix
69
85
clst_dict ['clst_quantities' ] = clustering_data .global_data_matrix
70
86
#
@@ -329,3 +345,76 @@ class StrainConcentrationTensor(FeatureAlgorithm):
329
345
def get_feature_data_matrix(self, rve_response):
    '''Build the feature data matrix from the provided RVE response.

    The response is deep-copied, so the returned object shares no state with
    the argument.
    '''
    return copy.deepcopy(rve_response)
348
+ #
349
+ # Data standardization algorithms
350
+ # ==========================================================================================
351
class Standardizer(ABC):
    '''Interface shared by all data standardization algorithms.

    Both the constructor and get_standardized_data_matrix() are abstract, so
    every concrete standardizer has to provide its own implementation of each.
    '''
    @abstractmethod
    def __init__(self):
        '''Standardization algorithm constructor.'''
    # --------------------------------------------------------------------------------------
    @abstractmethod
    def get_standardized_data_matrix(self, data_matrix):
        '''Standardize provided data matrix.

        Parameters
        ----------
        data_matrix : ndarray of shape (n_items, n_features)
            Data matrix to be standardized.

        Returns
        -------
        data_matrix : ndarray of shape (n_items, n_features)
            Transformed data matrix.
        '''
373
+ # ------------------------------------------------------------------------------------------
374
class MinMaxScaler(Standardizer):
    '''Rescale every feature linearly onto a prescribed min-max range.

    Attributes
    ----------
    _feature_range : tuple(min, max), default=(0, 1)
        Desired range of transformed data.

    Notes
    -----
    The scaling itself is delegated to scikit-learn's MinMaxScaler
    (https://scikit-learn.org), where further details can be found.
    '''
    def __init__(self, feature_range=(0, 1)):
        '''Standardization algorithm constructor.

        Parameters
        ----------
        feature_range : tuple(min, max), default=(0, 1)
            Desired range of transformed data.
        '''
        self._feature_range = feature_range
    # --------------------------------------------------------------------------------------
    def get_standardized_data_matrix(self, data_matrix):
        '''Standardize provided data matrix.

        Parameters
        ----------
        data_matrix : ndarray of shape (n_items, n_features)
            Data matrix to be standardized.

        Returns
        -------
        data_matrix : ndarray of shape (n_items, n_features)
            Transformed data matrix.
        '''
        # copy=False asks scikit-learn to avoid copying the input, so the provided
        # matrix may be overwritten in place
        scaler = skpp.MinMaxScaler(feature_range=self._feature_range, copy=False)
        # Fit the scaling parameters and transform the data in a single step
        return scaler.fit_transform(data_matrix)
399
+ # ------------------------------------------------------------------------------------------
400
class StandardScaler(Standardizer):
    '''Transform features by removing the mean and scaling to unit variance (standard
    normal distribution).

    Attributes
    ----------
    _feature_range : tuple(min, max), default=(0, 1)
        Not used by this algorithm. Only stored so that existing code that passes
        or reads it keeps working.
    _with_mean : bool, default=True
        If True, features are centered before scaling.
    _with_std : bool, default=True
        If True, features are scaled to unit variance.

    Notes
    -----
    The Standard scaling algorithm is taken from scikit-learn (https://scikit-learn.org).
    Further information can be found in there.
    '''
    def __init__(self, feature_range=(0, 1), *, with_mean=True, with_std=True):
        '''Standardization algorithm constructor.

        Parameters
        ----------
        feature_range : tuple(min, max), default=(0, 1)
            Ignored. Accepted only for backward compatibility with the original
            constructor signature; it has no effect on the standardization.
        with_mean : bool, default=True
            Forwarded to scikit-learn's StandardScaler (center features).
        with_std : bool, default=True
            Forwarded to scikit-learn's StandardScaler (scale to unit variance).
        '''
        # Kept only for backward compatibility; never read by this class
        self._feature_range = feature_range
        self._with_mean = with_mean
        self._with_std = with_std
    # --------------------------------------------------------------------------------------
    def get_standardized_data_matrix(self, data_matrix):
        '''Standardize provided data matrix.

        Parameters
        ----------
        data_matrix : ndarray of shape (n_items, n_features)
            Data matrix to be standardized.

        Returns
        -------
        data_matrix : ndarray of shape (n_items, n_features)
            Transformed data matrix.
        '''
        # Instantiate standardizer. copy=False asks scikit-learn to avoid copying the
        # input, so the provided matrix may be overwritten in place
        standardizer = skpp.StandardScaler(with_mean=self._with_mean,
                                           with_std=self._with_std, copy=False)
        # Fit scaling parameters and transform data
        return standardizer.fit_transform(data_matrix)
0 commit comments