Skip to content

Commit 23a5167

Browse files
PRv0.6.0 (#5)
* Implemented Mini-Batch K-Means and Birch clustering algorithms (scikit-learn). * Implemented Agglomerative Clustering (scikit-learn). * Fixed cluster labels mapping dictionary checking procedure. * Implemented Agglomerative Clustering (scipy). * Implemented Birch clustering algorithm (pyclustering). * Implemented Cure clustering algorithm (pyclustering). * Implemented K-Means clustering algorithm (pyclustering). * Small documentation fixes and reorder clustering algorithms labels. * Implemented X-Means clustering algorithm (pyclustering). * Implemented Agglomerative clustering algorithm (fastcluster) and reorder clustering algorithms labels. * Implemented two data standardization algorithms to standardize the clustering global data matrix. * Included standardization algorithm as an optional parameter in CRATE's input data file. * Changed default self-consistent scheme and fixed CRATE's input data file documentation. * Updated CRATE's input data file to include implemented clustering algorithms.
1 parent 0638094 commit 23a5167

File tree

5 files changed

+933
-81
lines changed

5 files changed

+933
-81
lines changed

clustering/clusteringdata.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import copy
1818
# Defining abstract base classes
1919
from abc import ABC, abstractmethod
20+
# Data preprocessing tools
21+
import sklearn.preprocessing as skpp
2022
# Display messages
2123
import ioput.info as info
2224
# RVE response database
@@ -35,6 +37,7 @@ def set_clustering_data(dirs_dict, problem_dict, mat_dict, rg_dict, clst_dict):
3537
n_voxels_dims = rg_dict['n_voxels_dims']
3638
# Get clustering data
3739
clustering_solution_method = clst_dict['clustering_solution_method']
40+
standardization_method = clst_dict['standardization_method']
3841
clustering_scheme = clst_dict['clustering_scheme']
3942
# Compute total number of voxels
4043
n_voxels = np.prod(n_voxels_dims)
@@ -65,6 +68,19 @@ def set_clustering_data(dirs_dict, problem_dict, mat_dict, rg_dict, clst_dict):
6568
info.displayinfo('5', 'Computing cluster analysis global data matrix...')
6669
# Compute clustering global data matrix containing all clustering features
6770
clustering_data.set_global_data_matrix(rve_elastic_database.rve_global_response)
71+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
72+
info.displayinfo('5', 'Standardizing cluster analysis global data matrix...')
73+
# Instantiate standardization algorithm
74+
if standardization_method == 1:
75+
standardizer = MinMaxScaler()
76+
elif standardization_method == 2:
77+
standardizer = StandardScaler()
78+
else:
79+
raise RuntimeError('Unknown standardization method.')
80+
# Standardize clustering global data matrix
81+
clustering_data.global_data_matrix = \
82+
standardizer.get_standardized_data_matrix(clustering_data.global_data_matrix)
83+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6884
# Store clustering global data matrix
6985
clst_dict['clst_quantities'] = clustering_data.global_data_matrix
7086
#
@@ -329,3 +345,76 @@ class StrainConcentrationTensor(FeatureAlgorithm):
329345
def get_feature_data_matrix(self, rve_response):
330346
data_matrix = copy.deepcopy(rve_response)
331347
return data_matrix
348+
#
349+
# Data standardization algorithms
350+
# ==========================================================================================
351+
class Standardizer(ABC):
    '''Interface shared by all data standardization algorithms.'''
    @abstractmethod
    def __init__(self):
        '''Set up the standardization algorithm.'''
    # --------------------------------------------------------------------------------------
    @abstractmethod
    def get_standardized_data_matrix(self, data_matrix):
        '''Return the standardized counterpart of the provided data matrix.

        Parameters
        ----------
        data_matrix : ndarray of shape (n_items, n_features)
            Data matrix to be standardized.

        Returns
        -------
        data_matrix : ndarray of shape (n_items, n_features)
            Transformed data matrix.
        '''
373+
# ------------------------------------------------------------------------------------------
374+
class MinMaxScaler(Standardizer):
    '''Rescale each feature linearly so that it spans a prescribed min-max range.

    Attributes
    ----------
    _feature_range : tuple(min, max), default=(0, 1)
        Desired range of transformed data.

    Notes
    -----
    The scaling itself is delegated to the Min-Max scaler of scikit-learn
    (https://scikit-learn.org), where further information can be found.
    '''
    def __init__(self, feature_range=(0, 1)):
        '''Store the target range of the transformed features.

        Parameters
        ----------
        feature_range : tuple(min, max), default=(0, 1)
            Desired range of transformed data.
        '''
        self._feature_range = feature_range
    # --------------------------------------------------------------------------------------
    def get_standardized_data_matrix(self, data_matrix):
        '''Fit the min-max scaling to the provided data matrix and apply it.

        Parameters
        ----------
        data_matrix : ndarray of shape (n_items, n_features)
            Data matrix to be standardized.

        Returns
        -------
        data_matrix : ndarray of shape (n_items, n_features)
            Transformed data matrix.
        '''
        # Instantiate the scikit-learn scaler. copy=False requests in-place scaling, so
        # the provided array may be overwritten by the transformation
        scaler = skpp.MinMaxScaler(copy=False, feature_range=self._feature_range)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Fit scaling parameters and return the transformed data
        return scaler.fit_transform(data_matrix)
399+
# ------------------------------------------------------------------------------------------
400+
class StandardScaler(Standardizer):
    '''Center each feature on its mean and scale it to unit variance (standard normal
    distribution).

    Attributes
    ----------
    _feature_range : tuple(min, max), default=(0, 1)
        Value received by the constructor. It is stored only and is not read by
        get_standardized_data_matrix().

    Notes
    -----
    The scaling itself is delegated to the Standard scaler of scikit-learn
    (https://scikit-learn.org), where further information can be found.
    '''
    def __init__(self, feature_range=(0, 1)):
        '''Store the constructor argument.

        Parameters
        ----------
        feature_range : tuple(min, max), default=(0, 1)
            Stored as given; it does not take part in the standard scaling.
        '''
        self._feature_range = feature_range
    # --------------------------------------------------------------------------------------
    def get_standardized_data_matrix(self, data_matrix):
        '''Fit the standard scaling to the provided data matrix and apply it.

        Parameters
        ----------
        data_matrix : ndarray of shape (n_items, n_features)
            Data matrix to be standardized.

        Returns
        -------
        data_matrix : ndarray of shape (n_items, n_features)
            Transformed data matrix.
        '''
        # Instantiate the scikit-learn scaler. copy=False requests in-place scaling, so
        # the provided array may be overwritten by the transformation
        scaler = skpp.StandardScaler(copy=False, with_mean=True, with_std=True)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Fit scaling parameters and return the transformed data
        return scaler.fit_transform(data_matrix)

0 commit comments

Comments
 (0)