# Source code for moonstone.normalization.processed.scaling_normalization

import logging
import numpy as np
from sklearn import preprocessing

from moonstone.normalization.processed.base import BaseScaler
from moonstone.analysis import stats

logger = logging.getLogger(__name__)


class StandardScaler(BaseScaler):
    """
    Standardize features with scikit-learn's ``preprocessing.scale``.

    ML algorithms such as SVM assume that all features are centered around zero and have
    similar variance. Scikit-learn module preprocessing.scale performs this normalization on a single array.
    More info at: https://scikit-learn.org/stable/modules/preprocessing.html
    """

    def __init__(self, raw_x):
        """
        Hand ``raw_x`` to ``BaseScaler`` and verify that a NumPy array is available.

        :param raw_x: the independent variables (features) to be scaled.
        :raises ValueError: if ``self.raw_x`` is not a ``numpy.ndarray`` once the base
            class has been initialised.
        """
        super().__init__(raw_x)
        # Lazy %-style arguments: the message is only rendered if INFO is enabled.
        logger.info('Starting instance of %s in %s.', __class__.__name__, __name__)
        # NOTE(review): the check reads self.raw_x rather than the raw_x argument, so it
        # relies on BaseScaler.__init__ storing the input under that name -- confirm.
        if not isinstance(self.raw_x, np.ndarray):
            raise ValueError(f'A NumPy array is required for normalization. Got {type(self.raw_x)}')

    def scale(self):
        """
        Standardize ``self.raw_x``, the NumPy array of independent variables (features)
        used as 'x' for ML training.

        :return: the array returned by ``preprocessing.scale`` called with
            ``with_mean=True`` and ``with_std=True`` (centering and unit-variance
            scaling enabled).
        """
        logger.info("Counts standardized by Standard Scalar. Check Mean ~0.0 and similar Variances:")
        scaled_x = preprocessing.scale(self.raw_x, with_mean=True, with_std=True)
        # The helper's return value is not used here; presumably it reports summary
        # statistics of the scaled data for the check announced in the log line above.
        stats.normalized_stats(scaled_x)
        return scaled_x