using System;
using System.Data;
using System.Linq;
using System.Collections.Generic;
using System.Reflection;
using System.ComponentModel;

namespace PricePrediction.MachineLearning
{
    /// <summary>
    /// Standardize features by removing the mean and scaling to unit variance:
    /// every value <c>x</c> of a column becomes <c>(x - mean) / standardDeviation</c>.
    /// more : https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
    /// </summary>
    /// <remarks>
    /// Columns are obtained through the <c>ToArraysOfColumns</c> extension and the deviation
    /// through <c>Calculations.StandardDeviation</c>; both are declared outside this file.
    /// NOTE(review): presumably each column corresponds to one property of
    /// <typeparamref name="T"/>, in <see cref="TypeDescriptor.GetProperties(Type)"/> order,
    /// and every property accepts a <see cref="double"/> - confirm against those helpers.
    /// </remarks>
    /// <typeparam name="T">Row type; must expose a public parameterless constructor.</typeparam>
    public class StandardScaler<T> where T : new()
    {
        // Per-column statistics captured by Fit; both stay null until the first successful Fit.
        private List<double> _mean;
        private List<double> _standardDeviation;

        /// <summary>
        /// Fit then transform: learns the column statistics from <paramref name="listOfObjects"/>
        /// and returns the standardized copy of the same data.
        /// </summary>
        /// <param name="listOfObjects">Rows used both for fitting and for transforming.</param>
        /// <returns>A new list holding the standardized rows.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="listOfObjects"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="listOfObjects"/> is empty.</exception>
        public List<T> FitTransform(List<T> listOfObjects)
        {
            return Fit(listOfObjects).Transform(listOfObjects);
        }

        /// <summary>
        /// Resets any previously learned statistics, then computes the mean and standard
        /// deviation of every column of <paramref name="listOfObjects"/>.
        /// </summary>
        /// <param name="listOfObjects">Rows to learn the statistics from; must not be empty.</param>
        /// <returns>This instance, so calls can be chained.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="listOfObjects"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="listOfObjects"/> is empty.</exception>
        public StandardScaler<T> Fit(List<T> listOfObjects)
        {
            if (listOfObjects is null)
            {
                throw new ArgumentNullException(nameof(listOfObjects));
            }

            if (listOfObjects.Count < 1)
            {
                throw new ArgumentException("no data", nameof(listOfObjects));
            }

            var columns = listOfObjects.ToArraysOfColumns();

            // Build into locals and publish only at the end, so a failed Fit never leaves
            // the scaler in a half-reset state that Transform would mistake for "fitted".
            var means = new List<double>(columns.Length);
            var deviations = new List<double>(columns.Length);

            foreach (var column in columns)
            {
                means.Add(column.Average());

                double deviation = Calculations.StandardDeviation(column);

                // A constant column has zero deviation; dividing by it would produce NaN or
                // Infinity. Use a scale of 1 instead (same convention as scikit-learn), which
                // maps such a column to all zeros.
                deviations.Add(deviation == 0 ? 1.0 : deviation);
            }

            _mean = means;
            _standardDeviation = deviations;
            return this;
        }

        /// <summary>
        /// Standardizes <paramref name="listOfObjects"/> using the statistics learned by
        /// <see cref="Fit"/>.
        /// </summary>
        /// <param name="listOfObjects">Rows to standardize.</param>
        /// <returns>
        /// A new list of freshly constructed rows holding the standardized values; an empty
        /// list when <paramref name="listOfObjects"/> is empty.
        /// </returns>
        /// <exception cref="InvalidOperationException">The scaler has not been fitted yet.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="listOfObjects"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The number of columns differs from the number of columns seen by <see cref="Fit"/>.
        /// </exception>
        public List<T> Transform(List<T> listOfObjects)
        {
            if (_mean == null)
            {
                throw new InvalidOperationException("This StandardScaler instance is not fitted yet. Call 'Fit' with appropriate arguments before using this estimator.");
            }

            if (listOfObjects is null)
            {
                throw new ArgumentNullException(nameof(listOfObjects));
            }

            // Nothing to scale; avoids indexing into an empty column set further down.
            if (listOfObjects.Count == 0)
            {
                return new List<T>();
            }

            var columns = listOfObjects.ToArraysOfColumns();

            if (columns.Length != _mean.Count)
            {
                throw new ArgumentException("number of fitted columns not same as current one", nameof(listOfObjects));
            }

            for (int c = 0; c < columns.Length; c++)
            {
                // Hoist the per-column statistics out of the row loop.
                double mean = _mean[c];
                double deviation = _standardDeviation[c];
                double[] column = columns[c];

                for (int r = 0; r < column.Length; r++)
                {
                    column[r] = (column[r] - mean) / deviation;
                }
            }

            return ToListOfObject(columns);
        }

        /// <summary>
        /// Rebuilds a list of <typeparamref name="T"/> from column arrays: property <c>j</c> of
        /// row <c>i</c> receives <c>arr[j][i]</c>.
        /// </summary>
        /// <param name="arr">Column-major values; <c>arr[0].Length</c> is taken as the row count.</param>
        /// <returns>One new <typeparamref name="T"/> per row; empty when there are no columns.</returns>
        private static List<T> ToListOfObject(double[][] arr)
        {
            if (arr.Length == 0)
            {
                return new List<T>();
            }

            PropertyDescriptorCollection properties = TypeDescriptor.GetProperties(typeof(T));
            int objectsCount = arr[0].Length;
            var res = new List<T>(objectsCount);

            for (int i = 0; i < objectsCount; i++)
            {
                // Box once: SetValue takes 'object', so passing a struct T directly would box
                // a fresh copy on every call and silently discard each assignment.
                object row = new T();

                // NOTE(review): assumes arr has at least properties.Count columns and that
                // every property is assignable from double - confirm with ToArraysOfColumns.
                for (int j = 0; j < properties.Count; j++)
                {
                    properties[j].SetValue(row, arr[j][i]);
                }

                res.Add((T)row);
            }

            return res;
        }
    }
}