using System;
using System.Data;
using System.Linq;
using System.Collections.Generic;
using System.Reflection;
using System.ComponentModel;
namespace PricePrediction.MachineLearning
{
/// <summary>
/// Standardize features by removing the mean and scaling to unit variance.
/// </summary>
/// <remarks>
/// more : https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
/// </remarks>
/// <typeparam name="T">
/// Row type. Must expose a public parameterless constructor because scaled rows are
/// materialised as new instances of <typeparamref name="T"/>.
/// </typeparam>
public class StandardScaler<T> where T : new()
{
    // Per-column statistics captured by Fit; both stay null until the first successful Fit.
    private List<double> _mean;
    private List<double> _standardDeviation;

    /// <summary>
    /// Fits the scaler on <paramref name="listOfObjects"/> and returns the scaled copy of the same data.
    /// </summary>
    /// <param name="listOfObjects">Rows used both to compute the statistics and to be scaled.</param>
    /// <returns>A new list of scaled <typeparamref name="T"/> instances.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="listOfObjects"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="listOfObjects"/> is empty.</exception>
    public List<T> FitTransform(List<T> listOfObjects)
    {
        return Fit(listOfObjects).Transform(listOfObjects);
    }

    /// <summary>
    /// Discards any previously fitted statistics and computes the mean and standard
    /// deviation of every column of <paramref name="listOfObjects"/>.
    /// </summary>
    /// <param name="listOfObjects">Rows to compute the statistics from; must contain at least one row.</param>
    /// <returns>This instance, so calls can be chained.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="listOfObjects"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="listOfObjects"/> is empty.</exception>
    public StandardScaler<T> Fit(List<T> listOfObjects)
    {
        if (listOfObjects is null)
        {
            throw new ArgumentNullException(nameof(listOfObjects));
        }

        if (listOfObjects.Count < 1)
        {
            throw new ArgumentException("no data", nameof(listOfObjects));
        }

        var columns = listOfObjects.ToArraysOfColumns();

        // Build into locals and publish only at the end, so a failure while fitting
        // never leaves the scaler holding partially populated statistics.
        var mean = new List<double>(columns.Length);
        var standardDeviation = new List<double>(columns.Length);
        for (int i = 0; i < columns.Length; i++)
        {
            mean.Add(columns[i].Average());

            double deviation = Calculations.StandardDeviation(columns[i]);

            // A constant column has zero deviation; dividing by it would yield NaN/Infinity.
            // Use a scale of 1 instead (the same choice scikit-learn makes), so such a
            // column is only centred. The exact comparison with zero is intentional.
            standardDeviation.Add(deviation == 0.0 ? 1.0 : deviation);
        }

        _mean = mean;
        _standardDeviation = standardDeviation;
        return this;
    }

    /// <summary>
    /// Scales <paramref name="listOfObjects"/> column by column using the statistics
    /// captured by the last call to <see cref="Fit"/>: (value - mean) / standard deviation.
    /// </summary>
    /// <param name="listOfObjects">Rows to scale. The input objects are read via ToArraysOfColumns; results are new instances.</param>
    /// <returns>A new list of scaled <typeparamref name="T"/> instances; empty when the input is empty.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="listOfObjects"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The scaler has not been fitted, or the number of columns differs from the fitted one.
    /// </exception>
    public List<T> Transform(List<T> listOfObjects)
    {
        if (listOfObjects is null)
        {
            throw new ArgumentNullException(nameof(listOfObjects));
        }

        if (_mean is null || _standardDeviation is null)
        {
            throw new InvalidOperationException("This StandardScaler instance is not fitted yet. Call 'Fit' with appropriate arguments before using this estimator.");
        }

        // Nothing to scale; avoids indexing into an empty column set further down.
        if (listOfObjects.Count == 0)
        {
            return new List<T>();
        }

        var columns = listOfObjects.ToArraysOfColumns();
        if (columns.Length != _mean.Count)
        {
            throw new InvalidOperationException("number of fitted columns not same as current one");
        }

        for (int c = 0; c < columns.Length; c++)
        {
            double mean = _mean[c];
            double scale = _standardDeviation[c];
            double[] column = columns[c];
            for (int r = 0; r < column.Length; r++)
            {
                column[r] = (column[r] - mean) / scale;
            }
        }

        return ToListOfObject(columns);
    }

    /// <summary>
    /// Rebuilds row objects from column arrays: element <c>arr[j][i]</c> is assigned to
    /// property <c>j</c> of row <c>i</c>.
    /// </summary>
    /// <param name="arr">Column-major values; every column is expected to have the same length as <c>arr[0]</c>.</param>
    /// <returns>One new <typeparamref name="T"/> per row; empty when <paramref name="arr"/> has no columns.</returns>
    private static List<T> ToListOfObject(double[][] arr)
    {
        if (arr.Length == 0)
        {
            return new List<T>();
        }

        // NOTE(review): presumably ToArraysOfColumns emits columns in this same
        // TypeDescriptor property order, and every property accepts a double - confirm.
        PropertyDescriptorCollection properties = TypeDescriptor.GetProperties(typeof(T));
        int objectsCount = arr[0].Length;
        var res = new List<T>(objectsCount);
        for (int i = 0; i < objectsCount; i++)
        {
            T o = new();
            for (int j = 0; j < properties.Count; j++)
            {
                properties[j].SetValue(o, arr[j][i]);
            }

            res.Add(o);
        }

        return res;
    }
}
}