Last active
August 27, 2024 09:42
-
-
Save andreiz/4642209 to your computer and use it in GitHub Desktop.
Revisions
-
andreiz revised this gist
Jun 27, 2013 . 1 changed file with 45 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -49,6 +49,9 @@ for ($j=0; $j < NUM_FEATURES+1; $j++) $weights[$j] = mt_rand()/mt_getrandmax()*5.0; // Calculate the data we need for feature scaling (mean/variance) $scaling = calc_feature_scaling($training); $learning_rate = 0.05; $steps = 20000; // number of steps to take for gradient descent @@ -59,9 +62,10 @@ for ($j = 0; $j < NUM_FEATURES+1; $j++) { $sum_m = 0.0; for ($i = 0; $i < $NUM_SAMPLES; $i++) { $scaled_data = scale($training[$i], $scaling); $h = hypothesis($scaled_data, $weights); // The first weight has a dummy 1 "feature" value $part = ($h - $labels[$i]) * ($j==0 ? 1.0 : $scaled_data[$j-1]); $sum_m = $sum_m + $part; } $temp[$j] = $weights[$j] - $learning_rate * $sum_m/$NUM_SAMPLES; @@ -77,7 +81,7 @@ print "\nValidating training\n"; $correct = 0; for ($i = 0; $i < $NUM_SAMPLES; $i++) { $predict = predict(scale($training[$i], $scaling), $weights); printf("Input: %-16s actual: %d, predict: %d", vector_to_str($training[$i]), $labels[$i], $predict); if ($labels[$i] != $predict) print " - miss"; @@ -96,7 +100,7 @@ array(-14., 1.1, 1.), ); for ($i = 0; $i < sizeof($test); $i++) { $predict = predict(scale($test[$i], $scaling), $weights); printf("Input: %-16s predict: %d\n", vector_to_str($test[$i]), $predict); } @@ -122,6 +126,43 @@ function predict($input, $weights) return $predict; } function scale($input, $scaling) { foreach ($input as $f => &$value) { $value = ($value - $scaling['mean'][$f]) / $scaling['variance'][$f]; } return $input; } function calc_feature_scaling($data) { $mins = array_fill(0, NUM_FEATURES, INF); $maxs = array_fill(0, NUM_FEATURES, -INF); $sums = array_fill(0, NUM_FEATURES, 0); $scaling = array('mean' => array(), 'variance' => array()); $N = sizeof($data); foreach ($data as $i => $row) { foreach ($row as $f => $value) { if ($value > $maxs[$f]) $maxs[$f] = $value; if ($value < $mins[$f]) $mins[$f] = $value; $sums[$f] += $value; } } for ($f = 0; $f < NUM_FEATURES; $f++) { $scaling['mean'][$f] = $sums[$f] / $N; $scaling['variance'][$f] = $maxs[$f] - $mins[$f]; if ($scaling['variance'][$f] == 0) throw new Exception("Feature #$f has the same value in all the samples, invalid data"); } return $scaling; } function vector_to_str($x) { return '['.implode(", ", $x).']'; -
andreiz revised this gist
Feb 22, 2013 . 1 changed file with 6 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -51,8 +51,8 @@ $learning_rate = 0.05; $steps = 20000; // number of steps to take for gradient descent $temp = array(); // temp array to hold updates for weights during the loop for ($n = 0; $n < $steps; $n++) { // For each weight, perform the gradient descent step and save the result to temp @@ -78,7 +78,10 @@ $correct = 0; for ($i = 0; $i < $NUM_SAMPLES; $i++) { $predict = predict($training[$i], $weights); printf("Input: %-16s actual: %d, predict: %d", vector_to_str($training[$i]), $labels[$i], $predict); if ($labels[$i] != $predict) print " - miss"; print "\n"; if ($predict == $labels[$i]) $correct++; } @@ -94,7 +97,7 @@ ); for ($i = 0; $i < sizeof($test); $i++) { $predict = predict($test[$i], $weights); printf("Input: %-16s predict: %d\n", vector_to_str($test[$i]), $predict); } function hypothesis($x, $weights) -
andreiz created this gist
Jan 26, 2013 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,127 @@ <?php error_reporting(E_ALL); define('NUM_FEATURES', 3); // My dataset describes cities around the world where I might consider living. // Each sample (city) consists of 3 features: // * Feature 1: average low winter temperature in the city // * Feature 2: city population, in millions // * Feature 3: does the city have an airport I can fly to from USA directly? // // The labels (categories) are 1 (yes) and 0 (no). // All the data is floating-point. $training = array( array(-11., 2.6, 1.), array( 8., 0.78, 1.), array( 15., 4.2, 0.), array(-16., 0.18, 0.), array( 3., 1.1, 0.), array( 7., 1.4, 1.), array( -3., 1.44, 1.), array( -7., 0.52, 0.), array( 30., 0.82, 1.), array( 20., 1.32, 0.), ); $labels = array( 0., 1., 0., 0., 1., 1., 1., 0., 0., 1 ); $NUM_SAMPLES = sizeof($training); // Initialize the weights array to random starting values. // There are always 1+NUM_FEATURES weights, because the first weight // does not correspond to a feature value, since: // weights * features = weight0 + weight1 * feature1 + weight2 * feature2 + ... $weights = array(); for ($j=0; $j < NUM_FEATURES+1; $j++) $weights[$j] = mt_rand()/mt_getrandmax()*5.0; $learning_rate = 0.05; $steps = 20000; // number of steps to take for gradient descent $temp = array(); // temp array to hold updates for weights during the loop for ($n = 0; $n < $steps; $n++) { // For each weight, perform the gradient descent step and save the result to temp for ($j = 0; $j < NUM_FEATURES+1; $j++) { $sum_m = 0.0; for ($i = 0; $i < $NUM_SAMPLES; $i++) { $h = hypothesis($training[$i], $weights); // The first weight has a dummy 1 "feature" value $part = ($h - $labels[$i]) * ($j==0 ? 1.0 : $training[$i][$j-1]); $sum_m = $sum_m + $part; } $temp[$j] = $weights[$j] - $learning_rate * $sum_m/$NUM_SAMPLES; } $weights = $temp; } echo "Executed $n steps\n"; echo "Weights: ", vector_to_str($weights), "\n"; // Validate the results print "\nValidating training\n"; $correct = 0; for ($i = 0; $i < $NUM_SAMPLES; $i++) { $predict = predict($training[$i], $weights); printf("Input: %s, actual: %d, predict: %d\n", vector_to_str($training[$i]), $labels[$i], $predict); if ($predict == $labels[$i]) $correct++; } printf("Correctness = %.0f%%\n", $correct/$NUM_SAMPLES*100.0); // Try some predictions print "\nTesting the model\n"; $test = array( array(-1., 1.1, 1.), array(23., 0.9, 0.), array( 4., 1.9, 0.), array(-14., 1.1, 1.), ); for ($i = 0; $i < sizeof($test); $i++) { $predict = predict($test[$i], $weights); printf("Input: %s, predict: %d\n", vector_to_str($test[$i]), $predict); } function hypothesis($x, $weights) { $score = $weights[0]; // free weight $k = sizeof($x); // Calculate dot product for ($i = 0; $i < $k; $i++) $score += $weights[$i+1] * $x[$i]; // Run through the sigmoid (logistic) function return 1.0/(1.0 + exp(-$score)); } function predict($input, $weights) { $output = hypothesis($input, $weights); // Threshold on 0.5 if ($output >= 0.50) $predict = 1; else $predict = 0; return $predict; } function vector_to_str($x) { return '['.implode(", ", $x).']'; } ?>