Skip to content

Instantly share code, notes, and snippets.

@andreiz
Last active August 27, 2024 09:42
Show Gist options
  • Select an option

  • Save andreiz/4642209 to your computer and use it in GitHub Desktop.

Select an option

Save andreiz/4642209 to your computer and use it in GitHub Desktop.

Revisions

  1. andreiz revised this gist Jun 27, 2013. 1 changed file with 45 additions and 4 deletions.
    49 changes: 45 additions & 4 deletions classifier.php
    Original file line number Diff line number Diff line change
    @@ -49,6 +49,9 @@
    for ($j=0; $j < NUM_FEATURES+1; $j++)
    $weights[$j] = mt_rand()/mt_getrandmax()*5.0;

    // Calculate the data we need for feature scaling (mean/variance)
    $scaling = calc_feature_scaling($training);

    $learning_rate = 0.05;
    $steps = 20000; // number of steps to take for gradient descent

    @@ -59,9 +62,10 @@
    for ($j = 0; $j < NUM_FEATURES+1; $j++) {
    $sum_m = 0.0;
    for ($i = 0; $i < $NUM_SAMPLES; $i++) {
    $h = hypothesis($training[$i], $weights);
    $scaled_data = scale($training[$i], $scaling);
    $h = hypothesis($scaled_data, $weights);
    // The first weight has a dummy 1 "feature" value
    $part = ($h - $labels[$i]) * ($j==0 ? 1.0 : $training[$i][$j-1]);
    $part = ($h - $labels[$i]) * ($j==0 ? 1.0 : $scaled_data[$j-1]);
    $sum_m = $sum_m + $part;
    }
    $temp[$j] = $weights[$j] - $learning_rate * $sum_m/$NUM_SAMPLES;
    @@ -77,7 +81,7 @@
    print "\nValidating training\n";
    $correct = 0;
    for ($i = 0; $i < $NUM_SAMPLES; $i++) {
    $predict = predict($training[$i], $weights);
    $predict = predict(scale($training[$i], $scaling), $weights);
    printf("Input: %-16s actual: %d, predict: %d", vector_to_str($training[$i]), $labels[$i], $predict);
    if ($labels[$i] != $predict)
    print " - miss";
    @@ -96,7 +100,7 @@
    array(-14., 1.1, 1.),
    );
    for ($i = 0; $i < sizeof($test); $i++) {
    $predict = predict($test[$i], $weights);
    $predict = predict(scale($test[$i], $scaling), $weights);
    printf("Input: %-16s predict: %d\n", vector_to_str($test[$i]), $predict);
    }

    @@ -122,6 +126,43 @@ function predict($input, $weights)
    return $predict;
    }

    /**
     * Apply feature scaling to a single sample.
     *
     * Each value is shifted by the matching entry of $scaling['mean'] and
     * divided by the matching entry of $scaling['variance']. Keys of $input
     * are preserved in the result.
     *
     * @param array $input   feature values of one sample
     * @param array $scaling array with 'mean' and 'variance' sub-arrays keyed like $input
     * @return array the scaled feature values
     */
    function scale($input, $scaling)
    {
        $scaled = array();
        foreach ($input as $f => $raw) {
            $centered = $raw - $scaling['mean'][$f];
            $scaled[$f] = $centered / $scaling['variance'][$f];
        }
        return $scaled;
    }

    /**
     * Compute the per-feature statistics consumed by scale().
     *
     * For each of the NUM_FEATURES features this returns:
     *   - 'mean':     the arithmetic mean of the feature over all samples
     *   - 'variance': the spread of the feature, computed as (max - min).
     *                 Note that despite the key name this is the range,
     *                 not the statistical variance.
     *
     * @param array $data list of samples, each a list of feature values
     * @return array array('mean' => array(...), 'variance' => array(...))
     * @throws Exception if $data is empty, or if a feature has the same
     *                   value in every sample (the spread would be zero and
     *                   scaling would divide by zero)
     */
    function calc_feature_scaling($data)
    {
        $N = count($data);
        // Without samples the mean is undefined (division by zero below),
        // so reject the input explicitly.
        if ($N == 0) {
            throw new Exception("No samples given, invalid data");
        }

        $mins = array_fill(0, NUM_FEATURES, INF);
        $maxs = array_fill(0, NUM_FEATURES, -INF);
        $sums = array_fill(0, NUM_FEATURES, 0);
        $scaling = array('mean' => array(),
                         'variance' => array());

        // Single pass over the data: track min, max and running sum per feature
        foreach ($data as $row) {
            foreach ($row as $f => $value) {
                if ($value > $maxs[$f]) {
                    $maxs[$f] = $value;
                }
                if ($value < $mins[$f]) {
                    $mins[$f] = $value;
                }
                $sums[$f] += $value;
            }
        }

        for ($f = 0; $f < NUM_FEATURES; $f++) {
            $scaling['mean'][$f] = $sums[$f] / $N;
            $scaling['variance'][$f] = $maxs[$f] - $mins[$f];
            // A constant feature would make scale() divide by zero
            if ($scaling['variance'][$f] == 0) {
                throw new Exception("Feature #$f has the same value in all the samples, invalid data");
            }
        }

        return $scaling;
    }

    function vector_to_str($x)
    {
    return '['.implode(", ", $x).']';
  2. andreiz revised this gist Feb 22, 2013. 1 changed file with 6 additions and 3 deletions.
    9 changes: 6 additions & 3 deletions classifier.php
    Original file line number Diff line number Diff line change
    @@ -51,8 +51,8 @@

    $learning_rate = 0.05;
    $steps = 20000; // number of steps to take for gradient descent
    $temp = array(); // temp array to hold updates for weights during the loop

    $temp = array(); // temp array to hold updates for weights during the loop
    for ($n = 0; $n < $steps; $n++) {

    // For each weight, perform the gradient descent step and save the result to temp
    @@ -78,7 +78,10 @@
    $correct = 0;
    for ($i = 0; $i < $NUM_SAMPLES; $i++) {
    $predict = predict($training[$i], $weights);
    printf("Input: %s, actual: %d, predict: %d\n", vector_to_str($training[$i]), $labels[$i], $predict);
    printf("Input: %-16s actual: %d, predict: %d", vector_to_str($training[$i]), $labels[$i], $predict);
    if ($labels[$i] != $predict)
    print " - miss";
    print "\n";
    if ($predict == $labels[$i])
    $correct++;
    }
    @@ -94,7 +97,7 @@
    );
    for ($i = 0; $i < sizeof($test); $i++) {
    $predict = predict($test[$i], $weights);
    printf("Input: %s, predict: %d\n", vector_to_str($test[$i]), $predict);
    printf("Input: %-16s predict: %d\n", vector_to_str($test[$i]), $predict);
    }

    function hypothesis($x, $weights)
  3. andreiz created this gist Jan 26, 2013.
    127 changes: 127 additions & 0 deletions classifier.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,127 @@
    <?php

    error_reporting(E_ALL);

    // Number of features in every sample (the weights array has one more entry).
    define('NUM_FEATURES', 3);

    // My dataset describes cities around the world where I might consider living.
    // Each sample (city) consists of 3 features:
    // * Feature 1: average low winter temperature in the city
    // * Feature 2: city population, in millions
    // * Feature 3: does the city have an airport I can fly to from USA directly?
    //
    // The labels (categories) are 1 (yes) and 0 (no).
    // All the data is floating-point.

    $training = array(
        array(-11., 2.6,  1.),
        array(  8., 0.78, 1.),
        array( 15., 4.2,  0.),
        array(-16., 0.18, 0.),
        array(  3., 1.1,  0.),
        array(  7., 1.4,  1.),
        array( -3., 1.44, 1.),
        array( -7., 0.52, 0.),
        array( 30., 0.82, 1.),
        array( 20., 1.32, 0.),
    );

    // One label per training sample, in the same order as $training.
    // Every label is a float so that the data is uniformly floating-point.
    $labels = array(
        0.,
        1.,
        0.,
        0.,
        1.,
        1.,
        1.,
        0.,
        0.,
        1.,
    );

    $NUM_SAMPLES = count($training);

    // Initialize the weights array to random starting values in [0, 5].
    // There are always 1+NUM_FEATURES weights, because the first weight
    // does not correspond to a feature value, since:
    // weights * features = weight0 + weight1 * feature1 + weight2 * feature2 + ...
    $weights = array();
    for ($j = 0; $j < NUM_FEATURES + 1; $j++) {
        $weights[$j] = mt_rand() / mt_getrandmax() * 5.0;
    }

    $learning_rate = 0.05;
    $steps = 20000; // number of steps to take for gradient descent
    $temp = array(); // temp array to hold updates for weights during the loop

    for ($n = 0; $n < $steps; $n++) {

        // The weights do not change until the end of the step, so the
        // prediction error of each sample is the same for every weight.
        // Compute it once per sample instead of once per (weight, sample).
        $errors = array();
        for ($i = 0; $i < $NUM_SAMPLES; $i++) {
            $errors[$i] = hypothesis($training[$i], $weights) - $labels[$i];
        }

        // For each weight, perform the gradient descent step and save the result to temp
        for ($j = 0; $j < NUM_FEATURES + 1; $j++) {
            $sum_m = 0.0;
            for ($i = 0; $i < $NUM_SAMPLES; $i++) {
                // The first weight has a dummy 1 "feature" value
                $sum_m += $errors[$i] * ($j == 0 ? 1.0 : $training[$i][$j - 1]);
            }
            $temp[$j] = $weights[$j] - $learning_rate * $sum_m / $NUM_SAMPLES;
        }

        // Apply all updates simultaneously
        $weights = $temp;
    }

    // Report how many steps ran and the weights that were learned
    print "Executed $n steps\n";
    print "Weights: " . vector_to_str($weights) . "\n";

    // Run the model over the training set and count how many labels it reproduces
    echo "\nValidating training\n";
    $correct = 0;
    $i = 0;
    while ($i < $NUM_SAMPLES) {
        $sample = $training[$i];
        $expected = $labels[$i];
        $predict = predict($sample, $weights);
        echo sprintf("Input: %s, actual: %d, predict: %d\n", vector_to_str($sample), $expected, $predict);
        $correct += ($predict == $expected) ? 1 : 0;
        $i++;
    }
    // Share of training samples classified correctly, as a percentage
    $accuracy = $correct / $NUM_SAMPLES * 100.0;
    printf("Correctness = %.0f%%\n", $accuracy);

    // Classify a few samples that were not part of the training set
    echo "\nTesting the model\n";
    $test = array(
        array( -1., 1.1, 1.),
        array( 23., 0.9, 0.),
        array(  4., 1.9, 0.),
        array(-14., 1.1, 1.),
    );
    foreach ($test as $candidate) {
        $predict = predict($candidate, $weights);
        echo sprintf("Input: %s, predict: %d\n", vector_to_str($candidate), $predict);
    }

    /**
     * Logistic-regression hypothesis for one sample.
     *
     * Computes weights[0] + sum(weights[i+1] * x[i]) and passes the result
     * through the sigmoid function.
     *
     * @param array $x       feature values, indexed from 0
     * @param array $weights free weight at index 0, then one weight per feature
     * @return float value of the sigmoid at the weighted sum
     */
    function hypothesis($x, $weights)
    {
        // Start from the free weight, which has no matching feature
        $z = $weights[0];
        $featureCount = count($x);
        $idx = 0;
        while ($idx < $featureCount) {
            $z += $weights[$idx + 1] * $x[$idx];
            $idx++;
        }
        // Sigmoid (logistic) function
        $denominator = 1.0 + exp(-$z);
        return 1.0 / $denominator;
    }

    /**
     * Classify one sample by thresholding the hypothesis at 0.5.
     *
     * @param array $input   feature values of the sample
     * @param array $weights model weights, as accepted by hypothesis()
     * @return int 1 when the hypothesis is at least 0.5, otherwise 0
     */
    function predict($input, $weights)
    {
        return hypothesis($input, $weights) >= 0.50 ? 1 : 0;
    }

    /**
     * Format a vector for printing, e.g. "[1, 2, 3]".
     *
     * @param array $x values to print
     * @return string the values joined by ", " and wrapped in square brackets
     */
    function vector_to_str($x)
    {
        $joined = implode(", ", $x);
        return "[{$joined}]";
    }

    ?>