@gdiasag
Created December 3, 2024 20:14

proj1

This project implements a minimal neural network in C for classifying handwritten digits from the MNIST dataset: a fully connected 784-256-10 network with a ReLU hidden layer and a softmax output, trained one example at a time with SGD plus momentum.

Usage

Requirements

The project requires:

  • GCC compiler

  • MNIST dataset files (expected under a data/ directory)

    • train-images.idx3-ubyte
    • train-labels.idx1-ubyte

Compilation

Sequential version:

gcc -O3 -march=native -ffast-math -o nn nn.c -lm

Run

Make sure the dataset files are placed in a data/ directory in the working directory, matching the TRAIN_IMG_PATH and TRAIN_LBL_PATH constants in nn.c; an example layout is shown below.
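A minimal sketch of the expected layout, assuming the binary is run from the project root:

nn.c
nn                              (compiled binary)
data/train-images.idx3-ubyte
data/train-labels.idx1-ubyte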

./nn
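
Each epoch prints a single line with the accuracy on the held-out 20% split, the average training loss, and the time taken for that epoch.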
/*
 * Sequential version, timed with fish's `time`:
 *
 *   Executed in  256.52 secs    fish           external
 *      usr time  255.69 secs   24.00 micros  255.69 secs
 *      sys time    0.34 secs  982.00 micros    0.34 secs
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#define INPUT_SIZE 784
#define HIDDEN_SIZE 256
#define OUTPUT_SIZE 10
#define LEARNING_RATE 0.0005f
#define MOMENTUM 0.9f
#define EPOCHS 20
#define BATCH_SIZE 64
#define IMAGE_SIZE 28
#define TRAIN_SPLIT 0.8
#define TRAIN_IMG_PATH "data/train-images.idx3-ubyte"
#define TRAIN_LBL_PATH "data/train-labels.idx1-ubyte"
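/*
 * Network defined by the constants above: 784 inputs -> 256 hidden units
 * (ReLU) -> 10 softmax outputs, trained one example at a time with SGD plus
 * momentum. The first 80% of the dataset is used for training, the remaining
 * 20% for evaluation. BATCH_SIZE is defined but unused in this version.
 */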
typedef struct {
    float *weights;          // input_size x output_size, indexed as weights[j * output_size + i]
    float *biases;
    float *weight_momentum;  // momentum buffers for the SGD updates
    float *bias_momentum;
    int input_size;
    int output_size;
} Layer;

typedef struct {
    Layer hidden;
    Layer output;
} Network;

typedef struct {
    unsigned char *images;
    unsigned char *labels;
    int nImages;
} InputData;

// Numerically stable softmax: subtract the max logit before exponentiating.
void softmax(float *input, int size)
{
    float max = input[0];
    float sum = 0;

    for (int i = 1; i < size; i++) {
        if (input[i] > max) {
            max = input[i];
        }
    }
    for (int i = 0; i < size; i++) {
        input[i] = expf(input[i] - max);
        sum += input[i];
    }
    for (int i = 0; i < size; i++) {
        input[i] /= sum;
    }
}

// Allocates a layer and initialises it: weights drawn uniformly from
// [-scale, scale] with scale = sqrt(2 / fan_in), biases and momentum at zero.
void init_layer(Layer *layer, int in_size, int out_size)
{
    int n = in_size * out_size;

    layer->input_size = in_size;
    layer->output_size = out_size;
    layer->weights = malloc(n * sizeof(float));
    layer->biases = calloc(out_size, sizeof(float));
    layer->weight_momentum = calloc(n, sizeof(float));
    layer->bias_momentum = calloc(out_size, sizeof(float));

    float scale = sqrtf(2.0f / in_size);
    for (int i = 0; i < n; i++) {
        layer->weights[i] = ((float)rand() / RAND_MAX - 0.5f) * 2 * scale;
    }
}

// Dense layer forward pass: start from the biases, accumulate input * weight,
// then apply ReLU. Note the ReLU clamp is also applied to the output layer's
// logits before softmax.
void forward(Layer *layer, float *input, float *output)
{
    for (int i = 0; i < layer->output_size; i++) {
        output[i] = layer->biases[i];
    }
    for (int j = 0; j < layer->input_size; j++) {
        float in_j = input[j];
        float *weight_row = &layer->weights[j * layer->output_size];
        for (int i = 0; i < layer->output_size; i++) {
            output[i] += in_j * weight_row[i];
        }
    }
    for (int i = 0; i < layer->output_size; i++) {
        output[i] = output[i] > 0 ? output[i] : 0;
    }
}

// Backpropagate through one layer and update its parameters with SGD plus
// momentum. If input_grad is non-NULL, it receives the gradient with respect
// to the layer's input, computed once with the pre-update weights.
void backward(Layer *layer, float *input, float *output_grad, float *input_grad,
              float lr)
{
    // Gradient w.r.t. this layer's input.
    if (input_grad) {
        for (int j = 0; j < layer->input_size; j++) {
            input_grad[j] = 0.0f;
            float *weight_row = &layer->weights[j * layer->output_size];
            for (int i = 0; i < layer->output_size; i++) {
                input_grad[j] += output_grad[i] * weight_row[i];
            }
        }
    }

    // Momentum update of the weights.
    for (int j = 0; j < layer->input_size; j++) {
        float in_j = input[j];
        float *weight_row = &layer->weights[j * layer->output_size];
        float *momentum_row = &layer->weight_momentum[j * layer->output_size];
        for (int i = 0; i < layer->output_size; i++) {
            float grad = output_grad[i] * in_j;
            momentum_row[i] = MOMENTUM * momentum_row[i] + lr * grad;
            weight_row[i] -= momentum_row[i];
        }
    }

    // Momentum update of the biases.
    for (int i = 0; i < layer->output_size; i++) {
        layer->bias_momentum[i] = MOMENTUM * layer->bias_momentum[i] + lr * output_grad[i];
        layer->biases[i] -= layer->bias_momentum[i];
    }
}

// One training step on a single example: forward pass, softmax cross-entropy
// gradient, then backprop through the output and hidden layers.
float* train(Network *net, float *input, int label, float lr)
{
    static float final_output[OUTPUT_SIZE];
    float hidden_output[HIDDEN_SIZE];
    float output_grad[OUTPUT_SIZE] = {0}, hidden_grad[HIDDEN_SIZE] = {0};

    forward(&net->hidden, input, hidden_output);
    forward(&net->output, hidden_output, final_output);
    softmax(final_output, OUTPUT_SIZE);

    // Gradient of cross-entropy w.r.t. the logits: softmax output minus the one-hot label.
    for (int i = 0; i < OUTPUT_SIZE; i++) {
        output_grad[i] = final_output[i] - (i == label);
    }

    backward(&net->output, hidden_output, output_grad, hidden_grad, lr);

    // ReLU derivative: zero the gradient where the hidden activation was zero.
    for (int i = 0; i < HIDDEN_SIZE; i++) {
        hidden_grad[i] *= hidden_output[i] > 0 ? 1 : 0;
    }

    backward(&net->hidden, input, hidden_grad, NULL, lr);
    return final_output;
}

// Forward pass only; returns the index of the highest-probability class.
int predict(Network *net, float *input)
{
    float hidden_output[HIDDEN_SIZE], final_output[OUTPUT_SIZE];

    forward(&net->hidden, input, hidden_output);
    forward(&net->output, hidden_output, final_output);
    softmax(final_output, OUTPUT_SIZE);

    int max_index = 0;
    for (int i = 1; i < OUTPUT_SIZE; i++) {
        if (final_output[i] > final_output[max_index]) {
            max_index = i;
        }
    }
    return max_index;
}

// Reads an IDX image file: a 16-byte big-endian header (magic, count, rows,
// cols) followed by the raw pixel bytes.
void read_mnist_images(const char *filename, unsigned char **images, int *nImages)
{
    FILE *file = fopen(filename, "rb");
    if (file == NULL) {
        fprintf(stderr, "Failed to open %s\n", filename);
        exit(1);
    }

    int temp, rows, cols;
    fread(&temp, sizeof(int), 1, file);      // magic number, unused
    fread(nImages, sizeof(int), 1, file);
    *nImages = __builtin_bswap32(*nImages);
    fread(&rows, sizeof(int), 1, file);
    fread(&cols, sizeof(int), 1, file);
    rows = __builtin_bswap32(rows);
    cols = __builtin_bswap32(cols);

    *images = malloc((*nImages) * IMAGE_SIZE * IMAGE_SIZE);
    fread(*images, sizeof(unsigned char), (*nImages) * IMAGE_SIZE * IMAGE_SIZE, file);
    fclose(file);
}

// Reads an IDX label file: an 8-byte big-endian header (magic, count)
// followed by one byte per label.
void read_mnist_labels(const char *filename, unsigned char **labels, int *nLabels)
{
    FILE *file = fopen(filename, "rb");
    if (file == NULL) {
        fprintf(stderr, "Failed to open %s\n", filename);
        exit(1);
    }

    int temp;
    fread(&temp, sizeof(int), 1, file);      // magic number, unused
    fread(nLabels, sizeof(int), 1, file);
    *nLabels = __builtin_bswap32(*nLabels);

    *labels = malloc(*nLabels);
    fread(*labels, sizeof(unsigned char), *nLabels, file);
    fclose(file);
}

// Fisher-Yates shuffle, swapping each image together with its label.
void shuffle_data(unsigned char *images, unsigned char *labels, int n)
{
    for (int i = n - 1; i > 0; i--) {
        int j = rand() % (i + 1);
        for (int k = 0; k < INPUT_SIZE; k++) {
            unsigned char temp = images[i * INPUT_SIZE + k];
            images[i * INPUT_SIZE + k] = images[j * INPUT_SIZE + k];
            images[j * INPUT_SIZE + k] = temp;
        }
        unsigned char temp = labels[i];
        labels[i] = labels[j];
        labels[j] = temp;
    }
}

int main()
{
    Network net;
    InputData data = {0};
    float learning_rate = LEARNING_RATE, img[INPUT_SIZE];
    clock_t start, end;
    double cpu_time_used;

    srand(time(NULL));

    init_layer(&net.hidden, INPUT_SIZE, HIDDEN_SIZE);
    init_layer(&net.output, HIDDEN_SIZE, OUTPUT_SIZE);

    read_mnist_images(TRAIN_IMG_PATH, &data.images, &data.nImages);
    // The label file holds one label per image, so nImages is overwritten with the same count.
    read_mnist_labels(TRAIN_LBL_PATH, &data.labels, &data.nImages);
    shuffle_data(data.images, data.labels, data.nImages);

    // First 80% of the shuffled data is used for training, the rest for evaluation.
    int train_size = (int)(data.nImages * TRAIN_SPLIT);
    int test_size = data.nImages - train_size;

    for (int epoch = 0; epoch < EPOCHS; epoch++) {
        start = clock();
        float total_loss = 0;

        // Train on one example at a time, accumulating the cross-entropy loss.
        for (int i = 0; i < train_size; i++) {
            for (int k = 0; k < INPUT_SIZE; k++) {
                img[k] = data.images[i * INPUT_SIZE + k] / 255.0f;  // scale pixels to [0, 1]
            }
            float *final_output = train(&net, img, data.labels[i], learning_rate);
            total_loss += -logf(final_output[data.labels[i]] + 1e-10f);
        }

        // Evaluate accuracy on the held-out split.
        int correct = 0;
        for (int i = train_size; i < data.nImages; i++) {
            for (int k = 0; k < INPUT_SIZE; k++) {
                img[k] = data.images[i * INPUT_SIZE + k] / 255.0f;
            }
            if (predict(&net, img) == data.labels[i]) {
                correct++;
            }
        }

        end = clock();
        cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
        printf("Epoch %d, Accuracy: %.2f%%, Avg Loss: %.4f, Time: %.2f seconds\n",
               epoch + 1, (float)correct / test_size * 100, total_loss / train_size, cpu_time_used);
    }

    free(net.hidden.weights);
    free(net.hidden.biases);
    free(net.hidden.weight_momentum);
    free(net.hidden.bias_momentum);
    free(net.output.weights);
    free(net.output.biases);
    free(net.output.weight_momentum);
    free(net.output.bias_momentum);
    free(data.images);
    free(data.labels);

    return 0;
}