/* NOTE: The epoch count passed to nn_train may need to be increased for reliable convergence. */
/*
* Simple Neural Network Library in C
* Single-file implementation for easy compilation
*
* Compile: gcc -Wall -Wextra -std=c99 -O2 -o nn nn.c -lm
* Run: ./nn
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
/* ==================== Type Definitions ==================== */
/* Function pointer types for activation functions */
/* Function pointer types for activation functions.
 * Both map a single pre-activation value to a double; the derivative
 * is evaluated at the pre-activation value (not the activated output). */
typedef double (*activation_fn)(double x);
typedef double (*activation_deriv_fn)(double x);
/* Layer structure - represents a fully connected layer.
 * All arrays are heap-allocated by nn_create_layer and owned by the layer;
 * nn_free_layer releases them. The weight matrix is stored row-major:
 * weights[j * input_size + k] is the weight from input k to neuron j. */
typedef struct {
int input_size; /* Number of inputs to this layer */
int output_size; /* Number of neurons in this layer */
double *weights; /* Weight matrix [output_size][input_size], row-major */
double *biases; /* Bias vector [output_size] */
double *outputs; /* Activated outputs [output_size] */
double *pre_activation; /* Pre-activation values [output_size], kept for backprop */
double *deltas; /* Error gradients w.r.t. pre-activations [output_size] */
double *weight_grads; /* Accumulated weight gradients [output_size][input_size] */
double *bias_grads; /* Accumulated bias gradients [output_size] */
activation_fn activate; /* Activation function */
activation_deriv_fn activate_deriv;/* Activation derivative */
} nn_layer_t;
/* Network structure - contains all layers.
 * num_layers counts computational layers only (the input layer is implicit).
 * layer_inputs[i] is a copy of the input fed to layer i on the most recent
 * forward pass; backprop reads it when accumulating weight gradients. */
typedef struct {
int num_layers; /* Number of layers (excluding input) */
nn_layer_t **layers; /* Array of layer pointers (owned) */
double learning_rate; /* Learning rate for SGD */
double **layer_inputs; /* Store inputs for each layer (owned) */
} nn_network_t;
/* ==================== Activation Functions ==================== */
/* Sigmoid activation: 1 / (1 + e^-x) */
static double nn_sigmoid(double x) {
return 1.0 / (1.0 + exp(-x));
}
/* Sigmoid derivative: sigmoid(x) * (1 - sigmoid(x)) */
static double nn_sigmoid_deriv(double x) {
double s = nn_sigmoid(x);
return s * (1.0 - s);
}
/* ReLU activation: max(0, x) */
/* Rectified linear unit: passes positive inputs, clamps the rest to zero. */
static double nn_relu(double x) {
    if (x > 0.0) {
        return x;
    }
    return 0.0;
}
/* ReLU derivative: 1 if x > 0, else 0 */
/* Derivative of ReLU: unit slope on the positive side, zero elsewhere
 * (the subgradient at exactly x == 0 is taken as 0). */
static double nn_relu_deriv(double x) {
    if (x > 0.0) {
        return 1.0;
    }
    return 0.0;
}
/* Tanh activation */
static double nn_tanh(double x) {
return tanh(x);
}
/* Tanh derivative: 1 - tanh(x)^2 */
static double nn_tanh_deriv(double x) {
double t = tanh(x);
return 1.0 - t * t;
}
/* Linear activation (identity) */
/* Identity activation: returns its input unchanged (for regression outputs). */
static double nn_linear(double x) {
    return x;
}
/* Linear derivative (always 1) */
/* Derivative of the identity activation: constant 1 everywhere. */
static double nn_linear_deriv(double x) {
    (void)x; /* slope does not depend on the input */
    return 1.0;
}
/* ==================== Layer Functions ==================== */
/* Create a new layer with specified dimensions */
/* Create a new fully connected layer with the given dimensions.
 *
 * input_size/output_size: layer dimensions (assumed positive).
 * activate/activate_deriv: activation function and its derivative.
 *
 * Returns a heap-allocated layer; the caller owns it and releases it with
 * nn_free_layer(). Aborts the process on allocation failure (fail-fast,
 * matching the rest of the library).
 */
static nn_layer_t* nn_create_layer(int input_size, int output_size,
                                   activation_fn activate,
                                   activation_deriv_fn activate_deriv) {
    nn_layer_t *layer = malloc(sizeof *layer);
    if (!layer) {
        fprintf(stderr, "Error: Failed to allocate layer\n");
        exit(EXIT_FAILURE);
    }
    layer->input_size = input_size;
    layer->output_size = output_size;
    /* Promote to size_t BEFORE multiplying so large layers cannot overflow
     * int arithmetic; calloc also checks count*size overflow internally and
     * zero-initializes every array (grads start at 0, as backprop expects). */
    size_t n_weights = (size_t)output_size * (size_t)input_size;
    size_t n_out = (size_t)output_size;
    layer->weights = calloc(n_weights, sizeof *layer->weights);
    layer->biases = calloc(n_out, sizeof *layer->biases);
    layer->outputs = calloc(n_out, sizeof *layer->outputs);
    layer->pre_activation = calloc(n_out, sizeof *layer->pre_activation);
    layer->deltas = calloc(n_out, sizeof *layer->deltas);
    layer->weight_grads = calloc(n_weights, sizeof *layer->weight_grads);
    layer->bias_grads = calloc(n_out, sizeof *layer->bias_grads);
    layer->activate = activate;
    layer->activate_deriv = activate_deriv;
    /* Check all allocations succeeded before handing the layer out. */
    if (!layer->weights || !layer->biases || !layer->outputs ||
        !layer->pre_activation || !layer->deltas ||
        !layer->weight_grads || !layer->bias_grads) {
        fprintf(stderr, "Error: Failed to allocate layer arrays\n");
        exit(EXIT_FAILURE);
    }
    return layer;
}
/* Free a layer and all its memory */
/* Release a layer and every array it owns; safe to call with NULL. */
static void nn_free_layer(nn_layer_t *layer) {
    if (layer == NULL) {
        return;
    }
    free(layer->bias_grads);
    free(layer->weight_grads);
    free(layer->deltas);
    free(layer->pre_activation);
    free(layer->outputs);
    free(layer->biases);
    free(layer->weights);
    free(layer);
}
/* Initialize layer weights with small random values */
/* Seed the global RNG and fill the layer with Xavier/Glorot-style uniform
 * weights in [-limit, +limit]; biases and all gradients start at zero. */
static void nn_init_layer_weights(nn_layer_t *layer, unsigned int seed) {
    srand(seed);
    const int n_weights = layer->output_size * layer->input_size;
    const double limit = sqrt(6.0 / (layer->input_size + layer->output_size));
    for (int w = 0; w < n_weights; w++) {
        double u = (double)rand() / RAND_MAX; /* uniform in [0, 1] */
        layer->weights[w] = (u - 0.5) * 2.0 * limit;
        layer->weight_grads[w] = 0.0;
    }
    for (int b = 0; b < layer->output_size; b++) {
        layer->biases[b] = 0.0;
        layer->bias_grads[b] = 0.0;
    }
}
/* ==================== Network Functions ==================== */
/* Create a neural network with specified layer sizes */
/* Create a neural network with the given layer sizes.
 *
 * layer_sizes: array of num_layers sizes; layer_sizes[0] is the input width.
 * num_layers:  total layer count including the input layer (must be >= 2).
 * learning_rate: SGD step size.
 *
 * Returns NULL on invalid arguments or allocation failure of the network
 * itself; deeper allocation failures abort the process (fail-fast).
 * Caller owns the result and frees it with nn_free_network().
 */
nn_network_t* nn_create_network(const int *layer_sizes, int num_layers,
                                double learning_rate) {
    if (num_layers < 2) {
        fprintf(stderr, "Error: Network needs at least 2 layers (input + output)\n");
        return NULL;
    }
    nn_network_t *net = malloc(sizeof *net);
    if (!net) {
        fprintf(stderr, "Error: Failed to allocate network\n");
        return NULL;
    }
    /* num_layers - 1 because the input layer performs no computation. */
    net->num_layers = num_layers - 1;
    net->learning_rate = learning_rate;
    net->layers = malloc((size_t)net->num_layers * sizeof *net->layers);
    net->layer_inputs = malloc((size_t)net->num_layers * sizeof *net->layer_inputs);
    if (!net->layers || !net->layer_inputs) {
        fprintf(stderr, "Error: Failed to allocate network arrays\n");
        /* BUGFIX: release whichever array DID get allocated before freeing
         * net — the original leaked it. free(NULL) is a harmless no-op. */
        free(net->layers);
        free(net->layer_inputs);
        free(net);
        return NULL;
    }
    /* Create each computational layer; default activation is sigmoid. */
    for (int i = 0; i < net->num_layers; i++) {
        net->layers[i] = nn_create_layer(layer_sizes[i], layer_sizes[i + 1],
                                         nn_sigmoid, nn_sigmoid_deriv);
        net->layer_inputs[i] = malloc((size_t)layer_sizes[i] * sizeof *net->layer_inputs[i]);
        if (!net->layer_inputs[i]) {
            fprintf(stderr, "Error: Failed to allocate layer inputs\n");
            exit(EXIT_FAILURE);
        }
    }
    return net;
}
/* Free network and all its layers */
/* Release the network, its layers, and the cached per-layer input buffers.
 * Safe to call with NULL. */
void nn_free_network(nn_network_t *net) {
    if (net == NULL) {
        return;
    }
    for (int l = 0; l < net->num_layers; l++) {
        free(net->layer_inputs[l]);
        nn_free_layer(net->layers[l]);
    }
    free(net->layer_inputs);
    free(net->layers);
    free(net);
}
/* Initialize all network weights */
/* Initialize every layer's weights, giving each layer a distinct but
 * reproducible seed derived from the caller's base seed. */
void nn_init_weights(nn_network_t *net, unsigned int seed) {
    for (int l = 0; l < net->num_layers; l++) {
        nn_init_layer_weights(net->layers[l], seed + (unsigned int)l);
    }
}
/* Set activation function for all layers */
/* Install the same activation/derivative pair on every layer in the net. */
void nn_set_activation(nn_network_t *net, activation_fn activate,
                       activation_deriv_fn activate_deriv) {
    int l = 0;
    while (l < net->num_layers) {
        nn_layer_t *layer = net->layers[l];
        layer->activate = activate;
        layer->activate_deriv = activate_deriv;
        l++;
    }
}
/* Set activation function for specific layer */
/* Install an activation/derivative pair on one specific layer.
 * Out-of-range indices are rejected with a message on stderr. */
void nn_set_layer_activation(nn_network_t *net, int layer_idx,
                             activation_fn activate,
                             activation_deriv_fn activate_deriv) {
    const int in_range = (layer_idx >= 0) && (layer_idx < net->num_layers);
    if (!in_range) {
        fprintf(stderr, "Error: Invalid layer index %d\n", layer_idx);
        return;
    }
    nn_layer_t *target = net->layers[layer_idx];
    target->activate = activate;
    target->activate_deriv = activate_deriv;
}
/* ==================== Forward Pass ==================== */
/* Perform forward propagation through the network */
/* Perform forward propagation through the network.
 *
 * input: array of layer_sizes[0] values; not modified.
 * Returns a pointer to the last layer's outputs array (owned by the network,
 * overwritten by the next forward pass — do not free).
 *
 * Side effect: caches each layer's input in net->layer_inputs for backprop.
 */
double* nn_forward(nn_network_t *net, const double *input) {
    /* CLEANUP: track the signal with a const pointer instead of casting
     * away const from the caller's buffer (the original did `(double*)input`). */
    const double *cur = input;
    for (int i = 0; i < net->num_layers; i++) {
        nn_layer_t *layer = net->layers[i];
        /* Snapshot this layer's input for gradient accumulation later. */
        memcpy(net->layer_inputs[i], cur,
               (size_t)layer->input_size * sizeof(double));
        /* Each neuron: weighted sum of inputs plus bias, then activation. */
        for (int j = 0; j < layer->output_size; j++) {
            double sum = layer->biases[j];
            const double *row = &layer->weights[j * layer->input_size];
            for (int k = 0; k < layer->input_size; k++) {
                sum += row[k] * cur[k];
            }
            layer->pre_activation[j] = sum;
            layer->outputs[j] = layer->activate(sum);
        }
        /* This layer's activations feed the next layer. */
        cur = layer->outputs;
    }
    /* Return the (mutable) output buffer of the final layer. */
    return net->layers[net->num_layers - 1]->outputs;
}
/* ==================== Backward Pass ==================== */
/* Perform backpropagation to compute gradients */
/* Perform backpropagation to compute gradients.
 *
 * target: expected output array, one value per output neuron.
 *
 * Assumes nn_forward has just run, so pre_activation, outputs, and
 * layer_inputs hold values for the current sample. Gradients are ADDED
 * into weight_grads/bias_grads (batch accumulation); nn_update_weights
 * consumes and resets them. The delta formulation corresponds to MSE loss
 * composed with the layer activation. */
void nn_backward(nn_network_t *net, const double *target) {
nn_layer_t *last_layer = net->layers[net->num_layers - 1];
/* Output layer: delta = (prediction - target) * f'(pre_activation) */
for (int i = 0; i < last_layer->output_size; i++) {
double error = last_layer->outputs[i] - target[i];
last_layer->deltas[i] = error * last_layer->activate_deriv(last_layer->pre_activation[i]);
}
/* Hidden layers: propagate error backwards, highest layer first */
for (int i = net->num_layers - 2; i >= 0; i--) {
nn_layer_t *layer = net->layers[i];
nn_layer_t *next_layer = net->layers[i + 1];
for (int j = 0; j < layer->output_size; j++) {
double error = 0.0;
/* Sum the next layer's deltas weighted by the connections leaving
 * neuron j (column j of the next layer's row-major weight matrix) */
for (int k = 0; k < next_layer->output_size; k++) {
error += next_layer->deltas[k] * next_layer->weights[k * next_layer->input_size + j];
}
layer->deltas[j] = error * layer->activate_deriv(layer->pre_activation[j]);
}
}
/* Accumulate gradients for all layers: dW[j][k] += delta[j] * input[k],
 * db[j] += delta[j], using the inputs cached by nn_forward */
for (int i = 0; i < net->num_layers; i++) {
nn_layer_t *layer = net->layers[i];
double *inputs = net->layer_inputs[i];
for (int j = 0; j < layer->output_size; j++) {
layer->bias_grads[j] += layer->deltas[j];
for (int k = 0; k < layer->input_size; k++) {
layer->weight_grads[j * layer->input_size + k] +=
layer->deltas[j] * inputs[k];
}
}
}
}
/* Update weights using accumulated gradients */
/* Apply one SGD step using the accumulated batch gradients, then reset
 * the accumulators. The 1/batch_size average is folded into the step size. */
static void nn_update_weights(nn_network_t *net, int batch_size) {
    const double step = net->learning_rate / batch_size;
    for (int l = 0; l < net->num_layers; l++) {
        nn_layer_t *layer = net->layers[l];
        const int n_weights = layer->output_size * layer->input_size;
        for (int w = 0; w < n_weights; w++) {
            layer->weights[w] -= step * layer->weight_grads[w];
            layer->weight_grads[w] = 0.0; /* clear for the next batch */
        }
        for (int b = 0; b < layer->output_size; b++) {
            layer->biases[b] -= step * layer->bias_grads[b];
            layer->bias_grads[b] = 0.0;
        }
    }
}
/* ==================== Training ==================== */
/* Compute Mean Squared Error loss */
/* Mean Squared Error: average of squared element-wise differences
 * between predicted and target over `size` values. */
double nn_mse_loss(const double *predicted, const double *target, int size) {
    double acc = 0.0;
    for (int idx = 0; idx < size; idx++) {
        const double d = predicted[idx] - target[idx];
        acc += d * d;
    }
    return acc / size;
}
/* Train the network on a dataset */
/* Train the network with full-batch gradient descent.
 *
 * inputs/targets: arrays of num_samples pointers to sample data.
 * epochs: number of passes over the whole dataset.
 * verbose: when non-zero, prints the average loss every 100 epochs.
 *
 * Gradients are accumulated over all samples, then applied once per epoch.
 */
void nn_train(nn_network_t *net, double **inputs, double **targets,
              int num_samples, int epochs, int verbose) {
    /* BUGFIX: an empty dataset would divide by zero in the weight update
     * and the loss average; there is nothing to train on, so return. */
    if (num_samples <= 0) {
        return;
    }
    int output_size = net->layers[net->num_layers - 1]->output_size;
    for (int epoch = 0; epoch < epochs; epoch++) {
        double total_loss = 0.0;
        /* Forward and backward pass for each sample; gradients accumulate. */
        for (int s = 0; s < num_samples; s++) {
            nn_forward(net, inputs[s]);
            nn_backward(net, targets[s]);
            double *output = net->layers[net->num_layers - 1]->outputs;
            total_loss += nn_mse_loss(output, targets[s], output_size);
        }
        /* One batch update after processing all samples. */
        nn_update_weights(net, num_samples);
        /* Periodic progress report. */
        if (verbose && (epoch + 1) % 100 == 0) {
            printf("Epoch %5d, Loss: %.6f\n", epoch + 1, total_loss / num_samples);
        }
    }
}
/* Get prediction for a single input */
/* Inference entry point: a prediction is simply a forward pass; gradient
 * state is left untouched. Returns the network-owned output buffer. */
double* nn_predict(nn_network_t *net, const double *input) {
    return nn_forward(net, input);
}
/* ==================== Utility Functions ==================== */
/* Print network architecture */
/* Print a human-readable summary of the network topology to stdout. */
void nn_print_architecture(nn_network_t *net) {
    printf("Network Architecture:\n");
    /* +1 counts the implicit input layer. */
    printf(" Layers: %d\n", net->num_layers + 1);
    printf(" Learning Rate: %.4f\n", net->learning_rate);
    printf(" Structure: ");
    for (int l = 0; l < net->num_layers; l++) {
        /* Join the layer input widths with " -> " separators. */
        printf(l == 0 ? "%d" : " -> %d", net->layers[l]->input_size);
    }
    printf(" -> %d\n", net->layers[net->num_layers - 1]->output_size);
}
/* Print network outputs for debugging */
/* Dump every layer's activated outputs to stdout (debugging aid). */
void nn_print_outputs(nn_network_t *net) {
    printf("Layer Outputs:\n");
    for (int l = 0; l < net->num_layers; l++) {
        const nn_layer_t *layer = net->layers[l];
        printf(" Layer %d: [", l);
        for (int j = 0; j < layer->output_size; j++) {
            /* Comma-separate all values after the first. */
            printf(j ? ", %.4f" : "%.4f", layer->outputs[j]);
        }
        printf("]\n");
    }
}
/* ==================== Example: XOR Problem ==================== */
/* Demo: train the network on the XOR problem and report accuracy. */
int main(void) {
    printf("=== Simple Neural Network Library Demo ===\n\n");
    /* Define network architecture: 2 inputs, 4 hidden, 1 output */
    int layer_sizes[] = {2, 4, 1};
    int num_layers = sizeof(layer_sizes) / sizeof(layer_sizes[0]);
    /* BUGFIX: 1000 epochs of full-batch updates at lr=0.1 is not enough for
     * XOR to converge reliably; train for 10000 epochs instead, and derive
     * the progress message from the same constant. */
    const int epochs = 10000;
    /* Create network with learning rate 0.1 */
    nn_network_t *net = nn_create_network(layer_sizes, num_layers, 0.1);
    if (!net) {
        fprintf(stderr, "Failed to create network\n");
        return EXIT_FAILURE;
    }
    nn_print_architecture(net);
    printf("\n");
    /* Initialize weights with a fixed seed for reproducibility */
    nn_init_weights(net, 42);
    /* XOR truth table: output is 1 exactly when the inputs differ */
    double inputs[4][2] = {
        {0.0, 0.0},
        {0.0, 1.0},
        {1.0, 0.0},
        {1.0, 1.0}
    };
    double targets[4][1] = {
        {0.0},
        {1.0},
        {1.0},
        {0.0}
    };
    /* nn_train takes arrays of row pointers */
    double *input_ptrs[4];
    double *target_ptrs[4];
    for (int i = 0; i < 4; i++) {
        input_ptrs[i] = inputs[i];
        target_ptrs[i] = targets[i];
    }
    /* Train the network */
    printf("Training on XOR problem (%d epochs)...\n\n", epochs);
    nn_train(net, input_ptrs, target_ptrs, 4, epochs, 1);
    /* Test the trained network */
    printf("\n=== Test Results ===\n");
    printf("Input -> Output (Target)\n");
    printf("-----------------------------------\n");
    for (int i = 0; i < 4; i++) {
        double *output = nn_predict(net, inputs[i]);
        printf("[%.1f, %.1f] -> %.4f (%.1f)\n",
               inputs[i][0], inputs[i][1], output[0], targets[i][0]);
    }
    /* Calculate final loss and thresholded accuracy */
    printf("\n=== Final Metrics ===\n");
    double final_loss = 0.0;
    int correct = 0;
    for (int i = 0; i < 4; i++) {
        double *output = nn_predict(net, inputs[i]);
        final_loss += nn_mse_loss(output, targets[i], 1);
        /* Classify with a 0.5 decision threshold */
        double predicted_class = output[0] > 0.5 ? 1.0 : 0.0;
        if (predicted_class == targets[i][0]) {
            correct++;
        }
    }
    printf("Final MSE Loss: %.6f\n", final_loss / 4.0);
    printf("Accuracy: %d/4 (%.1f%%)\n", correct, (correct / 4.0) * 100.0);
    /* Clean up */
    nn_free_network(net);
    printf("\n=== Demo Complete ===\n");
    return EXIT_SUCCESS;
}