Diffstat:
 src/lib/include/neuralnet/matrix.h    |  15
 src/lib/include/neuralnet/neuralnet.h |   8
 src/lib/include/neuralnet/train.h     |  20
 src/lib/src/activation.h              |  12
 src/lib/src/matrix.c                  |  82
 src/lib/src/neuralnet.c               |  65
 src/lib/src/neuralnet_impl.h          |  12
 src/lib/src/train.c                   | 236
 8 files changed, 237 insertions(+), 213 deletions(-)
diff --git a/src/lib/include/neuralnet/matrix.h b/src/lib/include/neuralnet/matrix.h
index 0cb40cf..b7281bf 100644
--- a/src/lib/include/neuralnet/matrix.h
+++ b/src/lib/include/neuralnet/matrix.h
@@ -33,7 +33,8 @@ void nnMatrixToArray(const nnMatrix* in, R* out);
 void nnMatrixRowToArray(const nnMatrix* in, int row, R* out);
 
 /// Copy a column from a source to a target matrix.
-void nnMatrixCopyCol(const nnMatrix* in, nnMatrix* out, int col_in, int col_out);
+void nnMatrixCopyCol(
+    const nnMatrix* in, nnMatrix* out, int col_in, int col_out);
 
 /// Mutable borrow of a matrix.
 nnMatrix nnMatrixBorrow(nnMatrix* in);
@@ -56,20 +57,24 @@ void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
 ///
 /// This function multiples two matrices row-by-row instead of row-by-column.
 /// nnMatrixMul(A, B, O) == nnMatrixMulRows(A, B^T, O).
-void nnMatrixMulRows(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
+void nnMatrixMulRows(
+    const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
 
 /// Matrix multiply-add.
 ///
 /// out = left + (right * scale)
-void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out);
+void nnMatrixMulAdd(
+    const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out);
 
 /// Matrix multiply-subtract.
 ///
 /// out = left - (right * scale)
-void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out);
+void nnMatrixMulSub(
+    const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out);
 
 /// Hadamard product of two matrices.
-void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
+void nnMatrixMulPairs(
+    const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
 
 /// Add two matrices.
 void nnMatrixAdd(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
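Reviewer note: the row-by-row contract documented above is worth keeping in mind at call sites. nnMatrixMulRows(A, C, O) computes the same result as nnMatrixMul(A, B, O) when C = B^T, which lets a caller that already keeps the transpose around skip an explicit transpose step (train.c relies on this during backpropagation below). The multiply-add/subtract helpers are fused scale-and-accumulate updates, and the element-wise implementations in matrix.c tolerate out aliasing left. A hedged usage sketch (hypothetical caller code, not part of this commit):

    // Gradient-descent style update: weights = weights - deltas * rate.
    // Aliasing out with left mirrors how train.c updates biases in place.
    nnMatrixMulSub(&weights, &deltas, learning_rate, &weights);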
diff --git a/src/lib/include/neuralnet/neuralnet.h b/src/lib/include/neuralnet/neuralnet.h
index 1cf1c53..05c9406 100644
--- a/src/lib/include/neuralnet/neuralnet.h
+++ b/src/lib/include/neuralnet/neuralnet.h
@@ -5,7 +5,7 @@
 typedef struct nnMatrix nnMatrix;
 
 typedef struct nnNeuralNetwork nnNeuralNetwork;
 typedef struct nnQueryObject nnQueryObject;
 
 /// Neuron activation.
 typedef enum nnActivation {
@@ -15,7 +15,8 @@ typedef enum nnActivation {
 } nnActivation;
 
 /// Create a network.
-nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActivation* activations);
+nnNeuralNetwork* nnMakeNet(
+    int num_layers, const int* layer_sizes, const nnActivation* activations);
 
 /// Delete the network and free its internal memory.
 void nnDeleteNet(nnNeuralNetwork**);
@@ -36,7 +37,8 @@ void nnSetBiases(nnNeuralNetwork*, const R* biases);
 void nnQuery(const nnNeuralNetwork*, nnQueryObject*, const nnMatrix* input);
 
 /// Query the network, array version.
-void nnQueryArray(const nnNeuralNetwork*, nnQueryObject*, const R* input, R* output);
+void nnQueryArray(
+    const nnNeuralNetwork*, nnQueryObject*, const R* input, R* output);
 
 /// Create a query object.
 ///
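Reviewer note: per the nnMakeNet implementation in src/lib/src/neuralnet.c further down, layer_sizes holds num_layers + 1 entries (the input dimension followed by one output dimension per non-input layer), while activations holds exactly num_layers entries. A construction sketch with illustrative values (hypothetical caller code, not part of this commit):

    // A 2 -> 3 -> 1 network: one sigmoid hidden layer, one sigmoid output.
    const int sizes[] = {2, 3, 1};
    const nnActivation acts[] = {nnSigmoid, nnSigmoid};
    nnNeuralNetwork* net = nnMakeNet(/*num_layers=*/2, sizes, acts);
    // ... query or train the network ...
    nnDeleteNet(&net); // Frees the network and nulls the caller's pointer.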
diff --git a/src/lib/include/neuralnet/train.h b/src/lib/include/neuralnet/train.h
index 79f8e7b..6d811c2 100644
--- a/src/lib/include/neuralnet/train.h
+++ b/src/lib/include/neuralnet/train.h
@@ -14,18 +14,18 @@ typedef struct nnMatrix nnMatrix;
 /// activation with many inputs. Thus, a (0,1) initialization is really
 /// (0,scale), for example.
 typedef enum nnWeightInitStrategy {
   nnWeightInit01,     // (0,1) range.
   nnWeightInit11,     // (-1,+1) range.
   nnWeightInitNormal, // Normal distribution.
 } nnWeightInitStrategy;
 
 /// Network training parameters.
 typedef struct nnTrainingParams {
   R learning_rate;
   int max_iterations;
   uint64_t seed;
   nnWeightInitStrategy weight_init;
   bool debug;
 } nnTrainingParams;
 
 /// Train the network.
@@ -36,7 +36,5 @@ typedef struct nnTrainingParams {
 /// |targets| is a matrix of targets, one row per target and as many columns as
 /// the target's dimension.
 void nnTrain(
-    nnNeuralNetwork*,
-    const nnMatrix* inputs,
-    const nnMatrix* targets,
+    nnNeuralNetwork*, const nnMatrix* inputs, const nnMatrix* targets,
     const nnTrainingParams*);
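Reviewer note: since |inputs| and |targets| are one-row-per-sample matrices, a minimal training call looks like the sketch below (hypothetical caller code; the field values are illustrative, not recommendations):

    const nnTrainingParams params = {
        .learning_rate = 0.1,
        .max_iterations = 10000,
        .seed = 42,
        .weight_init = nnWeightInitNormal,
        .debug = false,
    };
    nnTrain(net, &inputs, &targets, &params);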
diff --git a/src/lib/src/activation.h b/src/lib/src/activation.h
index 42ab73f..b56a69e 100644
--- a/src/lib/src/activation.h
+++ b/src/lib/src/activation.h
@@ -4,17 +4,13 @@
 
 #include <math.h>
 
-static inline R sigmoid(R x) {
-  return 1. / (1. + exp(-x));
-}
+static inline R sigmoid(R x) { return 1. / (1. + exp(-x)); }
 
-static inline R relu(R x) {
-  return fmax(0, x);
-}
+static inline R relu(R x) { return fmax(0, x); }
 
 #define NN_MAP_ARRAY(f, in, out, size) \
   for (int i = 0; i < size; ++i) {     \
     out[i] = f(in[i]);                 \
   }
 
 #define sigmoid_array(in, out, size) NN_MAP_ARRAY(sigmoid, in, out, size)
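Reviewer note: NN_MAP_ARRAY is a plain element-wise map, so sigmoid_array(in, out, n) expands to nothing more than:

    for (int i = 0; i < n; ++i) {
      out[i] = sigmoid(in[i]);
    }

In-place use (in == out) is well-defined, which is how nnQuery applies activations to each layer's output vector below.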
diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c
index f937c01..174504f 100644
--- a/src/lib/src/matrix.c
+++ b/src/lib/src/matrix.c
@@ -8,10 +8,10 @@ nnMatrix nnMatrixMake(int rows, int cols) {
   R* values = calloc(rows * cols, sizeof(R));
   assert(values != 0);
 
-  return (nnMatrix) {
+  return (nnMatrix){
     .rows = rows,
     .cols = cols,
     .values = values,
   };
 }
 
@@ -21,8 +21,8 @@ void nnMatrixDel(nnMatrix* matrix) {
   if (matrix->values != 0) {
     free(matrix->values);
     matrix->values = 0;
     matrix->rows = 0;
     matrix->cols = 0;
   }
 }
 
@@ -30,12 +30,12 @@ void nnMatrixMove(nnMatrix* in, nnMatrix* out) {
   assert(in);
   assert(out);
 
   out->rows = in->rows;
   out->cols = in->cols;
   out->values = in->values;
 
   in->rows = 0;
   in->cols = 0;
   in->values = 0;
 }
 
@@ -45,8 +45,8 @@ void nnMatrixCopy(const nnMatrix* in, nnMatrix* out) {
   assert(in->rows == out->rows);
   assert(in->cols == out->cols);
 
   const R* in_value = in->values;
   R* out_value = out->values;
 
   for (int i = 0; i < in->rows * in->cols; ++i) {
     *out_value++ = *in_value++;
@@ -73,7 +73,8 @@ void nnMatrixRowToArray(const nnMatrix* in, int row, R* out) {
   }
 }
 
-void nnMatrixCopyCol(const nnMatrix* in, nnMatrix* out, int col_in, int col_out) {
+void nnMatrixCopyCol(
+    const nnMatrix* in, nnMatrix* out, int col_in, int col_out) {
   assert(in);
   assert(out);
   assert(in->rows == out->rows);
@@ -89,8 +90,8 @@ nnMatrix nnMatrixBorrow(nnMatrix* in) {
   assert(in);
 
   nnMatrix out;
   out.rows = in->rows;
   out.cols = in->cols;
   out.values = in->values;
   return out;
 }
@@ -101,8 +102,8 @@ nnMatrix nnMatrixBorrowRows(nnMatrix* in, int row_start, int num_rows) {
   assert(row_start + num_rows <= in->rows);
 
   nnMatrix out;
   out.rows = num_rows;
   out.cols = in->cols;
   out.values = nnMatrixRow_mut(in, row_start);
   return out;
 }
@@ -139,9 +140,9 @@ void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
     const R* p_left_value = &left->values[i * left->cols];
 
     for (int j = 0; j < left->cols; ++j) {
       const R left_value = *p_left_value;
       const R* right_value = &right->values[j * right->cols];
       R* out_value = &out->values[i * out->cols];
 
       for (int k = 0; k < right->cols; ++k) {
         *out_value++ += left_value * *right_value++;
@@ -152,7 +153,8 @@ void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
   }
 }
 
-void nnMatrixMulRows(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
+void nnMatrixMulRows(
+    const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
   assert(left != 0);
   assert(right != 0);
   assert(out != 0);
@@ -165,7 +167,7 @@ void nnMatrixMulRows(const nnMatrix* left, const nnMatrix* right, nnMatrix* out)
   R* out_value = out->values;
 
   for (int i = 0; i < left->rows; ++i) {
     const R* left_row = &left->values[i * left->cols];
     const R* right_value = right->values;
 
     for (int j = 0; j < right->rows; ++j) {
@@ -181,7 +183,8 @@ void nnMatrixMulRows(const nnMatrix* left, const nnMatrix* right, nnMatrix* out)
   }
 }
 
-void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) {
+void nnMatrixMulAdd(
+    const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) {
   assert(left);
   assert(right);
   assert(out);
@@ -190,16 +193,17 @@ void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatr
   assert(left->rows == out->rows);
   assert(left->cols == out->cols);
 
   const R* left_value = left->values;
   const R* right_value = right->values;
   R* out_value = out->values;
 
   for (int i = 0; i < left->rows * left->cols; ++i) {
     *out_value++ = *left_value++ + *right_value++ * scale;
   }
 }
 
-void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) {
+void nnMatrixMulSub(
+    const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) {
   assert(left);
   assert(right);
   assert(out);
@@ -208,16 +212,17 @@ void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatr
   assert(left->rows == out->rows);
   assert(left->cols == out->cols);
 
   const R* left_value = left->values;
   const R* right_value = right->values;
   R* out_value = out->values;
 
   for (int i = 0; i < left->rows * left->cols; ++i) {
     *out_value++ = *left_value++ - *right_value++ * scale;
   }
 }
 
-void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
+void nnMatrixMulPairs(
+    const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
   assert(left != 0);
   assert(right != 0);
   assert(out != 0);
@@ -226,9 +231,9 @@ void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out
   assert(left->rows == out->rows);
   assert(left->cols == out->cols);
 
   R* left_value = left->values;
   R* right_value = right->values;
   R* out_value = out->values;
 
   for (int i = 0; i < left->rows * left->cols; ++i) {
     *out_value++ = *left_value++ * *right_value++;
@@ -244,9 +249,9 @@ void nnMatrixAdd(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
   assert(left->rows == out->rows);
   assert(left->cols == out->cols);
 
   const R* left_value = left->values;
   const R* right_value = right->values;
   R* out_value = out->values;
 
   for (int i = 0; i < left->rows * left->cols; ++i) {
     *out_value++ = *left_value++ + *right_value++;
@@ -262,16 +267,17 @@ void nnMatrixSub(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
   assert(left->rows == out->rows);
   assert(left->cols == out->cols);
 
   const R* left_value = left->values;
   const R* right_value = right->values;
   R* out_value = out->values;
 
   for (int i = 0; i < left->rows * left->cols; ++i) {
     *out_value++ = *left_value++ - *right_value++;
   }
 }
 
-void nnMatrixAddRow(const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out) {
+void nnMatrixAddRow(
+    const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out) {
   assert(matrix);
   assert(row);
   assert(out);
@@ -281,7 +287,7 @@ void nnMatrixAddRow(const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out)
   assert(matrix->cols == out->cols);
 
   const R* matrix_value = matrix->values;
   R* out_value = out->values;
 
   for (int i = 0; i < matrix->rows; ++i) {
     const R* row_value = row->values;
@@ -320,8 +326,8 @@ void nnMatrixGt(const nnMatrix* in, R threshold, nnMatrix* out) {
   assert(in->rows == out->rows);
   assert(in->cols == out->cols);
 
   const R* in_value = in->values;
   R* out_value = out->values;
 
   for (int i = 0; i < in->rows * in->cols; ++i) {
     *out_value++ = (*in_value++) > threshold ? 1 : 0;
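Reviewer note on nnMatrixMul above: the (i, j, k) loop nesting keeps the innermost loop streaming contiguously over a row of the right matrix and a row of the output, which is the cache-friendly ordering for row-major storage. A standalone sketch of the same access pattern (illustrative code, not from this library; it assumes O starts zeroed, as nnMatrix values do via calloc in nnMatrixMake):

    // O[i][k] += A[i][j] * B[j][k]; the k loop walks contiguous memory
    // in both B and O, so the hot loop is a sequential read and write.
    for (int i = 0; i < a_rows; ++i) {
      for (int j = 0; j < a_cols; ++j) {
        const double a_ij = A[i * a_cols + j];
        for (int k = 0; k < b_cols; ++k) {
          O[i * b_cols + k] += a_ij * B[j * b_cols + k];
        }
      }
    }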
diff --git a/src/lib/src/neuralnet.c b/src/lib/src/neuralnet.c
index cac611a..a5fc59b 100644
--- a/src/lib/src/neuralnet.c
+++ b/src/lib/src/neuralnet.c
@@ -1,13 +1,14 @@
 #include <neuralnet/neuralnet.h>
 
-#include <neuralnet/matrix.h>
 #include "activation.h"
 #include "neuralnet_impl.h"
+#include <neuralnet/matrix.h>
 
 #include <assert.h>
 #include <stdlib.h>
 
-nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActivation* activations) {
+nnNeuralNetwork* nnMakeNet(
+    int num_layers, const int* layer_sizes, const nnActivation* activations) {
   assert(num_layers > 0);
   assert(layer_sizes);
   assert(activations);
@@ -19,10 +20,10 @@ nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActiv
 
   net->num_layers = num_layers;
 
   net->weights = calloc(num_layers, sizeof(nnMatrix));
   net->biases = calloc(num_layers, sizeof(nnMatrix));
   net->activations = calloc(num_layers, sizeof(nnActivation));
-  if ( (net->weights == 0) || (net->biases == 0) || (net->activations == 0) ) {
+  if ((net->weights == 0) || (net->biases == 0) || (net->activations == 0)) {
     nnDeleteNet(&net);
     return 0;
   }
@@ -30,15 +31,15 @@ nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActiv
   for (int l = 0; l < num_layers; ++l) {
     // layer_sizes = { input layer size, first hidden layer size, ...}
     const int layer_input_size = layer_sizes[l];
-    const int layer_output_size = layer_sizes[l+1];
+    const int layer_output_size = layer_sizes[l + 1];
 
     // We store the transpose of the weight matrix as written in textbooks.
     // Our vectors are row vectors and the matrices row-major.
     const int rows = layer_input_size;
     const int cols = layer_output_size;
 
     net->weights[l] = nnMatrixMake(rows, cols);
     net->biases[l] = nnMatrixMake(1, cols);
     net->activations[l] = activations[l];
   }
 
@@ -46,7 +47,7 @@ nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActiv
 }
 
 void nnDeleteNet(nnNeuralNetwork** net) {
-  if ( (!net) || (!(*net)) ) {
+  if ((!net) || (!(*net))) {
     return;
   }
   if ((*net)->weights != 0) {
@@ -77,7 +78,7 @@ void nnSetWeights(nnNeuralNetwork* net, const R* weights) {
 
   for (int l = 0; l < net->num_layers; ++l) {
     nnMatrix* layer_weights = &net->weights[l];
     R* layer_values = layer_weights->values;
 
     for (int j = 0; j < layer_weights->rows * layer_weights->cols; ++j) {
       *layer_values++ = *weights++;
@@ -91,7 +92,7 @@ void nnSetBiases(nnNeuralNetwork* net, const R* biases) {
 
   for (int l = 0; l < net->num_layers; ++l) {
     nnMatrix* layer_biases = &net->biases[l];
     R* layer_values = layer_biases->values;
 
     for (int j = 0; j < layer_biases->rows * layer_biases->cols; ++j) {
       *layer_values++ = *biases++;
@@ -99,7 +100,8 @@ void nnSetBiases(nnNeuralNetwork* net, const R* biases) {
   }
 }
 
-void nnQuery(const nnNeuralNetwork* net, nnQueryObject* query, const nnMatrix* input) {
+void nnQuery(
+    const nnNeuralNetwork* net, nnQueryObject* query, const nnMatrix* input) {
   assert(net);
   assert(query);
   assert(input);
@@ -123,29 +125,34 @@ void nnQuery(const nnNeuralNetwork* net, nnQueryObject* query, const nnMatrix* i
       // We could also rewrite the original Mul function to go row x row,
       // decomposing the multiplication. Preserving the original meaning of Mul
       // makes everything clearer.
-      nnMatrix output_vector = nnMatrixBorrowRows(&query->layer_outputs[l], i, 1);
+      nnMatrix output_vector =
+          nnMatrixBorrowRows(&query->layer_outputs[l], i, 1);
       nnMatrixMul(&input_vector, layer_weights, &output_vector);
       nnMatrixAddRow(&output_vector, layer_biases, &output_vector);
 
       switch (net->activations[l]) {
-      case nnIdentity:
-        break; // Nothing to do for the identity function.
-      case nnSigmoid:
-        sigmoid_array(output_vector.values, output_vector.values, output_vector.cols);
-        break;
-      case nnRelu:
-        relu_array(output_vector.values, output_vector.values, output_vector.cols);
-        break;
-      default:
-        assert(0);
+        case nnIdentity:
+          break; // Nothing to do for the identity function.
+        case nnSigmoid:
+          sigmoid_array(
+              output_vector.values, output_vector.values, output_vector.cols);
+          break;
+        case nnRelu:
+          relu_array(
+              output_vector.values, output_vector.values, output_vector.cols);
+          break;
+        default:
+          assert(0);
       }
 
       input_vector = output_vector; // Borrow.
     }
   }
 }
 
-void nnQueryArray(const nnNeuralNetwork* net, nnQueryObject* query, const R* input, R* output) {
+void nnQueryArray(
+    const nnNeuralNetwork* net, nnQueryObject* query, const R* input,
+    R* output) {
   assert(net);
   assert(query);
   assert(input);
@@ -177,9 +184,9 @@ nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) {
     return 0;
   }
   for (int l = 0; l < net->num_layers; ++l) {
     const nnMatrix* layer_weights = &net->weights[l];
     const int layer_output_size = nnLayerOutputSize(layer_weights);
     query->layer_outputs[l] = nnMatrixMake(num_inputs, layer_output_size);
   }
   query->network_outputs = &query->layer_outputs[net->num_layers - 1];
 
@@ -187,7 +194,7 @@ nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) {
 }
 
 void nnDeleteQueryObject(nnQueryObject** query) {
-  if ( (!query) || (!(*query)) ) {
+  if ((!query) || (!(*query))) {
     return;
   }
   if ((*query)->layer_outputs != 0) {
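Reviewer note on the storage convention mentioned in nnMakeNet: with row-vector inputs and an (input_size x output_size) weight matrix, a layer computes y = x*W + b rather than the textbook y = W*x + b. For a layer with 2 inputs and 3 outputs the shapes line up as

    x: 1x2,  W: 2x3,  b: 1x3  =>  y = x*W + b: 1x3

which is exactly the nnMatrixMul(&input_vector, layer_weights, &output_vector) followed by nnMatrixAddRow(&output_vector, layer_biases, &output_vector) sequence in nnQuery above.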
diff --git a/src/lib/src/neuralnet_impl.h b/src/lib/src/neuralnet_impl.h
index 26107b5..18694f4 100644
--- a/src/lib/src/neuralnet_impl.h
+++ b/src/lib/src/neuralnet_impl.h
@@ -14,10 +14,10 @@
 ///
 /// w11 w12 w21 w22
 typedef struct nnNeuralNetwork {
   int num_layers;            // Number of non-input layers (hidden + output).
   nnMatrix* weights;         // One matrix per non-input layer.
   nnMatrix* biases;          // One vector per non-input layer.
   nnActivation* activations; // One per non-input layer.
 } nnNeuralNetwork;
 
 /// A query object that holds all the memory necessary to query a network.
@@ -31,6 +31,6 @@ typedef struct nnNeuralNetwork {
 /// convenience.
 typedef struct nnQueryObject {
   int num_layers;
   nnMatrix* layer_outputs;   // Output matrices, one output per layer.
   nnMatrix* network_outputs; // Points to the last output matrix.
 } nnTrainingQueryObject;
diff --git a/src/lib/src/train.c b/src/lib/src/train.c
index 3061a99..9244907 100644
--- a/src/lib/src/train.c
+++ b/src/lib/src/train.c
@@ -1,7 +1,7 @@
 #include <neuralnet/train.h>
 
-#include <neuralnet/matrix.h>
 #include "neuralnet_impl.h"
+#include <neuralnet/matrix.h>
 
 #include <random/mt19937-64.h>
 #include <random/normal.h>
@@ -14,13 +14,13 @@
 #define LOGD printf
 
 // If debug mode is requested, we will show progress every this many iterations.
 static const int PROGRESS_THRESHOLD = 5; // %
 
 /// Computes the total MSE from the output error matrix.
 R ComputeMSE(const nnMatrix* errors) {
   R sum_sq = 0;
   const int N = errors->rows * errors->cols;
   const R* value = errors->values;
   for (int i = 0; i < N; ++i) {
     sum_sq += *value * *value;
     value++;
@@ -30,7 +30,7 @@ R ComputeMSE(const nnMatrix* errors) {
 
 /// Holds the bits required to compute a sigmoid gradient.
 typedef struct nnSigmoidGradientElements {
   nnMatrix ones; // A vector of just ones, same size as the layer.
 } nnSigmoidGradientElements;
 
 /// Holds the various elements required to compute gradients. These depend on
@@ -49,7 +49,8 @@ typedef struct nnGradientElements {
 } nnGradientElements;
 
 // Initialize the network's weights randomly and set their biases to 0.
-void nnInitNet(nnNeuralNetwork* net, uint64_t seed, const nnWeightInitStrategy strategy) {
+void nnInitNet(
+    nnNeuralNetwork* net, uint64_t seed, const nnWeightInitStrategy strategy) {
   assert(net);
 
   mt19937_64 rng = mt19937_64_make();
@@ -60,41 +61,42 @@ void nnInitNet(nnNeuralNetwork* net, uint64_t seed, const nnWeightInitStrategy s
     nnMatrix* biases = &net->biases[l];
 
     const R layer_size = (R)nnLayerInputSize(weights);
     const R scale = 1. / layer_size;
     const R stdev = 1. / sqrt((R)layer_size);
     const R sigma = stdev * stdev;
 
     R* value = weights->values;
     for (int k = 0; k < weights->rows * weights->cols; ++k) {
       switch (strategy) {
         case nnWeightInit01: {
           const R x01 = mt19937_64_gen_real3(&rng); // (0, +1) interval.
           *value++ = scale * x01;
           break;
         }
         case nnWeightInit11: {
           const R x11 = mt19937_64_gen_real4(&rng); // (-1, +1) interval.
           *value++ = scale * x11;
           break;
-        }
-        case nnWeightInitNormal:
-          // Using initialization with a normal distribution of standard
-          // deviation 1 / sqrt(num_layer_weights) to prevent saturation when
-          // multiplying inputs.
-          const R u01 = mt19937_64_gen_real3(&rng); // (0, +1) interval.
-          const R v01 = mt19937_64_gen_real3(&rng); // (0, +1) interval.
-          R z0, z1;
-          normal2(u01, v01, &z0, &z1);
-          z0 = normal_transform(z0, /*mu=*/0, sigma);
-          z1 = normal_transform(z1, /*mu=*/0, sigma);
-          *value++ = z0;
-          if (k < weights->rows * weights->cols - 1) {
-            *value++ = z1;
-            ++k;
-          }
-          break;
-        default:
-          assert(false);
+        }
+        case nnWeightInitNormal: {
+          // Using initialization with a normal distribution of standard
+          // deviation 1 / sqrt(num_layer_weights) to prevent saturation when
+          // multiplying inputs.
+          const R u01 = mt19937_64_gen_real3(&rng); // (0, +1) interval.
+          const R v01 = mt19937_64_gen_real3(&rng); // (0, +1) interval.
+          R z0, z1;
+          normal2(u01, v01, &z0, &z1);
+          z0 = normal_transform(z0, /*mu=*/0, sigma);
+          z1 = normal_transform(z1, /*mu=*/0, sigma);
+          *value++ = z0;
+          if (k < weights->rows * weights->cols - 1) {
+            *value++ = z1;
+            ++k;
+          }
+          break;
+        }
+        default:
+          assert(false);
       }
     }
 
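Reviewer note on the nnWeightInitNormal case above (the substantive change in this hunk is the added braces, which give the case's local declarations a proper block scope): normal2 consumes two independent uniforms and yields two independent normals, which is why the loop writes two weights per iteration and manually advances k. This is the shape of a Box-Muller style generator; a sketch of the usual math, assuming normal2 implements the standard transform (its definition lives in random/normal.h and is not part of this diff):

    z0 = sqrt(-2 * ln(u)) * cos(2 * pi * v)
    z1 = sqrt(-2 * ln(u)) * sin(2 * pi * v)

with u, v uniform on (0, 1); normal_transform(z, mu, sigma) then shifts and scales the standard normal. The guard on k avoids writing z1 past the end of the weight buffer when the layer has an odd number of weights.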
@@ -112,9 +114,7 @@ void nnInitNet(nnNeuralNetwork* net, uint64_t seed, const nnWeightInitStrategy s
 //
 // For now, each iteration trains with one sample (row) at a time.
 void nnTrain(
-    nnNeuralNetwork* net,
-    const nnMatrix* inputs,
-    const nnMatrix* targets,
+    nnNeuralNetwork* net, const nnMatrix* inputs, const nnMatrix* targets,
     const nnTrainingParams* params) {
   assert(net);
   assert(inputs);
@@ -129,34 +129,35 @@ void nnTrain(
   nnMatrix* errors = calloc(net->num_layers, sizeof(nnMatrix));
 
   // Allocate the weight transpose matrices up front for backpropagation.
-  //nnMatrix* weights_T = calloc(net->num_layers, sizeof(nnMatrix));
+  // nnMatrix* weights_T = calloc(net->num_layers, sizeof(nnMatrix));
 
   // Allocate the weight delta matrices.
   nnMatrix* weight_deltas = calloc(net->num_layers, sizeof(nnMatrix));
 
   // Allocate the data structures required to compute gradients.
   // This depends on each layer's activation type.
-  nnGradientElements* gradient_elems = calloc(net->num_layers, sizeof(nnGradientElements));
+  nnGradientElements* gradient_elems =
+      calloc(net->num_layers, sizeof(nnGradientElements));
 
   // Allocate the output transpose vectors for weight delta calculation.
   // This is one column vector per layer.
   nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix));
 
   assert(errors != 0);
-  //assert(weights_T != 0);
+  // assert(weights_T != 0);
   assert(weight_deltas != 0);
   assert(gradient_elems);
   assert(outputs_T);
 
   for (int l = 0; l < net->num_layers; ++l) {
     const nnMatrix* layer_weights = &net->weights[l];
     const int layer_output_size = net->weights[l].cols;
     const nnActivation activation = net->activations[l];
 
     errors[l] = nnMatrixMake(1, layer_weights->cols);
 
-    //weights_T[l] = nnMatrixMake(layer_weights->cols, layer_weights->rows);
-    //nnMatrixTranspose(layer_weights, &weights_T[l]);
+    // weights_T[l] = nnMatrixMake(layer_weights->cols, layer_weights->rows);
+    // nnMatrixTranspose(layer_weights, &weights_T[l]);
 
     weight_deltas[l] = nnMatrixMake(layer_weights->rows, layer_weights->cols);
 
@@ -164,21 +165,21 @@ void nnTrain(
 
     // Allocate the gradient elements and vectors for weight delta calculation.
     nnGradientElements* elems = &gradient_elems[l];
    elems->type = activation;
     switch (activation) {
       case nnIdentity:
         break; // Gradient vector will be borrowed, no need to allocate.
 
      case nnSigmoid:
         elems->gradient = nnMatrixMake(1, layer_output_size);
         // Allocate the 1s vectors.
         elems->sigmoid.ones = nnMatrixMake(1, layer_output_size);
         nnMatrixInitConstant(&elems->sigmoid.ones, 1);
         break;
 
       case nnRelu:
         elems->gradient = nnMatrixMake(1, layer_output_size);
         break;
     }
   }
 
@@ -195,9 +196,9 @@ void nnTrain(
 
   // If debug mode is requested, we will show progress every Nth iteration.
   const int progress_frame =
       (params->max_iterations < PROGRESS_THRESHOLD)
           ? 1
           : (params->max_iterations * PROGRESS_THRESHOLD / 100);
 
   // --- TRAIN
 
@@ -209,8 +210,10 @@ void nnTrain(
     for (int sample = 0; sample < inputs->rows; ++sample) {
       // Slice the input and target matrices with the batch size.
       // We are not mutating the inputs, but we need the cast to borrow.
-      nnMatrix training_inputs = nnMatrixBorrowRows((nnMatrix*)inputs, sample, 1);
-      nnMatrix training_targets = nnMatrixBorrowRows((nnMatrix*)targets, sample, 1);
+      nnMatrix training_inputs =
+          nnMatrixBorrowRows((nnMatrix*)inputs, sample, 1);
+      nnMatrix training_targets =
+          nnMatrixBorrowRows((nnMatrix*)targets, sample, 1);
 
       // Will need the input transposed for backpropagation.
       // Assuming one training input per iteration for now.
@@ -221,8 +224,10 @@ void nnTrain(
       // part of the derivative, -2(t-o). Also, we compute o-t instead to
       // remove that outer negative sign.
       nnQuery(net, query, &training_inputs);
-      //nnMatrixSub(&training_targets, training_outputs, &errors[net->num_layers - 1]);
-      nnMatrixSub(training_outputs, &training_targets, &errors[net->num_layers - 1]);
+      // nnMatrixSub(&training_targets, training_outputs,
+      // &errors[net->num_layers - 1]);
+      nnMatrixSub(
+          training_outputs, &training_targets, &errors[net->num_layers - 1]);
 
       // Update outputs_T, which we need during weight updates.
       for (int l = 0; l < net->num_layers; ++l) {
@@ -232,12 +237,12 @@ void nnTrain(
       // Update weights and biases for each internal layer, backpropagating
       // errors along the way.
       for (int l = net->num_layers - 1; l >= 0; --l) {
         const nnMatrix* layer_output = &query->layer_outputs[l];
         nnMatrix* layer_weights = &net->weights[l];
         nnMatrix* layer_biases = &net->biases[l];
         nnGradientElements* elems = &gradient_elems[l];
         nnMatrix* gradient = &elems->gradient;
         const nnActivation activation = net->activations[l];
 
         // Compute the gradient (the part of the expression that does not
         // contain the output of the previous layer).
@@ -246,55 +251,58 @@ void nnTrain(
         // Sigmoid: G = error_k * output_k * (1 - output_k).
         // Relu: G = error_k * (output_k > 0 ? 1 : 0)
         switch (activation) {
           case nnIdentity:
             // TODO: Just copy the pointer?
             *gradient = nnMatrixBorrow(&errors[l]);
             break;
           case nnSigmoid:
             nnMatrixSub(&elems->sigmoid.ones, layer_output, gradient);
             nnMatrixMulPairs(layer_output, gradient, gradient);
             nnMatrixMulPairs(&errors[l], gradient, gradient);
             break;
           case nnRelu:
             nnMatrixGt(layer_output, 0, gradient);
             nnMatrixMulPairs(&errors[l], gradient, gradient);
             break;
         }
 
         // Outer product to compute the weight deltas.
-        const nnMatrix* output_T = (l == 0) ? &training_inputs_T : &outputs_T[l-1];
+        const nnMatrix* output_T =
+            (l == 0) ? &training_inputs_T : &outputs_T[l - 1];
         nnMatrixMul(output_T, gradient, &weight_deltas[l]);
 
         // Backpropagate the error before updating weights.
         if (l > 0) {
           // G * W^T == G *^T W.
-          //nnMatrixMul(gradient, &weights_T[l], &errors[l-1]);
-          nnMatrixMulRows(gradient, layer_weights, &errors[l-1]);
+          // nnMatrixMul(gradient, &weights_T[l], &errors[l-1]);
+          nnMatrixMulRows(gradient, layer_weights, &errors[l - 1]);
         }
 
         // Update weights.
         nnMatrixScale(&weight_deltas[l], params->learning_rate);
         // The gradient has a negative sign from -(t - o), but we have computed
         // e = o - t instead, so we can subtract directly.
-        //nnMatrixAdd(layer_weights, &weight_deltas[l], layer_weights);
+        // nnMatrixAdd(layer_weights, &weight_deltas[l], layer_weights);
         nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights);
 
         // Update weight transpose matrix for the next training iteration.
-        //nnMatrixTranspose(layer_weights, &weights_T[l]);
+        // nnMatrixTranspose(layer_weights, &weights_T[l]);
 
         // Update biases.
         // This is the same formula as for weights, except that the o_j term is
         // just 1. We can simply re-use the gradient that we have already
         // computed for the weight update.
-        //nnMatrixMulAdd(layer_biases, gradient, params->learning_rate, layer_biases);
-        nnMatrixMulSub(layer_biases, gradient, params->learning_rate, layer_biases);
+        // nnMatrixMulAdd(layer_biases, gradient, params->learning_rate,
+        // layer_biases);
+        nnMatrixMulSub(
+            layer_biases, gradient, params->learning_rate, layer_biases);
       }
 
       // TODO: Add this under a verbose debugging mode.
       // if (params->debug) {
-      //   LOGD("Iter: %d, Sample: %d, Error: %f\n", iter, sample, ComputeMSE(&errors[net->num_layers - 1]));
-      //   LOGD("TGT: ");
-      //   for (int i = 0; i < training_targets.cols; ++i) {
+      //   LOGD("Iter: %d, Sample: %d, Error: %f\n", iter, sample,
+      //   ComputeMSE(&errors[net->num_layers - 1])); LOGD("TGT: "); for (int i
+      //   = 0; i < training_targets.cols; ++i) {
      //     printf("%.3f ", training_targets.values[i]);
       //   }
       //   printf("\n");
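Reviewer note on the gradient expressions in this hunk: the sigmoid case follows from d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)). With o_k the layer output and e_k = o_k - t_k the error computed earlier, the per-unit gradient is

    G_k = e_k * o_k * (1 - o_k)

which maps one-to-one onto the three calls in the nnSigmoid branch: nnMatrixSub builds (1 - o), the first nnMatrixMulPairs forms o * (1 - o), and the second multiplies in e. The relu case replaces o * (1 - o) with the step function computed by nnMatrixGt.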
@@ -307,42 +315,44 @@ void nnTrain(
     }
 
     if (params->debug && ((iter % progress_frame) == 0)) {
-      LOGD("Iter: %d/%d, Error: %f\n",
-          iter, params->max_iterations, ComputeMSE(&errors[net->num_layers - 1]));
+      LOGD(
+          "Iter: %d/%d, Error: %f\n", iter, params->max_iterations,
+          ComputeMSE(&errors[net->num_layers - 1]));
     }
   }
 
   // Print the final error.
   if (params->debug) {
-    LOGD("Iter: %d/%d, Error: %f\n",
-        params->max_iterations, params->max_iterations, ComputeMSE(&errors[net->num_layers - 1]));
+    LOGD(
+        "Iter: %d/%d, Error: %f\n", params->max_iterations,
+        params->max_iterations, ComputeMSE(&errors[net->num_layers - 1]));
   }
 
   for (int l = 0; l < net->num_layers; ++l) {
     nnMatrixDel(&errors[l]);
     nnMatrixDel(&outputs_T[l]);
-    //nnMatrixDel(&weights_T[l]);
+    // nnMatrixDel(&weights_T[l]);
     nnMatrixDel(&weight_deltas[l]);
 
     nnGradientElements* elems = &gradient_elems[l];
     switch (elems->type) {
       case nnIdentity:
         break; // Gradient vector is borrowed, no need to deallocate.
 
      case nnSigmoid:
         nnMatrixDel(&elems->gradient);
         nnMatrixDel(&elems->sigmoid.ones);
         break;
 
       case nnRelu:
         nnMatrixDel(&elems->gradient);
         break;
     }
   }
   nnMatrixDel(&training_inputs_T);
   free(errors);
   free(outputs_T);
-  //free(weights_T);
+  // free(weights_T);
   free(weight_deltas);
   free(gradient_elems);
 }
