| field | value | date |
|---|---|---|
| author | 3gg <3gg@shellblade.net> | 2023-12-16 11:06:03 -0800 |
| committer | 3gg <3gg@shellblade.net> | 2023-12-16 11:06:03 -0800 |
| commit | dc538733da8d49e7240d00fb05517053076fe261 (patch) | |
| tree | 752cefb55f072bebbe716b8fa4e4df2baadc3138 | |
| parent | 2067bd53b182429d059a61b0e060f92b4f317ed1 (diff) | |
Define vector outer product (nnMatrixMulOuter), which removes the need to transpose layer inputs during training.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/lib/include/neuralnet/matrix.h | 13 |
| -rw-r--r-- | src/lib/src/matrix.c | 26 |
| -rw-r--r-- | src/lib/src/train.c | 28 |

3 files changed, 39 insertions, 28 deletions
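The point of the change: for a 1xN input row vector x and a 1xM gradient row vector g, nnMatrixMulOuter(&x, &g, &W) fills the NxM matrix W with the same values as transposing x and calling nnMatrixMul, so no transpose buffer is needed. A minimal sketch of that equivalence (not part of the commit; it assumes the header is included as <neuralnet/matrix.h>, that nnMatrixMake(rows, cols) allocates the matrix, and that the row-major values array may be written directly):

```c
#include <neuralnet/matrix.h>

#include <assert.h>

int main(void) {
  nnMatrix x = nnMatrixMake(1, 3);  // Layer input, a 1xN row vector.
  nnMatrix g = nnMatrixMake(1, 2);  // Back-propagated gradient, 1xM.
  for (int i = 0; i < 3; ++i) x.values[i] = (R)(i + 1);
  for (int j = 0; j < 2; ++j) g.values[j] = (R)(j + 1);

  // Old path: transpose the input, then do a regular matrix multiply.
  nnMatrix x_T = nnMatrixMake(3, 1);
  nnMatrix expected = nnMatrixMake(3, 2);
  nnMatrixTranspose(&x, &x_T);
  nnMatrixMul(&x_T, &g, &expected);

  // New path: one outer-product call, no transpose buffer.
  nnMatrix actual = nnMatrixMake(3, 2);
  nnMatrixMulOuter(&x, &g, &actual);

  // Both paths produce W[i][j] == x[i] * g[j].
  for (int k = 0; k < 3 * 2; ++k) {
    assert(actual.values[k] == expected.values[k]);
  }

  nnMatrixDel(&x);
  nnMatrixDel(&x_T);
  nnMatrixDel(&g);
  nnMatrixDel(&expected);
  nnMatrixDel(&actual);
  return 0;
}
```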
```diff
diff --git a/src/lib/include/neuralnet/matrix.h b/src/lib/include/neuralnet/matrix.h
index f80b985..4cb0d25 100644
--- a/src/lib/include/neuralnet/matrix.h
+++ b/src/lib/include/neuralnet/matrix.h
@@ -56,13 +56,20 @@ void nnMatrixInitConstant(nnMatrix*, R value);
 /// Multiply two matrices.
 void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
 
-/// Multiply two matrices, row variant.
+/// Multiply two matrices, row-by-row variant.
 ///
-/// This function multiples two matrices row-by-row instead of row-by-column.
-/// nnMatrixMul(A, B, O) == nnMatrixMulRows(A, B^T, O).
+/// This function multiplies two matrices row-by-row instead of row-by-column,
+/// which is equivalent to regular multiplication after transposing the right
+/// hand matrix.
+///
+/// nnMatrixMul(A, B, O) == nnMatrixMulRows(A, B^T, O).
 void nnMatrixMulRows(
     const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
 
+/// Compute the outer product of two vectors.
+void nnMatrixMulOuter(
+    const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
+
 /// Matrix multiply-add.
 ///
 /// out = left + (right * scale)
```
```diff
diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c
index d5c3fcc..29511eb 100644
--- a/src/lib/src/matrix.c
+++ b/src/lib/src/matrix.c
@@ -189,6 +189,32 @@ void nnMatrixMulRows(
   }
 }
 
+void nnMatrixMulOuter(
+    const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
+  assert(left != 0);
+  assert(right != 0);
+  assert(out != 0);
+  assert(out != left);
+  assert(out != right);
+  assert((left->rows == 1) || (left->cols == 1));  // Vector.
+  assert((right->rows == 1) || (right->cols == 1));  // Vector.
+  const int N = left->rows * left->cols;
+  const int M = right->rows * right->cols;
+  assert((out->rows == N) && (out->cols == M));
+
+  const R* left_value = left->values;
+  R* out_value = out->values;
+
+  for (int i = 0; i < N; ++i) {
+    const R* right_value = right->values;
+
+    for (int j = 0; j < M; ++j) {
+      *out_value++ = *left_value * *right_value++;
+    }
+    left_value++;
+  }
+}
+
 void nnMatrixMulAdd(
     const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) {
   assert(left);
```
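For readability, the pointer-walking loop in nnMatrixMulOuter above is equivalent to the following index-based form. This is a sketch with a hypothetical helper name, not library code; it assumes the row-major values layout that the rest of matrix.c uses:

```c
#include <neuralnet/matrix.h>

// Index-based restatement of nnMatrixMulOuter's loop: the output is filled
// row-major with out[i][j] = left[i] * right[j].
static void outer_product_by_index(
    const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
  const int N = left->rows * left->cols;
  const int M = right->rows * right->cols;
  for (int i = 0; i < N; ++i) {
    for (int j = 0; j < M; ++j) {
      out->values[i * M + j] = left->values[i] * right->values[j];
    }
  }
}
```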
```diff
diff --git a/src/lib/src/train.c b/src/lib/src/train.c
index ccff553..fe9f598 100644
--- a/src/lib/src/train.c
+++ b/src/lib/src/train.c
@@ -153,14 +153,9 @@ void nnTrain(
   nnGradientElements* gradient_elems =
       calloc(net->num_layers, sizeof(nnGradientElements));
 
-  // Allocate the output transpose vectors for weight delta calculation.
-  // This is one column vector per layer.
-  nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix));
-
   assert(errors != 0);
   assert(weight_deltas != 0);
   assert(gradient_elems);
-  assert(outputs_T);
 
   for (int l = 0; l < net->num_layers; ++l) {
     const int layer_input_size = nnLayerInputSize(net, l);
@@ -169,7 +164,6 @@ void nnTrain(
 
     errors[l] = nnMatrixMake(1, layer_output_size);
     weight_deltas[l] = nnMatrixMake(layer_input_size, layer_output_size);
-    outputs_T[l] = nnMatrixMake(layer_output_size, 1);
 
     // Allocate the gradient elements and vectors for weight delta calculation.
     nnGradientElements* elems = &gradient_elems[l];
@@ -199,9 +193,6 @@ void nnTrain(
   // the outputs.
   const nnMatrix* const training_outputs = query->network_outputs;
 
-  // A vector to store the training input transposed.
-  nnMatrix training_inputs_T = nnMatrixMake(inputs->cols, 1);
-
   // If debug mode is requested, we will show progress every Nth iteration.
   const int progress_frame =
       (params->max_iterations < PROGRESS_THRESHOLD)
@@ -223,10 +214,6 @@ void nnTrain(
     const nnMatrix training_targets =
         nnMatrixBorrowRows((nnMatrix*)targets, sample, 1);
 
-    // Will need the input transposed for backpropagation.
-    // Assuming one training input per iteration for now.
-    nnMatrixTranspose(&training_inputs, &training_inputs_T);
-
     // Forward pass.
     nnQuery(net, query, &training_inputs);
 
@@ -240,14 +227,11 @@ void nnTrain(
     nnMatrixSub(
         training_outputs, &training_targets, &errors[net->num_layers - 1]);
 
-    // Update outputs_T, which we need during weight updates.
-    for (int l = 0; l < net->num_layers; ++l) {
-      nnMatrixTranspose(&query->layer_outputs[l], &outputs_T[l]);
-    }
-
     // Update weights and biases for each internal layer, back-propagating
     // errors along the way.
     for (int l = net->num_layers - 1; l >= 0; --l) {
+      const nnMatrix* layer_input =
+          (l == 0) ? &training_inputs : &query->layer_outputs[l - 1];
       const nnMatrix* layer_output = &query->layer_outputs[l];
       nnGradientElements* elems = &gradient_elems[l];
       nnMatrix* gradient = &elems->gradient;
@@ -310,10 +294,7 @@ void nnTrain(
       nnMatrix* layer_biases = &linear->biases;
 
       // Outer product to compute the weight deltas.
-      // This layer's input is the previous layer's output.
-      const nnMatrix* input_T =
-          (l == 0) ? &training_inputs_T : &outputs_T[l - 1];
-      nnMatrixMul(input_T, gradient, &weight_deltas[l]);
+      nnMatrixMulOuter(layer_input, gradient, &weight_deltas[l]);
 
       // Update weights.
       nnMatrixScale(&weight_deltas[l], params->learning_rate);
@@ -360,7 +341,6 @@ void nnTrain(
   // Clean up.
   for (int l = 0; l < net->num_layers; ++l) {
     nnMatrixDel(&errors[l]);
-    nnMatrixDel(&outputs_T[l]);
    nnMatrixDel(&weight_deltas[l]);
 
     nnGradientElements* elems = &gradient_elems[l];
@@ -378,9 +358,7 @@ void nnTrain(
       break;
     }
  }
-  nnMatrixDel(&training_inputs_T);
   free(errors);
-  free(outputs_T);
   free(weight_deltas);
   free(gradient_elems);
 }
```
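Stated as an equation (a restatement of the loop above, not text from the commit): with layer input x, a 1 x N row vector that is the training input when l == 0 and the previous layer's output otherwise, and back-propagated gradient g, a 1 x M row vector, the per-layer weight delta is the outer product

$$ \Delta W = x^{\mathsf{T}} g, \qquad \Delta W_{ij} = x_i \, g_j \quad (0 \le i < N,\ 0 \le j < M), $$

which nnMatrixMulOuter now fills directly, so the outputs_T and training_inputs_T transpose buffers that the old code allocated, refreshed every iteration, and freed are no longer needed. The delta is then scaled by the learning rate as before (nnMatrixScale).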
