| field | value | date |
|---|---|---|
| author | 3gg <3gg@shellblade.net> | 2023-12-16 11:06:03 -0800 |
| committer | 3gg <3gg@shellblade.net> | 2023-12-16 11:06:03 -0800 |
| commit | dc538733da8d49e7240d00fb05517053076fe261 (patch) | |
| tree | 752cefb55f072bebbe716b8fa4e4df2baadc3138 | |
| parent | 2067bd53b182429d059a61b0e060f92b4f317ed1 (diff) | |
Define vector outer product (nnMatrixMulOuter), which removes the need to transpose layer inputs during training.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/lib/include/neuralnet/matrix.h | 13 |
| -rw-r--r-- | src/lib/src/matrix.c | 26 |
| -rw-r--r-- | src/lib/src/train.c | 28 |

3 files changed, 39 insertions, 28 deletions
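The point of the change: for a 1xN input row vector x and a 1xM gradient row vector g, nnMatrixMulOuter(&x, &g, &W) fills the NxM matrix W with the same values as transposing x and calling nnMatrixMul, so no transpose buffer is needed. A minimal sketch of that equivalence (not part of the commit; it assumes the header is included as <neuralnet/matrix.h>, that nnMatrixMake(rows, cols) allocates the matrix, and that the row-major values array may be written directly):

```c
#include <neuralnet/matrix.h>

#include <assert.h>

int main(void) {
  nnMatrix x = nnMatrixMake(1, 3);  // Layer input, a 1xN row vector.
  nnMatrix g = nnMatrixMake(1, 2);  // Back-propagated gradient, 1xM.
  for (int i = 0; i < 3; ++i) x.values[i] = (R)(i + 1);
  for (int j = 0; j < 2; ++j) g.values[j] = (R)(j + 1);

  // Old path: transpose the input, then do a regular matrix multiply.
  nnMatrix x_T = nnMatrixMake(3, 1);
  nnMatrix expected = nnMatrixMake(3, 2);
  nnMatrixTranspose(&x, &x_T);
  nnMatrixMul(&x_T, &g, &expected);

  // New path: one outer-product call, no transpose buffer.
  nnMatrix actual = nnMatrixMake(3, 2);
  nnMatrixMulOuter(&x, &g, &actual);

  // Both paths produce W[i][j] == x[i] * g[j].
  for (int k = 0; k < 3 * 2; ++k) {
    assert(actual.values[k] == expected.values[k]);
  }

  nnMatrixDel(&x);
  nnMatrixDel(&x_T);
  nnMatrixDel(&g);
  nnMatrixDel(&expected);
  nnMatrixDel(&actual);
  return 0;
}
```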
```diff
diff --git a/src/lib/include/neuralnet/matrix.h b/src/lib/include/neuralnet/matrix.h
index f80b985..4cb0d25 100644
--- a/src/lib/include/neuralnet/matrix.h
+++ b/src/lib/include/neuralnet/matrix.h
@@ -56,13 +56,20 @@ void nnMatrixInitConstant(nnMatrix*, R value);
 /// Multiply two matrices.
 void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
 
-/// Multiply two matrices, row variant.
+/// Multiply two matrices, row-by-row variant.
 ///
-/// This function multiples two matrices row-by-row instead of row-by-column.
-/// nnMatrixMul(A, B, O) == nnMatrixMulRows(A, B^T, O).
+/// This function multiplies two matrices row-by-row instead of row-by-column,
+/// which is equivalent to regular multiplication after transposing the right
+/// hand matrix.
+///
+/// nnMatrixMul(A, B, O) == nnMatrixMulRows(A, B^T, O).
 void nnMatrixMulRows(
     const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
 
+/// Compute the outer product of two vectors.
+void nnMatrixMulOuter(
+    const nnMatrix* left, const nnMatrix* right, nnMatrix* out);
+
 /// Matrix multiply-add.
 ///
 /// out = left + (right * scale)
```
```diff
diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c
index d5c3fcc..29511eb 100644
--- a/src/lib/src/matrix.c
+++ b/src/lib/src/matrix.c
@@ -189,6 +189,32 @@ void nnMatrixMulRows(
   }
 }
 
+void nnMatrixMulOuter(
+    const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
+  assert(left != 0);
+  assert(right != 0);
+  assert(out != 0);
+  assert(out != left);
+  assert(out != right);
+  assert((left->rows == 1) || (left->cols == 1));  // Vector.
+  assert((right->rows == 1) || (right->cols == 1));  // Vector.
+  const int N = left->rows * left->cols;
+  const int M = right->rows * right->cols;
+  assert((out->rows == N) && (out->cols == M));
+
+  const R* left_value = left->values;
+  R* out_value = out->values;
+
+  for (int i = 0; i < N; ++i) {
+    const R* right_value = right->values;
+
+    for (int j = 0; j < M; ++j) {
+      *out_value++ = *left_value * *right_value++;
+    }
+    left_value++;
+  }
+}
+
 void nnMatrixMulAdd(
     const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) {
   assert(left);
```
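For readability, the pointer-walking loop in nnMatrixMulOuter above is equivalent to the following index-based form. This is a sketch with a hypothetical helper name, not library code; it assumes the row-major values layout that the rest of matrix.c uses:

```c
#include <neuralnet/matrix.h>

// Index-based restatement of nnMatrixMulOuter's loop: the output is filled
// row-major with out[i][j] = left[i] * right[j].
static void outer_product_by_index(
    const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
  const int N = left->rows * left->cols;
  const int M = right->rows * right->cols;
  for (int i = 0; i < N; ++i) {
    for (int j = 0; j < M; ++j) {
      out->values[i * M + j] = left->values[i] * right->values[j];
    }
  }
}
```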
```diff
diff --git a/src/lib/src/train.c b/src/lib/src/train.c
index ccff553..fe9f598 100644
--- a/src/lib/src/train.c
+++ b/src/lib/src/train.c
@@ -153,14 +153,9 @@ void nnTrain(
   nnGradientElements* gradient_elems =
       calloc(net->num_layers, sizeof(nnGradientElements));
 
-  // Allocate the output transpose vectors for weight delta calculation.
-  // This is one column vector per layer.
-  nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix));
-
   assert(errors != 0);
   assert(weight_deltas != 0);
   assert(gradient_elems);
-  assert(outputs_T);
 
   for (int l = 0; l < net->num_layers; ++l) {
     const int layer_input_size = nnLayerInputSize(net, l);
@@ -169,7 +164,6 @@ void nnTrain(
 
     errors[l] = nnMatrixMake(1, layer_output_size);
     weight_deltas[l] = nnMatrixMake(layer_input_size, layer_output_size);
-    outputs_T[l] = nnMatrixMake(layer_output_size, 1);
 
     // Allocate the gradient elements and vectors for weight delta calculation.
     nnGradientElements* elems = &gradient_elems[l];
@@ -199,9 +193,6 @@ void nnTrain(
   // the outputs.
   const nnMatrix* const training_outputs = query->network_outputs;
 
-  // A vector to store the training input transposed.
-  nnMatrix training_inputs_T = nnMatrixMake(inputs->cols, 1);
-
   // If debug mode is requested, we will show progress every Nth iteration.
   const int progress_frame =
       (params->max_iterations < PROGRESS_THRESHOLD)
@@ -223,10 +214,6 @@ void nnTrain(
     const nnMatrix training_targets =
         nnMatrixBorrowRows((nnMatrix*)targets, sample, 1);
 
-    // Will need the input transposed for backpropagation.
-    // Assuming one training input per iteration for now.
-    nnMatrixTranspose(&training_inputs, &training_inputs_T);
-
     // Forward pass.
     nnQuery(net, query, &training_inputs);
 
@@ -240,14 +227,11 @@ void nnTrain(
     nnMatrixSub(
         training_outputs, &training_targets, &errors[net->num_layers - 1]);
 
-    // Update outputs_T, which we need during weight updates.
-    for (int l = 0; l < net->num_layers; ++l) {
-      nnMatrixTranspose(&query->layer_outputs[l], &outputs_T[l]);
-    }
-
     // Update weights and biases for each internal layer, back-propagating
     // errors along the way.
     for (int l = net->num_layers - 1; l >= 0; --l) {
+      const nnMatrix* layer_input =
+          (l == 0) ? &training_inputs : &query->layer_outputs[l - 1];
       const nnMatrix* layer_output = &query->layer_outputs[l];
       nnGradientElements* elems = &gradient_elems[l];
       nnMatrix* gradient = &elems->gradient;
@@ -310,10 +294,7 @@ void nnTrain(
       nnMatrix* layer_biases = &linear->biases;
 
       // Outer product to compute the weight deltas.
-      // This layer's input is the previous layer's output.
-      const nnMatrix* input_T =
-          (l == 0) ? &training_inputs_T : &outputs_T[l - 1];
-      nnMatrixMul(input_T, gradient, &weight_deltas[l]);
+      nnMatrixMulOuter(layer_input, gradient, &weight_deltas[l]);
 
       // Update weights.
       nnMatrixScale(&weight_deltas[l], params->learning_rate);
@@ -360,7 +341,6 @@ void nnTrain(
   // Clean up.
   for (int l = 0; l < net->num_layers; ++l) {
     nnMatrixDel(&errors[l]);
-    nnMatrixDel(&outputs_T[l]);
    nnMatrixDel(&weight_deltas[l]);
 
     nnGradientElements* elems = &gradient_elems[l];
@@ -378,9 +358,7 @@ void nnTrain(
       break;
     }
  }
-  nnMatrixDel(&training_inputs_T);
   free(errors);
-  free(outputs_T);
   free(weight_deltas);
   free(gradient_elems);
 }
```
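Stated as an equation (a restatement of the loop above, not text from the commit): with layer input x, a 1 x N row vector that is the training input when l == 0 and the previous layer's output otherwise, and back-propagated gradient g, a 1 x M row vector, the per-layer weight delta is the outer product

$$ \Delta W = x^{\mathsf{T}} g, \qquad \Delta W_{ij} = x_i \, g_j \quad (0 \le i < N,\ 0 \le j < M), $$

which nnMatrixMulOuter now fills directly, so the outputs_T and training_inputs_T transpose buffers that the old code allocated, refreshed every iteration, and freed are no longer needed. The delta is then scaled by the learning rate as before (nnMatrixScale).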
