 CMakeLists.txt                                          |   6
 LICENSE                                                 |   2
 src/bin/CMakeLists.txt                                  |   3
 src/bin/mnist/CMakeLists.txt                            |  11
 src/bin/mnist/src/main.c                                | 473
 src/lib/CMakeLists.txt                                  |  37
 src/lib/include/neuralnet/matrix.h                      | 111
 src/lib/include/neuralnet/neuralnet.h                   |  64
 src/lib/include/neuralnet/train.h                       |  42
 src/lib/include/neuralnet/types.h                       |   3
 src/lib/src/activation.h                                |  21
 src/lib/src/matrix.c                                    | 298
 src/lib/src/neuralnet.c                                 | 228
 src/lib/src/neuralnet_impl.h                            |  36
 src/lib/src/train.c                                     | 346
 src/lib/test/matrix_test.c                              | 350
 src/lib/test/neuralnet_test.c                           |  92
 src/lib/test/test.h                                     | 185
 src/lib/test/test_main.c                                |   3
 src/lib/test/test_util.h                                |  22
 src/lib/test/train_linear_perceptron_non_origin_test.c  |  67
 src/lib/test/train_linear_perceptron_test.c             |  62
 src/lib/test/train_sigmoid_test.c                       |  66
 src/lib/test/train_xor_test.c                           |  66
 24 files changed, 2593 insertions(+), 1 deletion(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..a060fab
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,6 @@
cmake_minimum_required(VERSION 3.0)

project(neuralnet)

add_subdirectory(src/lib)
add_subdirectory(src/bin)
diff --git a/LICENSE b/LICENSE
--- a/LICENSE
+++ b/LICENSE
@@ -1,7 +1,7 @@
                     GNU AFFERO GENERAL PUBLIC LICENSE
                        Version 3, 19 November 2007
 
-Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+Copyright (C) 2022 Marc Sunet <https://shellblade.net/>
 
 Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
 
diff --git a/src/bin/CMakeLists.txt b/src/bin/CMakeLists.txt
new file mode 100644
index 0000000..051a56f
--- /dev/null
+++ b/src/bin/CMakeLists.txt
@@ -0,0 +1,3 @@
cmake_minimum_required(VERSION 3.0)

add_subdirectory(mnist)
diff --git a/src/bin/mnist/CMakeLists.txt b/src/bin/mnist/CMakeLists.txt
new file mode 100644
index 0000000..a6c54f2
--- /dev/null
+++ b/src/bin/mnist/CMakeLists.txt
@@ -0,0 +1,11 @@
cmake_minimum_required(VERSION 3.0)

add_executable(mnist
  src/main.c)

target_link_libraries(mnist PRIVATE
  neuralnet
  bsd
  z)

target_compile_options(mnist PRIVATE -Wall -Wextra)
diff --git a/src/bin/mnist/src/main.c b/src/bin/mnist/src/main.c
new file mode 100644
index 0000000..4d268ac
--- /dev/null
+++ b/src/bin/mnist/src/main.c
@@ -0,0 +1,473 @@
#include <neuralnet/matrix.h>
#include <neuralnet/neuralnet.h>
#include <neuralnet/train.h>

#include <zlib.h>

#include <assert.h>
#include <bsd/string.h>
#include <linux/limits.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static const int TRAIN_ITERATIONS = 100;

static const int32_t IMAGE_FILE_MAGIC = 0x00000803;
static const int32_t LABEL_FILE_MAGIC = 0x00000801;

// Inputs of 0 cancel weights during training. This value is used to rescale
// the input pixels from [0,255] to [PIXEL_LOWER_BOUND, 1.0].
static const double PIXEL_LOWER_BOUND = 0.01;

// Scale the outputs to (0,1) since the sigmoid cannot produce 0 or 1.
static const double LABEL_LOWER_BOUND = 0.01;
static const double LABEL_UPPER_BOUND = 0.99;

// Epsilon used to compare R values.
static const double EPS = 1e-10;

#define min(a,b) ((a) < (b) ? (a) : (b))

typedef struct ImageSet {
  nnMatrix images;  // Images flattened into row vectors of the matrix.
  nnMatrix labels;  // One-hot-encoded labels.
  int count;        // Number of images and labels.
  int rows;         // Rows in an image.
  int cols;         // Columns in an image.
} ImageSet;

static void usage(const char* argv0) {
  fprintf(stderr, "Usage: %s <path to mnist files directory> [num images]\n", argv0);
  fprintf(stderr, "\n");
  fprintf(stderr, "  Use -1 for [num images] to use all the images in the data set\n");
}

static bool R_eq(R a, R b) {
  return fabs(a-b) <= EPS;
}

static void PrintImage(const nnMatrix* images, int rows, int cols, int image_index) {
  assert(images);
  assert((0 <= image_index) && (image_index < images->rows));

  // Top line.
  for (int j = 0; j < cols/2; ++j) {
    printf(" -");
  }
  printf("\n");

  // Image.
  const R* value = nnMatrixRow(images, image_index);
  for (int i = 0; i < rows; ++i) {
    printf("|");
    for (int j = 0; j < cols; ++j) {
      if (*value > 0.8) {
        printf("#");
      } else if (*value > 0.5) {
        printf("*");
      } else if (*value > PIXEL_LOWER_BOUND) {
        printf(":");
      } else if (*value == 0.0) {
        // Values should not be exactly 0, otherwise they cancel out weights
        // during training.
        printf("X");
      } else {
        printf(" ");
      }
      value++;
    }
    printf("|\n");
  }

  // Bottom line.
  for (int j = 0; j < cols/2; ++j) {
    printf(" -");
  }
  printf("\n");
}

static void PrintLabel(const nnMatrix* labels, int label_index) {
  assert(labels);
  assert((0 <= label_index) && (label_index < labels->rows));

  // Compute the label from the one-hot encoding.
  const R* value = nnMatrixRow(labels, label_index);
  int label = -1;
  for (int i = 0; i < 10; ++i) {
    if (R_eq(*value++, LABEL_UPPER_BOUND)) {
      label = i;
      break;
    }
  }
  assert((0 <= label) && (label <= 9));

  printf("Label: %d ( ", label);
  value = nnMatrixRow(labels, label_index);
  for (int i = 0; i < 10; ++i) {
    printf("%.3f ", *value++);
  }
  printf(")\n");
}

static R lerp(R a, R b, R t) {
  return a + t*(b-a);
}

/// Rescales a pixel from [0,255] to [PIXEL_LOWER_BOUND, 1.0].
static R FormatPixel(uint8_t pixel) {
  const R value = (R)(pixel) / 255.0 * (1.0 - PIXEL_LOWER_BOUND) + PIXEL_LOWER_BOUND;
  assert(value >= PIXEL_LOWER_BOUND);
  assert(value <= 1.0);
  return value;
}

/// Rescales a one-hot-encoded label value to (0,1).
static R FormatLabel(R label) {
  const R value = lerp(LABEL_LOWER_BOUND, LABEL_UPPER_BOUND, label);
  assert(value > 0.0);
  assert(value < 1.0);
  return value;
}

static int32_t ReverseEndian32(int32_t x) {
  const int32_t x0 = x & 0xff;
  const int32_t x1 = (x >> 8) & 0xff;
  const int32_t x2 = (x >> 16) & 0xff;
  const int32_t x3 = (x >> 24) & 0xff;
  return (x0 << 24) | (x1 << 16) | (x2 << 8) | x3;
}
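Note on ReverseEndian32: the MNIST IDX files store their header fields in big-endian byte order, so a raw read of the image-file magic 0x00000803 on a little-endian machine yields 0x03080000 until it is byte-swapped. A hypothetical self-check, assuming it were pasted into this file (it is not part of the commit):

    // Swapping the little-endian reading of a big-endian header recovers the
    // magic, and swapping twice round-trips.
    assert(ReverseEndian32(0x03080000) == 0x00000803);
    assert(ReverseEndian32(ReverseEndian32(0x12345678)) == 0x12345678);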
static void ImageToMatrix(
    const uint8_t* pixels, int num_pixels, int row, nnMatrix* images) {
  assert(pixels);
  assert(images);

  for (int i = 0; i < num_pixels; ++i) {
    const R pixel = FormatPixel(pixels[i]);
    nnMatrixSet(images, row, i, pixel);
  }
}

static bool ReadImages(gzFile images_file, int max_num_images, ImageSet* image_set) {
  assert(images_file != Z_NULL);
  assert(image_set);

  bool success = false;

  uint8_t* pixels = 0;

  int32_t magic, total_images, rows, cols;
  if ( (gzread(images_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) ||
       (gzread(images_file, (char*)&total_images, sizeof(int32_t)) != sizeof(int32_t)) ||
       (gzread(images_file, (char*)&rows, sizeof(int32_t)) != sizeof(int32_t)) ||
       (gzread(images_file, (char*)&cols, sizeof(int32_t)) != sizeof(int32_t)) ) {
    fprintf(stderr, "Failed to read header\n");
    goto cleanup;
  }

  magic = ReverseEndian32(magic);
  total_images = ReverseEndian32(total_images);
  rows = ReverseEndian32(rows);
  cols = ReverseEndian32(cols);

  if (magic != IMAGE_FILE_MAGIC) {
    fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n",
        magic, IMAGE_FILE_MAGIC);
    goto cleanup;
  }

  printf("Magic: %.8x\nTotal images: %d\nRows: %d\nCols: %d\n",
      magic, total_images, rows, cols);

  total_images = max_num_images >= 0 ? min(total_images, max_num_images) : total_images;

  // Images are flattened into single row vectors.
  const int num_pixels = rows * cols;
  image_set->images = nnMatrixMake(total_images, num_pixels);
  image_set->count = total_images;
  image_set->rows = rows;
  image_set->cols = cols;

  pixels = calloc(1, num_pixels);
  if (!pixels) {
    fprintf(stderr, "Failed to allocate image buffer\n");
    goto cleanup;
  }

  for (int i = 0; i < total_images; ++i) {
    const int bytes_read = gzread(images_file, pixels, num_pixels);
    if (bytes_read < num_pixels) {
      fprintf(stderr, "Failed to read image %d\n", i);
      goto cleanup;
    }
    ImageToMatrix(pixels, num_pixels, i, &image_set->images);
  }

  success = true;

cleanup:
  if (pixels) {
    free(pixels);
  }
  if (!success) {
    nnMatrixDel(&image_set->images);
  }
  return success;
}

static void OneHotEncode(const uint8_t* labels_bytes, int num_labels, nnMatrix* labels) {
  assert(labels_bytes);
  assert(labels);
  assert(labels->rows == num_labels);
  assert(labels->cols == 10);

  static const R one_hot[10][10] = {
    { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
    { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
    { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
    { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
    { 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 },
    { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 },
    { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 },
    { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },
    { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 },
    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
  };

  R* value = labels->values;

  for (int i = 0; i < num_labels; ++i) {
    const uint8_t label = labels_bytes[i];
    const R* one_hot_value = one_hot[label];

    for (int j = 0; j < 10; ++j) {
      *value++ = FormatLabel(*one_hot_value++);
    }
  }
}

static int OneHotDecode(const nnMatrix* label_matrix) {
  assert(label_matrix);
  // Called with 1x10 row vectors (borrowed label rows and network outputs).
  assert(label_matrix->rows == 1);
  assert(label_matrix->cols == 10);

  R max_value = 0;
  int pos_max = 0;
  for (int i = 0; i < 10; ++i) {
    const R value = nnMatrixAt(label_matrix, 0, i);
    if (value > max_value) {
      max_value = value;
      pos_max = i;
    }
  }
  assert(pos_max >= 0);
  assert(pos_max <= 9);
  return pos_max;
}
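OneHotEncode and OneHotDecode are inverses up to the (0,1) rescaling: encoding digit d produces a row whose d-th entry is FormatLabel(1) = 0.99 and whose other entries are FormatLabel(0) = 0.01, and decoding takes the argmax. A hypothetical round-trip check (not part of the commit):

    const uint8_t digit[1] = { 3 };
    nnMatrix labels = nnMatrixMake(1, 10);
    OneHotEncode(digit, 1, &labels);
    // Row 0 is now { 0.01, 0.01, 0.01, 0.99, 0.01, ... }.
    assert(OneHotDecode(&labels) == 3);
    nnMatrixDel(&labels);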
static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_set) {
  assert(labels_file != Z_NULL);
  assert(image_set != 0);

  bool success = false;

  uint8_t* labels = 0;

  int32_t magic, total_labels;
  if ( (gzread(labels_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) ||
       (gzread(labels_file, (char*)&total_labels, sizeof(int32_t)) != sizeof(int32_t)) ) {
    fprintf(stderr, "Failed to read header\n");
    goto cleanup;
  }

  magic = ReverseEndian32(magic);
  total_labels = ReverseEndian32(total_labels);

  if (magic != LABEL_FILE_MAGIC) {
    fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n",
        magic, LABEL_FILE_MAGIC);
    goto cleanup;
  }

  printf("Magic: %.8x\nTotal labels: %d\n", magic, total_labels);

  total_labels = max_num_labels >= 0 ? min(total_labels, max_num_labels) : total_labels;

  assert(image_set->count == total_labels);

  // One-hot encoding of labels, 10 values (digits) per label.
  image_set->labels = nnMatrixMake(total_labels, 10);

  labels = calloc(total_labels, sizeof(uint8_t));
  if (!labels) {
    fprintf(stderr, "Failed to allocate labels buffer\n");
    goto cleanup;
  }

  if (gzread(labels_file, labels, total_labels * sizeof(uint8_t)) != total_labels) {
    fprintf(stderr, "Failed to read labels\n");
    goto cleanup;
  }

  OneHotEncode(labels, total_labels, &image_set->labels);

  success = true;

cleanup:
  if (labels) {
    free(labels);
  }
  if (!success) {
    nnMatrixDel(&image_set->labels);
  }
  return success;
}

int main(int argc, const char** argv) {
  if (argc < 2) {
    usage(argv[0]);
    return 1;
  }

  bool success = false;

  gzFile train_images_file = Z_NULL;
  gzFile train_labels_file = Z_NULL;
  gzFile test_images_file = Z_NULL;
  gzFile test_labels_file = Z_NULL;
  ImageSet train_set = { 0 };
  ImageSet test_set = { 0 };
  nnNeuralNetwork* net = 0;
  nnQueryObject* query = 0;

  const char* mnist_files_dir = argv[1];
  const int max_num_images = argc > 2 ? atoi(argv[2]) : -1;

  char train_labels_path[PATH_MAX];
  char train_images_path[PATH_MAX];
  char test_labels_path[PATH_MAX];
  char test_images_path[PATH_MAX];
  strlcpy(train_labels_path, mnist_files_dir, PATH_MAX);
  strlcpy(train_images_path, mnist_files_dir, PATH_MAX);
  strlcpy(test_labels_path, mnist_files_dir, PATH_MAX);
  strlcpy(test_images_path, mnist_files_dir, PATH_MAX);
  strlcat(train_labels_path, "/train-labels-idx1-ubyte.gz", PATH_MAX);
  strlcat(train_images_path, "/train-images-idx3-ubyte.gz", PATH_MAX);
  strlcat(test_labels_path, "/t10k-labels-idx1-ubyte.gz", PATH_MAX);
  strlcat(test_images_path, "/t10k-images-idx3-ubyte.gz", PATH_MAX);

  train_images_file = gzopen(train_images_path, "r");
  if (train_images_file == Z_NULL) {
    fprintf(stderr, "Failed to open file: %s\n", train_images_path);
    goto cleanup;
  }

  train_labels_file = gzopen(train_labels_path, "r");
  if (train_labels_file == Z_NULL) {
    fprintf(stderr, "Failed to open file: %s\n", train_labels_path);
    goto cleanup;
  }

  test_images_file = gzopen(test_images_path, "r");
  if (test_images_file == Z_NULL) {
    fprintf(stderr, "Failed to open file: %s\n", test_images_path);
    goto cleanup;
  }

  test_labels_file = gzopen(test_labels_path, "r");
  if (test_labels_file == Z_NULL) {
    fprintf(stderr, "Failed to open file: %s\n", test_labels_path);
    goto cleanup;
  }

  if (!ReadImages(train_images_file, max_num_images, &train_set)) {
    goto cleanup;
  }
  if (!ReadLabels(train_labels_file, max_num_images, &train_set)) {
    goto cleanup;
  }

  if (!ReadImages(test_images_file, max_num_images, &test_set)) {
    goto cleanup;
  }
  if (!ReadLabels(test_labels_file, max_num_images, &test_set)) {
    goto cleanup;
  }

  printf("\nTraining image/label pair examples:\n");
  for (int i = 0; i < min(3, train_set.images.rows); ++i) {
    PrintImage(&train_set.images, train_set.rows, train_set.cols, i);
    PrintLabel(&train_set.labels, i);
    printf("\n");
  }

  // Network definition.
  const int image_size_pixels = train_set.rows * train_set.cols;
  const int num_layers = 2;
  const int layer_sizes[3] = { image_size_pixels, 100, 10 };
  const nnActivation layer_activations[2] = { nnSigmoid, nnSigmoid };
  if (!(net = nnMakeNet(num_layers, layer_sizes, layer_activations))) {
    fprintf(stderr, "Failed to create neural network\n");
    goto cleanup;
  }

  // Train.
  printf("Training with up to %d images from the data set\n\n", max_num_images);
  const nnTrainingParams training_params = {
    .learning_rate = 0.1,
    .max_iterations = TRAIN_ITERATIONS,
    .seed = 0,
    .weight_init = nnWeightInitNormal,
    .debug = true,
  };
  nnTrain(net, &train_set.images, &train_set.labels, &training_params);

  // Test.
  int hits = 0;
  query = nnMakeQueryObject(net, /*num_inputs=*/1);
  for (int i = 0; i < test_set.count; ++i) {
    const nnMatrix test_image = nnMatrixBorrowRows(&test_set.images, i, 1);
    const nnMatrix test_label = nnMatrixBorrowRows(&test_set.labels, i, 1);

    nnQuery(net, query, &test_image);

    const int test_label_expected = OneHotDecode(&test_label);
    const int test_label_actual = OneHotDecode(nnNetOutputs(query));

    if (test_label_actual == test_label_expected) {
      ++hits;
    }
  }
  const R hit_ratio = (R)hits / (R)test_set.count;
  printf("Test images: %d\n", test_set.count);
  printf("Hits: %d/%d (%.3f%%)\n", hits, test_set.count, hit_ratio*100);

  success = true;

cleanup:
  if (query) {
    nnDeleteQueryObject(&query);
  }
  if (net) {
    nnDeleteNet(&net);
  }
  nnMatrixDel(&train_set.images);
  nnMatrixDel(&train_set.labels);
  nnMatrixDel(&test_set.images);
  nnMatrixDel(&test_set.labels);
  if (train_images_file != Z_NULL) {
    gzclose(train_images_file);
  }
  if (train_labels_file != Z_NULL) {
    gzclose(train_labels_file);
  }
  if (test_images_file != Z_NULL) {
    gzclose(test_images_file);
  }
  if (test_labels_file != Z_NULL) {
    gzclose(test_labels_file);
  }
  return success ? 0 : 1;
}
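For reference, a typical build-and-run sequence with a recent CMake looks like the following; the data directory is an example path, and with the default build layout the binary lands under build/src/bin/mnist:

    cmake -B build
    cmake --build build
    ./build/src/bin/mnist/mnist /path/to/mnist-data 1000

The optional second argument caps how many images are loaded; -1 (or omitting it) uses the full data set, matching the usage() text above.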
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
new file mode 100644
index 0000000..9e0e924
--- /dev/null
+++ b/src/lib/CMakeLists.txt
@@ -0,0 +1,37 @@
cmake_minimum_required(VERSION 3.0)

# Library

add_library(neuralnet
  src/matrix.c
  src/neuralnet.c
  src/train.c)

target_include_directories(neuralnet PUBLIC
  include)

target_link_libraries(neuralnet PRIVATE
  m       # System math library.
  random)

target_compile_options(neuralnet PRIVATE -Wall -Wextra)

# Test

add_executable(neuralnet-test
  test/matrix_test.c
  test/neuralnet_test.c
  test/test_main.c
  test/train_linear_perceptron_test.c
  test/train_linear_perceptron_non_origin_test.c
  test/train_sigmoid_test.c
  test/train_xor_test.c)

target_link_libraries(neuralnet-test PRIVATE
  neuralnet)

# So that we can include header files from the private implementation.
target_include_directories(neuralnet-test PRIVATE
  src)

target_compile_options(neuralnet-test PRIVATE -DUNIT_TEST -Wall -Wextra)
diff --git a/src/lib/include/neuralnet/matrix.h b/src/lib/include/neuralnet/matrix.h
new file mode 100644
index 0000000..9816b81
--- /dev/null
+++ b/src/lib/include/neuralnet/matrix.h
@@ -0,0 +1,111 @@
#pragma once

#include <neuralnet/types.h>

#include <assert.h>

/// NxM matrix.
typedef struct nnMatrix {
  int rows;
  int cols;
  R* values;
} nnMatrix;

/// Construct a matrix.
nnMatrix nnMatrixMake(int rows, int cols);

/// Delete a matrix and free its internal memory.
void nnMatrixDel(nnMatrix*);

/// Move a matrix.
///
/// |in| is an empty matrix after the move.
/// |out| is a matrix like |in| before the move.
void nnMatrixMove(nnMatrix* in, nnMatrix* out);

/// Deep-copy a matrix.
void nnMatrixCopy(const nnMatrix* in, nnMatrix* out);

/// Write the matrix values into an array in a row-major fashion.
void nnMatrixToArray(const nnMatrix* in, R* out);

/// Write the given row of a matrix into an array.
void nnMatrixRowToArray(const nnMatrix* in, int row, R* out);

/// Copy a column from a source to a target matrix.
void nnMatrixCopyCol(const nnMatrix* in, nnMatrix* out, int col_in, int col_out);

/// Mutable borrow of a matrix.
nnMatrix nnMatrixBorrow(nnMatrix* in);

/// Mutable borrow of a subrange of rows of a matrix.
nnMatrix nnMatrixBorrowRows(nnMatrix* in, int row_start, int num_rows);

/// Initialize the matrix from an array of values.
///
/// The array must hold values in a row-major fashion.
void nnMatrixInit(nnMatrix*, const R* values);

/// Initialize all matrix values to a given constant.
void nnMatrixInitConstant(nnMatrix*, R value);

/// Multiply two matrices.
void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);

/// Matrix multiply-add.
///
/// out = left + (right * scale)
void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out);

/// Matrix multiply-subtract.
///
/// out = left - (right * scale)
void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out);

/// Hadamard product of two matrices.
void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);

/// Add two matrices.
void nnMatrixAdd(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);

/// Subtract two matrices.
void nnMatrixSub(const nnMatrix* left, const nnMatrix* right, nnMatrix* out);

/// Adds a row vector to all rows of the matrix.
void nnMatrixAddRow(const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out);

/// Scale a matrix.
void nnMatrixScale(nnMatrix*, R scale);

/// Transpose a matrix.
/// |in| must be different from |out|.
void nnMatrixTranspose(const nnMatrix* in, nnMatrix* out);

/// Threshold the values of a matrix using a greater-than operator.
///
/// out[x,y] = 1 if in[x,y] > threshold else 0
void nnMatrixGt(const nnMatrix* in, R threshold, nnMatrix* out);

/// Return the matrix value at the given row and column.
static inline R nnMatrixAt(const nnMatrix* matrix, int row, int col) {
  assert(matrix);
  return matrix->values[row * matrix->cols + col];
}

/// Set the matrix value at the given row and column.
static inline void nnMatrixSet(nnMatrix* matrix, int row, int col, R value) {
  assert(matrix);
  matrix->values[row * matrix->cols + col] = value;
}

/// Return a pointer to the given row in the matrix.
static inline const R* nnMatrixRow(const nnMatrix* matrix, int row) {
  assert(matrix);
  return &matrix->values[row * matrix->cols];
}

/// Return a mutable pointer to the given row in the matrix.
static inline R* nnMatrixRow_mut(nnMatrix* matrix, int row) {
  assert(matrix);
  return &matrix->values[row * matrix->cols];
}
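The storage convention behind these accessors: values is row-major, so element (row, col) of a rows x cols matrix lives at values[row * cols + col]. That is also why nnMatrixBorrowRows can hand out a view of a row range without copying. A small sketch using only this header's API:

    nnMatrix m = nnMatrixMake(2, 3);               // 2x3, zeroed by calloc.
    nnMatrixSet(&m, 1, 2, 42.0);                   // Writes values[1*3 + 2].
    assert(nnMatrixAt(&m, 1, 2) == 42.0);
    nnMatrix row1 = nnMatrixBorrowRows(&m, 1, 1);  // 1x3 view; no allocation.
    assert(nnMatrixAt(&row1, 0, 2) == 42.0);
    nnMatrixDel(&m);                               // Invalidates the borrow too.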
diff --git a/src/lib/include/neuralnet/neuralnet.h b/src/lib/include/neuralnet/neuralnet.h
new file mode 100644
index 0000000..1cf1c53
--- /dev/null
+++ b/src/lib/include/neuralnet/neuralnet.h
@@ -0,0 +1,64 @@
#pragma once

#include <neuralnet/types.h>

typedef struct nnMatrix nnMatrix;

typedef struct nnNeuralNetwork nnNeuralNetwork;
typedef struct nnQueryObject nnQueryObject;

/// Neuron activation.
typedef enum nnActivation {
  nnIdentity,
  nnSigmoid,
  nnRelu,
} nnActivation;

/// Create a network.
nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActivation* activations);

/// Delete the network and free its internal memory.
void nnDeleteNet(nnNeuralNetwork**);

/// Set the network's weights.
void nnSetWeights(nnNeuralNetwork*, const R* weights);

/// Set the network's biases.
void nnSetBiases(nnNeuralNetwork*, const R* biases);

/// Query the network.
///
/// |input| is a matrix of inputs, one row per input and as many columns as the
/// input's dimension.
///
/// The query object's output matrix (see nnNetOutputs()) is a matrix of
/// outputs, one row per output and as many columns as the output's dimension.
void nnQuery(const nnNeuralNetwork*, nnQueryObject*, const nnMatrix* input);

/// Query the network, array version.
void nnQueryArray(const nnNeuralNetwork*, nnQueryObject*, const R* input, R* output);

/// Create a query object.
///
/// The query object holds all the internal memory required to query a network.
/// Query objects allocate all memory up front so that network queries can run
/// without additional memory allocation.
nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork*, int num_inputs);

/// Delete the query object and free its internal memory.
void nnDeleteQueryObject(nnQueryObject**);

/// Return the outputs of the query.
const nnMatrix* nnNetOutputs(const nnQueryObject*);

/// Return the network's input size.
int nnNetInputSize(const nnNeuralNetwork*);

/// Return the network's output size.
int nnNetOutputSize(const nnNeuralNetwork*);

/// Return the layer's input size.
int nnLayerInputSize(const nnMatrix* weights);

/// Return the layer's output size.
int nnLayerOutputSize(const nnMatrix* weights);
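Putting the API together, a minimal end-to-end sketch (sizes are arbitrary; note that layer_sizes holds num_layers + 1 entries because it also names the input layer):

    const int layer_sizes[] = { 2, 3, 1 };      // 2 inputs, 3 hidden, 1 output.
    const nnActivation acts[] = { nnSigmoid, nnSigmoid };
    nnNeuralNetwork* net = nnMakeNet(2, layer_sizes, acts);
    nnQueryObject* q = nnMakeQueryObject(net, /*num_inputs=*/1);

    const R in[2] = { 0.5, 0.25 };
    R out[1];
    nnQueryArray(net, q, in, out);  // With freshly made (zeroed) weights this
                                    // yields sigmoid(0) = 0.5 until the net is
                                    // trained or nnSetWeights() is called.
    nnDeleteQueryObject(&q);
    nnDeleteNet(&net);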
diff --git a/src/lib/include/neuralnet/train.h b/src/lib/include/neuralnet/train.h
new file mode 100644
index 0000000..79f8e7b
--- /dev/null
+++ b/src/lib/include/neuralnet/train.h
@@ -0,0 +1,42 @@
#pragma once

#include <neuralnet/neuralnet.h>

#include <stdbool.h>
#include <stdint.h>

typedef struct nnMatrix nnMatrix;

/// Weight initialization strategy.
///
/// Note that regardless of strategy, a layer's weights are scaled by the
/// layer's size. This is to avoid saturation when, e.g., using a sigmoid
/// activation with many inputs. Thus, a (0,1) initialization is really
/// (0,scale), for example.
typedef enum nnWeightInitStrategy {
  nnWeightInit01,      // (0,1) range.
  nnWeightInit11,      // (-1,+1) range.
  nnWeightInitNormal,  // Normal distribution.
} nnWeightInitStrategy;

/// Network training parameters.
typedef struct nnTrainingParams {
  R learning_rate;
  int max_iterations;
  uint64_t seed;
  nnWeightInitStrategy weight_init;
  bool debug;
} nnTrainingParams;

/// Train the network.
///
/// |inputs| is a matrix of inputs, one row per input and as many columns as
/// the input's dimension.
///
/// |targets| is a matrix of targets, one row per target and as many columns as
/// the target's dimension.
void nnTrain(
    nnNeuralNetwork*,
    const nnMatrix* inputs,
    const nnMatrix* targets,
    const nnTrainingParams*);
diff --git a/src/lib/include/neuralnet/types.h b/src/lib/include/neuralnet/types.h
new file mode 100644
index 0000000..e8d3942
--- /dev/null
+++ b/src/lib/include/neuralnet/types.h
@@ -0,0 +1,3 @@
#pragma once

typedef double R;
diff --git a/src/lib/src/activation.h b/src/lib/src/activation.h
new file mode 100644
index 0000000..42ab73f
--- /dev/null
+++ b/src/lib/src/activation.h
@@ -0,0 +1,21 @@
#pragma once

#include <neuralnet/types.h>

#include <math.h>

static inline R sigmoid(R x) {
  return 1. / (1. + exp(-x));
}

static inline R relu(R x) {
  return fmax(0, x);
}

#define NN_MAP_ARRAY(f, in, out, size) \
  for (int i = 0; i < size; ++i) {     \
    out[i] = f(in[i]);                 \
  }

#define sigmoid_array(in, out, size) NN_MAP_ARRAY(sigmoid, in, out, size)
#define relu_array(in, out, size) NN_MAP_ARRAY(relu, in, out, size)
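One caveat with NN_MAP_ARRAY as written: it expands to a bare for loop, so a use like "if (cond) sigmoid_array(a, b, n); else ..." fails to parse (the trailing semicolon becomes an extra empty statement before the else), and the arguments are substituted unparenthesized. A more defensive variant, should that ever bite:

    #define NN_MAP_ARRAY(f, in, out, size)   \
      do {                                   \
        for (int i = 0; i < (size); ++i) {   \
          (out)[i] = f((in)[i]);             \
        }                                    \
      } while (0)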
diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c
new file mode 100644
index 0000000..a7a4ce6
--- /dev/null
+++ b/src/lib/src/matrix.c
@@ -0,0 +1,298 @@
#include <neuralnet/matrix.h>

#include <assert.h>
#include <stdlib.h>
#include <string.h>

nnMatrix nnMatrixMake(int rows, int cols) {
  R* values = calloc(rows * cols, sizeof(R));
  assert(values != 0);

  return (nnMatrix) {
    .rows = rows,
    .cols = cols,
    .values = values,
  };
}

void nnMatrixDel(nnMatrix* matrix) {
  assert(matrix != 0);

  if (matrix->values != 0) {
    free(matrix->values);
    matrix->values = 0;
    matrix->rows = 0;
    matrix->cols = 0;
  }
}

void nnMatrixMove(nnMatrix* in, nnMatrix* out) {
  assert(in);
  assert(out);

  out->rows = in->rows;
  out->cols = in->cols;
  out->values = in->values;

  in->rows = 0;
  in->cols = 0;
  in->values = 0;
}

void nnMatrixCopy(const nnMatrix* in, nnMatrix* out) {
  assert(in);
  assert(out);
  assert(in->rows == out->rows);
  assert(in->cols == out->cols);

  const R* in_value = in->values;
  R* out_value = out->values;

  for (int i = 0; i < in->rows * in->cols; ++i) {
    *out_value++ = *in_value++;
  }
}

void nnMatrixToArray(const nnMatrix* in, R* out) {
  assert(in);
  assert(out);

  const R* values = in->values;
  for (int i = 0; i < in->rows * in->cols; ++i) {
    *out++ = *values++;
  }
}

void nnMatrixRowToArray(const nnMatrix* in, int row, R* out) {
  assert(in);
  assert(out);

  const R* values = in->values + row * in->cols;
  for (int i = 0; i < in->cols; ++i) {
    *out++ = *values++;
  }
}

void nnMatrixCopyCol(const nnMatrix* in, nnMatrix* out, int col_in, int col_out) {
  assert(in);
  assert(out);
  assert(in->rows == out->rows);
  assert(col_in < in->cols);
  assert(col_out < out->cols);

  for (int row = 0; row < in->rows; ++row) {
    nnMatrixSet(out, row, col_out, nnMatrixAt(in, row, col_in));
  }
}

nnMatrix nnMatrixBorrow(nnMatrix* in) {
  assert(in);

  nnMatrix out;
  out.rows = in->rows;
  out.cols = in->cols;
  out.values = in->values;
  return out;
}

nnMatrix nnMatrixBorrowRows(nnMatrix* in, int row_start, int num_rows) {
  assert(in);
  assert(row_start < in->rows);
  assert(row_start + num_rows <= in->rows);

  nnMatrix out;
  out.rows = num_rows;
  out.cols = in->cols;
  out.values = nnMatrixRow_mut(in, row_start);
  return out;
}

void nnMatrixInit(nnMatrix* matrix, const R* values) {
  assert(matrix);
  assert(values);
  memcpy(matrix->values, values, matrix->rows * matrix->cols * sizeof(R));
}

void nnMatrixInitConstant(nnMatrix* matrix, R value) {
  assert(matrix);
  for (int i = 0; i < matrix->rows * matrix->cols; ++i) {
    matrix->values[i] = value;
  }
}

void nnMatrixMul(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
  assert(left != 0);
  assert(right != 0);
  assert(out != 0);
  assert(out != left);
  assert(out != right);
  assert(left->cols == right->rows);
  assert(out->rows == left->rows);
  assert(out->cols == right->cols);

  R* out_value = out->values;

  for (int i = 0; i < left->rows; ++i) {
    const R* left_row = &left->values[i * left->cols];

    for (int j = 0; j < right->cols; ++j) {
      const R* right_col = &right->values[j];
      *out_value = 0;

      // Vector dot product.
      for (int k = 0; k < left->cols; ++k) {
        *out_value += left_row[k] * right_col[0];
        right_col += right->cols;  // Next row in the column.
      }

      out_value++;
    }
  }
}

void nnMatrixMulAdd(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) {
  assert(left);
  assert(right);
  assert(out);
  assert(left->rows == right->rows);
  assert(left->cols == right->cols);
  assert(left->rows == out->rows);
  assert(left->cols == out->cols);

  const R* left_value = left->values;
  const R* right_value = right->values;
  R* out_value = out->values;

  for (int i = 0; i < left->rows * left->cols; ++i) {
    *out_value++ = *left_value++ + *right_value++ * scale;
  }
}

void nnMatrixMulSub(const nnMatrix* left, const nnMatrix* right, R scale, nnMatrix* out) {
  assert(left);
  assert(right);
  assert(out);
  assert(left->rows == right->rows);
  assert(left->cols == right->cols);
  assert(left->rows == out->rows);
  assert(left->cols == out->cols);

  const R* left_value = left->values;
  const R* right_value = right->values;
  R* out_value = out->values;

  for (int i = 0; i < left->rows * left->cols; ++i) {
    *out_value++ = *left_value++ - *right_value++ * scale;
  }
}

void nnMatrixMulPairs(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
  assert(left != 0);
  assert(right != 0);
  assert(out != 0);
  assert(left->rows == right->rows);
  assert(left->cols == right->cols);
  assert(left->rows == out->rows);
  assert(left->cols == out->cols);

  const R* left_value = left->values;
  const R* right_value = right->values;
  R* out_value = out->values;

  for (int i = 0; i < left->rows * left->cols; ++i) {
    *out_value++ = *left_value++ * *right_value++;
  }
}

void nnMatrixAdd(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
  assert(left);
  assert(right);
  assert(out);
  assert(left->rows == right->rows);
  assert(left->cols == right->cols);
  assert(left->rows == out->rows);
  assert(left->cols == out->cols);

  const R* left_value = left->values;
  const R* right_value = right->values;
  R* out_value = out->values;

  for (int i = 0; i < left->rows * left->cols; ++i) {
    *out_value++ = *left_value++ + *right_value++;
  }
}

void nnMatrixSub(const nnMatrix* left, const nnMatrix* right, nnMatrix* out) {
  assert(left);
  assert(right);
  assert(out);
  assert(left->rows == right->rows);
  assert(left->cols == right->cols);
  assert(left->rows == out->rows);
  assert(left->cols == out->cols);

  const R* left_value = left->values;
  const R* right_value = right->values;
  R* out_value = out->values;

  for (int i = 0; i < left->rows * left->cols; ++i) {
    *out_value++ = *left_value++ - *right_value++;
  }
}

void nnMatrixAddRow(const nnMatrix* matrix, const nnMatrix* row, nnMatrix* out) {
  assert(matrix);
  assert(row);
  assert(out);
  assert(row->rows == 1);
  assert(matrix->cols == row->cols);
  assert(matrix->rows == out->rows);
  assert(matrix->cols == out->cols);

  const R* matrix_value = matrix->values;
  R* out_value = out->values;

  for (int i = 0; i < matrix->rows; ++i) {
    const R* row_value = row->values;
    for (int j = 0; j < row->cols; ++j) {
      *out_value++ = *matrix_value++ + *row_value++;
    }
  }
}

void nnMatrixScale(nnMatrix* matrix, R scale) {
  assert(matrix);

  R* value = matrix->values;
  for (int i = 0; i < matrix->rows * matrix->cols; ++i) {
    *value++ *= scale;
  }
}

void nnMatrixTranspose(const nnMatrix* in, nnMatrix* out) {
  assert(in);
  assert(out);
  assert(in != out);
  assert(in->rows == out->cols);
  assert(in->cols == out->rows);

  for (int i = 0; i < in->rows; ++i) {
    for (int j = 0; j < in->cols; ++j) {
      nnMatrixSet(out, j, i, nnMatrixAt(in, i, j));
    }
  }
}

void nnMatrixGt(const nnMatrix* in, R threshold, nnMatrix* out) {
  assert(in);
  assert(out);
  assert(in->rows == out->rows);
  assert(in->cols == out->cols);

  const R* in_value = in->values;
  R* out_value = out->values;

  for (int i = 0; i < in->rows * in->cols; ++i) {
    *out_value++ = (*in_value++) > threshold ? 1 : 0;
  }
}
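A small worked check of nnMatrixMul's indexing (hypothetical test code, in the spirit of matrix_test.c): for A = [[1 2],[3 4]] and B = [[5 6],[7 8]], the product is AB = [[19 22],[43 50]].

    nnMatrix a = nnMatrixMake(2, 2);
    nnMatrix b = nnMatrixMake(2, 2);
    nnMatrix c = nnMatrixMake(2, 2);
    nnMatrixInit(&a, (const R[]){ 1, 2, 3, 4 });  // Row-major.
    nnMatrixInit(&b, (const R[]){ 5, 6, 7, 8 });
    nnMatrixMul(&a, &b, &c);
    assert(nnMatrixAt(&c, 0, 0) == 19.0);  // 1*5 + 2*7
    assert(nnMatrixAt(&c, 0, 1) == 22.0);  // 1*6 + 2*8
    assert(nnMatrixAt(&c, 1, 0) == 43.0);  // 3*5 + 4*7
    assert(nnMatrixAt(&c, 1, 1) == 50.0);  // 3*6 + 4*8
    nnMatrixDel(&a); nnMatrixDel(&b); nnMatrixDel(&c);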
diff --git a/src/lib/src/neuralnet.c b/src/lib/src/neuralnet.c
new file mode 100644
index 0000000..cac611a
--- /dev/null
+++ b/src/lib/src/neuralnet.c
@@ -0,0 +1,228 @@
#include <neuralnet/neuralnet.h>

#include <neuralnet/matrix.h>
#include "activation.h"
#include "neuralnet_impl.h"

#include <assert.h>
#include <stdlib.h>

nnNeuralNetwork* nnMakeNet(int num_layers, const int* layer_sizes, const nnActivation* activations) {
  assert(num_layers > 0);
  assert(layer_sizes);
  assert(activations);

  nnNeuralNetwork* net = calloc(1, sizeof(nnNeuralNetwork));
  if (net == 0) {
    return 0;
  }

  net->num_layers = num_layers;

  net->weights = calloc(num_layers, sizeof(nnMatrix));
  net->biases = calloc(num_layers, sizeof(nnMatrix));
  net->activations = calloc(num_layers, sizeof(nnActivation));
  if ( (net->weights == 0) || (net->biases == 0) || (net->activations == 0) ) {
    nnDeleteNet(&net);
    return 0;
  }

  for (int l = 0; l < num_layers; ++l) {
    // layer_sizes = { input layer size, first hidden layer size, ...}
    const int layer_input_size = layer_sizes[l];
    const int layer_output_size = layer_sizes[l+1];

    // We store the transpose of the weight matrix as written in textbooks.
    // Our vectors are row vectors and the matrices row-major.
    const int rows = layer_input_size;
    const int cols = layer_output_size;

    net->weights[l] = nnMatrixMake(rows, cols);
    net->biases[l] = nnMatrixMake(1, cols);
    net->activations[l] = activations[l];
  }

  return net;
}

void nnDeleteNet(nnNeuralNetwork** net) {
  if ( (!net) || (!(*net)) ) {
    return;
  }
  if ((*net)->weights != 0) {
    for (int l = 0; l < (*net)->num_layers; ++l) {
      nnMatrixDel(&(*net)->weights[l]);
    }
    free((*net)->weights);
    (*net)->weights = 0;
  }
  if ((*net)->biases != 0) {
    for (int l = 0; l < (*net)->num_layers; ++l) {
      nnMatrixDel(&(*net)->biases[l]);
    }
    free((*net)->biases);
    (*net)->biases = 0;
  }
  if ((*net)->activations) {
    free((*net)->activations);
    (*net)->activations = 0;
  }
  free(*net);
  *net = 0;
}

void nnSetWeights(nnNeuralNetwork* net, const R* weights) {
  assert(net);
  assert(weights);

  for (int l = 0; l < net->num_layers; ++l) {
    nnMatrix* layer_weights = &net->weights[l];
    R* layer_values = layer_weights->values;

    for (int j = 0; j < layer_weights->rows * layer_weights->cols; ++j) {
      *layer_values++ = *weights++;
    }
  }
}

void nnSetBiases(nnNeuralNetwork* net, const R* biases) {
  assert(net);
  assert(biases);

  for (int l = 0; l < net->num_layers; ++l) {
    nnMatrix* layer_biases = &net->biases[l];
    R* layer_values = layer_biases->values;

    for (int j = 0; j < layer_biases->rows * layer_biases->cols; ++j) {
      *layer_values++ = *biases++;
    }
  }
}

void nnQuery(const nnNeuralNetwork* net, nnQueryObject* query, const nnMatrix* input) {
  assert(net);
  assert(query);
  assert(input);
  assert(net->num_layers == query->num_layers);
  assert(input->rows <= query->network_outputs->rows);
  assert(input->cols == nnNetInputSize(net));

  for (int i = 0; i < input->rows; ++i) {
    // Not mutating the input, but we need the cast to borrow.
    nnMatrix input_vector = nnMatrixBorrowRows((nnMatrix*)input, i, 1);

    for (int l = 0; l < net->num_layers; ++l) {
      const nnMatrix* layer_weights = &net->weights[l];
      const nnMatrix* layer_biases = &net->biases[l];
      // Y^T = (W*X)^T = X^T*W^T
      //
      // TODO: If we had a row-row matrix multiplication, we could compute:
      //   Y^T = W ** X^T
      // The row-row multiplication could be more cache-friendly. We just need
      // to store W as is, without transposing.
      // We could also rewrite the original Mul function to go row x row,
      // decomposing the multiplication. Preserving the original meaning of Mul
      // makes everything clearer.
      nnMatrix output_vector = nnMatrixBorrowRows(&query->layer_outputs[l], i, 1);
      nnMatrixMul(&input_vector, layer_weights, &output_vector);
      nnMatrixAddRow(&output_vector, layer_biases, &output_vector);

      switch (net->activations[l]) {
      case nnIdentity:
        break;  // Nothing to do for the identity function.
      case nnSigmoid:
        sigmoid_array(output_vector.values, output_vector.values, output_vector.cols);
        break;
      case nnRelu:
        relu_array(output_vector.values, output_vector.values, output_vector.cols);
        break;
      default:
        assert(0);
      }

      input_vector = output_vector;  // Borrow.
    }
  }
}

void nnQueryArray(const nnNeuralNetwork* net, nnQueryObject* query, const R* input, R* output) {
  assert(net);
  assert(query);
  assert(input);
  assert(output);
  assert(net->num_layers > 0);

  // nnQuery() expects a 1 x input_size row vector.
  nnMatrix input_vector = nnMatrixMake(1, nnNetInputSize(net));
  nnMatrixInit(&input_vector, input);
  nnQuery(net, query, &input_vector);
  nnMatrixRowToArray(query->network_outputs, 0, output);
  nnMatrixDel(&input_vector);
}

nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) {
  assert(net);
  assert(num_inputs > 0);
  assert(net->num_layers > 0);

  nnQueryObject* query = calloc(1, sizeof(nnQueryObject));
  if (!query) {
    return 0;
  }

  query->num_layers = net->num_layers;

  // Allocate the intermediate layer output matrices.
  query->layer_outputs = calloc(net->num_layers, sizeof(nnMatrix));
  if (!query->layer_outputs) {
    free(query);
    return 0;
  }
  for (int l = 0; l < net->num_layers; ++l) {
    const nnMatrix* layer_weights = &net->weights[l];
    const int layer_output_size = nnLayerOutputSize(layer_weights);
    query->layer_outputs[l] = nnMatrixMake(num_inputs, layer_output_size);
  }
  query->network_outputs = &query->layer_outputs[net->num_layers - 1];

  return query;
}

void nnDeleteQueryObject(nnQueryObject** query) {
  if ( (!query) || (!(*query)) ) {
    return;
  }
  if ((*query)->layer_outputs != 0) {
    for (int l = 0; l < (*query)->num_layers; ++l) {
      nnMatrixDel(&(*query)->layer_outputs[l]);
    }
  }
  free((*query)->layer_outputs);
  free(*query);
  *query = 0;
}

const nnMatrix* nnNetOutputs(const nnQueryObject* query) {
  assert(query);
  return query->network_outputs;
}

int nnNetInputSize(const nnNeuralNetwork* net) {
  assert(net);
  assert(net->num_layers > 0);
  return net->weights[0].rows;
}

int nnNetOutputSize(const nnNeuralNetwork* net) {
  assert(net);
  assert(net->num_layers > 0);
  return net->weights[net->num_layers - 1].cols;
}

int nnLayerInputSize(const nnMatrix* weights) {
  assert(weights);
  return weights->rows;
}

int nnLayerOutputSize(const nnMatrix* weights) {
  assert(weights);
  return weights->cols;
}
diff --git a/src/lib/src/neuralnet_impl.h b/src/lib/src/neuralnet_impl.h
new file mode 100644
index 0000000..26107b5
--- /dev/null
+++ b/src/lib/src/neuralnet_impl.h
@@ -0,0 +1,36 @@
#pragma once

#include <neuralnet/matrix.h>
#include <neuralnet/neuralnet.h>  // For nnActivation.

/// Neural network object.
///
/// We store the transposes of the weight matrices so that we can do forward
/// passes with a minimal amount of work. That is, if on paper we write:
///
///   [w11 w21]
///   [w12 w22]
///
/// then the weight matrix in memory is stored as the following array:
///
///   w11 w12 w21 w22
typedef struct nnNeuralNetwork {
  int num_layers;             // Number of non-input layers (hidden + output).
  nnMatrix* weights;          // One matrix per non-input layer.
  nnMatrix* biases;           // One vector per non-input layer.
  nnActivation* activations;  // One per non-input layer.
} nnNeuralNetwork;

/// A query object that holds all the memory necessary to query a network.
///
/// |layer_outputs| is an array of matrices of intermediate layer outputs.
/// There is one matrix per intermediate layer. Each matrix holds the layer's
/// output, with one row per input, and as many columns as the layer's output
/// size (the output vector is transposed.)
///
/// |network_outputs| points to the last output matrix in |layer_outputs| for
/// convenience.
typedef struct nnQueryObject {
  int num_layers;
  nnMatrix* layer_outputs;    // Output matrices, one output per layer.
  nnMatrix* network_outputs;  // Points to the last output matrix.
} nnQueryObject;
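Concretely, because weights are stored with rows = layer inputs and cols = layer outputs, the forward pass is a plain row-vector-times-matrix product with no transposition at query time: y[j] = b[j] + sum_i x[i] * W[i][j]. A numeric sketch using the public API (hypothetical values):

    const int sizes[] = { 2, 2 };              // One layer: 2 inputs -> 2 outputs.
    const nnActivation acts[] = { nnIdentity };
    nnNeuralNetwork* net = nnMakeNet(1, sizes, acts);
    nnSetWeights(net, (const R[]){ 1, 2,       // W, row-major: W[i][j] is the
                                   3, 4 });    // weight from input i to output j.
    nnSetBiases(net, (const R[]){ 0, 0 });

    nnQueryObject* q = nnMakeQueryObject(net, 1);
    nnMatrix in = nnMatrixMake(1, 2);
    nnMatrixInit(&in, (const R[]){ 1, 1 });
    nnQuery(net, q, &in);
    // nnNetOutputs(q) row 0 is { 1*1 + 1*3, 1*2 + 1*4 } = { 4, 6 }.
    nnMatrixDel(&in);
    nnDeleteQueryObject(&q);
    nnDeleteNet(&net);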
diff --git a/src/lib/src/train.c b/src/lib/src/train.c
new file mode 100644
index 0000000..027de66
--- /dev/null
+++ b/src/lib/src/train.c
@@ -0,0 +1,346 @@
#include <neuralnet/train.h>

#include <neuralnet/matrix.h>
#include "neuralnet_impl.h"

#include <random/mt19937-64.h>
#include <random/normal.h>

#include <assert.h>
#include <math.h>
#include <stdlib.h>

#include <stdio.h>
#define LOGD printf

// If debug mode is requested, progress is reported every time training
// advances by this percentage of the total iterations.
static const int PROGRESS_THRESHOLD = 5;  // %

/// Computes the total MSE from the output error matrix.
R ComputeMSE(const nnMatrix* errors) {
  R sum_sq = 0;
  const int N = errors->rows * errors->cols;
  const R* value = errors->values;
  for (int i = 0; i < N; ++i) {
    sum_sq += *value * *value;
    value++;
  }
  return sum_sq / (R)N;
}
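That is, for the N entries e_1 .. e_N of the error matrix, ComputeMSE returns (e_1^2 + ... + e_N^2) / N: the mean squared error over every output of every sample currently held in the matrix.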
| 30 | |||
| 31 | /// Holds the bits required to compute a sigmoid gradient. | ||
| 32 | typedef struct nnSigmoidGradientElements { | ||
| 33 | nnMatrix ones; // A vector of just ones, same size as the layer. | ||
| 34 | } nnSigmoidGradientElements; | ||
| 35 | |||
| 36 | /// Holds the various elements required to compute gradients. These depend on | ||
| 37 | /// what activation function are used, so they'll potentially be different for | ||
| 38 | /// each layer. A data type is defined for these because we allocate all the | ||
| 39 | /// required memory up front before entering the training loop. | ||
| 40 | typedef struct nnGradientElements { | ||
| 41 | nnActivation type; | ||
| 42 | // Gradient vector, same size as the layer. | ||
| 43 | // This will contain the gradient expression except for the output value of | ||
| 44 | // the previous layer. | ||
| 45 | nnMatrix gradient; | ||
| 46 | union { | ||
| 47 | nnSigmoidGradientElements sigmoid; | ||
| 48 | }; | ||
| 49 | } nnGradientElements; | ||
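| | |||
| | // Editor's note: |type| selects the active union member. Sigmoid is the only | ||
| | // activation needing extra state (the ones vector used to form 1 - output); | ||
| | // identity borrows the error vector and relu uses only |gradient|. | ||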
| 50 | |||
| 51 | // Initialize the network's weights randomly and set its biases to 0. | ||
| 52 | void nnInitNet(nnNeuralNetwork* net, uint64_t seed, const nnWeightInitStrategy strategy) { | ||
| 53 | assert(net); | ||
| 54 | |||
| 55 | mt19937_64 rng = mt19937_64_make(); | ||
| 56 | mt19937_64_init(&rng, seed); | ||
| 57 | |||
| 58 | for (int l = 0; l < net->num_layers; ++l) { | ||
| 59 | nnMatrix* weights = &net->weights[l]; | ||
| 60 | nnMatrix* biases = &net->biases[l]; | ||
| 61 | |||
| 62 | const R layer_size = (R)nnLayerInputSize(weights); | ||
| 63 | const R scale = 1. / layer_size; | ||
| 64 | // Standard deviation used by nnWeightInitNormal below. | ||
| 65 | const R stdev = 1. / sqrt((R)layer_size); | ||
| 66 | |||
| 67 | R* value = weights->values; | ||
| 68 | for (int k = 0; k < weights->rows * weights->cols; ++k) { | ||
| 69 | switch (strategy) { | ||
| 70 | case nnWeightInit01: { | ||
| 71 | const R x01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. | ||
| 72 | *value++ = scale * x01; | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | case nnWeightInit11: { | ||
| 76 | const R x11 = mt19937_64_gen_real4(&rng); // (-1, +1) interval. | ||
| 77 | *value++ = scale * x11; | ||
| 78 | break; | ||
| 79 | } | ||
| 80 | case nnWeightInitNormal: { | ||
| 81 | // Using initialization with a normal distribution of standard | ||
| 82 | // deviation 1 / sqrt(num_layer_weights) to prevent saturation when | ||
| 83 | // multiplying inputs. | ||
| 84 | const R u01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. | ||
| 85 | const R v01 = mt19937_64_gen_real3(&rng); // (0, +1) interval. | ||
| 86 | R z0, z1; | ||
| 87 | normal2(u01, v01, &z0, &z1); | ||
| 88 | z0 = normal_transform(z0, /*mu=*/0, stdev); | ||
| 89 | z1 = normal_transform(z1, /*mu=*/0, stdev); | ||
| 90 | *value++ = z0; | ||
| 91 | if (k < weights->rows * weights->cols - 1) { | ||
| 92 | *value++ = z1; | ||
| 93 | ++k; | ||
| 94 | } | ||
| 95 | break; | ||
| 96 | } | ||
| 97 | default: assert(false); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | // Initialize biases. | ||
| 102 | // Zero is used so that the initial functions pass through the origin. | ||
| 103 | value = biases->values; | ||
| 104 | for (int k = 0; k < biases->rows * biases->cols; ++k, ++value) { | ||
| 105 | *value = 0; | ||
| 106 | } | ||
| 107 | } | ||
| 108 | } | ||
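| | |||
| | // Usage note (editor's addition): nnTrain below calls nnInitNet itself with | ||
| | // params->seed and params->weight_init, so a direct call is only needed for | ||
| | // custom initialization outside of training. | ||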
| 109 | |||
| 110 | // |inputs| has one row vector per sample. | ||
| 111 | // |targets| has one row vector per sample. | ||
| 112 | // | ||
| 113 | // For now, each iteration trains with one sample (row) at a time. | ||
| 114 | void nnTrain( | ||
| 115 | nnNeuralNetwork* net, | ||
| 116 | const nnMatrix* inputs, | ||
| 117 | const nnMatrix* targets, | ||
| 118 | const nnTrainingParams* params) { | ||
| 119 | assert(net); | ||
| 120 | assert(inputs); | ||
| 121 | assert(targets); | ||
| 122 | assert(params); | ||
| 123 | assert(nnNetOutputSize(net) == targets->cols); | ||
| 124 | assert(net->num_layers > 0); | ||
| 125 | |||
| 126 | // Allocate error vectors to hold the backpropagated error values. | ||
| 127 | // For now, these are one row vector per layer, meaning that we will train | ||
| 128 | // with one sample at a time. | ||
| 129 | nnMatrix* errors = calloc(net->num_layers, sizeof(nnMatrix)); | ||
| 130 | |||
| 131 | // Allocate the weight transpose matrices up front for backpropagation. | ||
| 132 | nnMatrix* weights_T = calloc(net->num_layers, sizeof(nnMatrix)); | ||
| 133 | |||
| 134 | // Allocate the weight delta matrices. | ||
| 135 | nnMatrix* weight_deltas = calloc(net->num_layers, sizeof(nnMatrix)); | ||
| 136 | |||
| 137 | // Allocate the data structures required to compute gradients. | ||
| 138 | // This depends on each layer's activation type. | ||
| 139 | nnGradientElements* gradient_elems = calloc(net->num_layers, sizeof(nnGradientElements)); | ||
| 140 | |||
| 141 | // Allocate the output transpose vectors for weight delta calculation. | ||
| 142 | // This is one column vector per layer. | ||
| 143 | nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix)); | ||
| 144 | |||
| 145 | assert(errors); | ||
| 146 | assert(weights_T); | ||
| 147 | assert(weight_deltas); | ||
| 148 | assert(gradient_elems); | ||
| 149 | assert(outputs_T); | ||
| 150 | |||
| 151 | for (int l = 0; l < net->num_layers; ++l) { | ||
| 152 | const nnMatrix* layer_weights = &net->weights[l]; | ||
| 153 | const int layer_output_size = net->weights[l].cols; | ||
| 154 | const nnActivation activation = net->activations[l]; | ||
| 155 | |||
| 156 | errors[l] = nnMatrixMake(1, layer_weights->cols); | ||
| 157 | |||
| 158 | weights_T[l] = nnMatrixMake(layer_weights->cols, layer_weights->rows); | ||
| 159 | nnMatrixTranspose(layer_weights, &weights_T[l]); | ||
| 160 | |||
| 161 | weight_deltas[l] = nnMatrixMake(layer_weights->rows, layer_weights->cols); | ||
| 162 | |||
| 163 | outputs_T[l] = nnMatrixMake(layer_output_size, 1); | ||
| 164 | |||
| 165 | // Allocate the gradient elements and vectors for weight delta calculation. | ||
| 166 | nnGradientElements* elems = &gradient_elems[l]; | ||
| 167 | elems->type = activation; | ||
| 168 | switch (activation) { | ||
| 169 | case nnIdentity: | ||
| 170 | break; // Gradient vector will be borrowed, no need to allocate. | ||
| 171 | |||
| 172 | case nnSigmoid: | ||
| 173 | elems->gradient = nnMatrixMake(1, layer_output_size); | ||
| 174 | // Allocate the 1s vectors. | ||
| 175 | elems->sigmoid.ones = nnMatrixMake(1, layer_output_size); | ||
| 176 | nnMatrixInitConstant(&elems->sigmoid.ones, 1); | ||
| 177 | break; | ||
| 178 | |||
| 179 | case nnRelu: | ||
| 180 | elems->gradient = nnMatrixMake(1, layer_output_size); | ||
| 181 | break; | ||
| 182 | } | ||
| 183 | } | ||
| 184 | |||
| 185 | // Construct the query object with a size of 1 since we are training with one | ||
| 186 | // sample at a time. | ||
| 187 | nnQueryObject* query = nnMakeQueryObject(net, 1); | ||
| 188 | |||
| 189 | // Network outputs are given by the query object. Every network query updates | ||
| 190 | // the outputs. | ||
| 191 | const nnMatrix* const training_outputs = query->network_outputs; | ||
| 192 | |||
| 193 | // A vector to store the training input transposed. | ||
| 194 | nnMatrix training_inputs_T = nnMatrixMake(inputs->cols, 1); | ||
| 195 | |||
| 196 | // If debug mode is requested, we will show progress every Nth iteration; the | ||
| 197 | // frame is clamped to at least 1 to avoid a modulo-by-zero below. | ||
| 198 | const int progress_frame = | ||
| 199 | (params->max_iterations * PROGRESS_THRESHOLD < 100) | ||
| 200 | ? 1 : (params->max_iterations * PROGRESS_THRESHOLD / 100); | ||
| 201 | |||
| 202 | // --- TRAIN | ||
| 203 | |||
| 204 | nnInitNet(net, params->seed, params->weight_init); | ||
| 205 | |||
| 206 | for (int iter = 0; iter < params->max_iterations; ++iter) { | ||
| 207 | |||
| 208 | // For now, we train with one sample at a time. | ||
| 209 | for (int sample = 0; sample < inputs->rows; ++sample) { | ||
| 210 | // Slice the input and target matrices with the batch size. | ||
| 211 | // We are not mutating the inputs, but we need the cast to borrow. | ||
| 212 | nnMatrix training_inputs = nnMatrixBorrowRows((nnMatrix*)inputs, sample, 1); | ||
| 213 | nnMatrix training_targets = nnMatrixBorrowRows((nnMatrix*)targets, sample, 1); | ||
| 214 | |||
| 215 | // Will need the input transposed for backpropagation. | ||
| 216 | // Assuming one training input per iteration for now. | ||
| 217 | nnMatrixTranspose(&training_inputs, &training_inputs_T); | ||
| 218 | |||
| 219 | // Run a forward pass and compute the output layer error. | ||
| 220 | // We don't square the error here; we only need (t - o), which appears in | ||
| 221 | // the derivative -2(t - o). We compute (o - t) instead so that the outer | ||
| 222 | // negative sign cancels and the update step can subtract the deltas. | ||
| 223 | nnQuery(net, query, &training_inputs); | ||
| 224 | //nnMatrixSub(&training_targets, training_outputs, &errors[net->num_layers - 1]); | ||
| 225 | nnMatrixSub(training_outputs, &training_targets, &errors[net->num_layers - 1]); | ||
| 226 | |||
| 227 | // Update outputs_T, which we need during weight updates. | ||
| 228 | for (int l = 0; l < net->num_layers; ++l) { | ||
| 229 | nnMatrixTranspose(&query->layer_outputs[l], &outputs_T[l]); | ||
| 230 | } | ||
| 231 | |||
| 232 | // Update weights and biases for each internal layer, backpropagating | ||
| 233 | // errors along the way. | ||
| 234 | for (int l = net->num_layers - 1; l >= 0; --l) { | ||
| 235 | const nnMatrix* layer_output = &query->layer_outputs[l]; | ||
| 236 | nnMatrix* layer_weights = &net->weights[l]; | ||
| 237 | nnMatrix* layer_biases = &net->biases[l]; | ||
| 238 | nnGradientElements* elems = &gradient_elems[l]; | ||
| 239 | nnMatrix* gradient = &elems->gradient; | ||
| 240 | const nnActivation activation = net->activations[l]; | ||
| 241 | |||
| 242 | // Compute the gradient (the part of the expression that does not | ||
| 243 | // contain the output of the previous layer). | ||
| 244 | // | ||
| 245 | // Identity: G = error_k | ||
| 246 | // Sigmoid: G = error_k * output_k * (1 - output_k). | ||
| 247 | // Relu: G = error_k * (output_k > 0 ? 1 : 0) | ||
| 248 | switch (activation) { | ||
| 249 | case nnIdentity: | ||
| 250 | // TODO: Just copy the pointer? | ||
| 251 | *gradient = nnMatrixBorrow(&errors[l]); | ||
| 252 | break; | ||
| 253 | case nnSigmoid: | ||
| 254 | nnMatrixSub(&elems->sigmoid.ones, layer_output, gradient); | ||
| 255 | nnMatrixMulPairs(layer_output, gradient, gradient); | ||
| 256 | nnMatrixMulPairs(&errors[l], gradient, gradient); | ||
| 257 | break; | ||
| 258 | case nnRelu: | ||
| 259 | nnMatrixGt(layer_output, 0, gradient); | ||
| 260 | nnMatrixMulPairs(&errors[l], gradient, gradient); | ||
| 261 | break; | ||
| 262 | } | ||
| 263 | |||
| 264 | // Outer product to compute the weight deltas. | ||
| 265 | const nnMatrix* output_T = (l == 0) ? &training_inputs_T : &outputs_T[l-1]; | ||
| 266 | nnMatrixMul(output_T, gradient, &weight_deltas[l]); | ||
| 267 | |||
| 268 | // Backpropagate the error before updating weights. | ||
| 269 | if (l > 0) { | ||
| 270 | nnMatrixMul(gradient, &weights_T[l], &errors[l-1]); | ||
| 271 | } | ||
| 272 | |||
| 273 | // Update weights. | ||
| 274 | nnMatrixScale(&weight_deltas[l], params->learning_rate); | ||
| 275 | // The gradient has a negative sign from -(t - o), but we have computed | ||
| 276 | // e = o - t instead, so we can subtract directly. | ||
| 277 | //nnMatrixAdd(layer_weights, &weight_deltas[l], layer_weights); | ||
| 278 | nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights); | ||
| 279 | |||
| 280 | // Update weight transpose matrix for the next training iteration. | ||
| 281 | nnMatrixTranspose(layer_weights, &weights_T[l]); | ||
| 282 | |||
| 283 | // Update biases. | ||
| 284 | // This is the same formula as for weights, except that the o_j term is | ||
| 285 | // just 1. We can simply re-use the gradient that we have already | ||
| 286 | // computed for the weight update. | ||
| 287 | //nnMatrixMulAdd(layer_biases, gradient, params->learning_rate, layer_biases); | ||
| 288 | nnMatrixMulSub(layer_biases, gradient, params->learning_rate, layer_biases); | ||
| 289 | } | ||
| 290 | |||
| 291 | // TODO: Add this under a verbose debugging mode. | ||
| 292 | // if (params->debug) { | ||
| 293 | // LOGD("Iter: %d, Sample: %d, Error: %f\n", iter, sample, ComputeMSE(&errors[net->num_layers - 1])); | ||
| 294 | // LOGD("TGT: "); | ||
| 295 | // for (int i = 0; i < training_targets.cols; ++i) { | ||
| 296 | // printf("%.3f ", training_targets.values[i]); | ||
| 297 | // } | ||
| 298 | // printf("\n"); | ||
| 299 | // LOGD("OUT: "); | ||
| 300 | // for (int i = 0; i < training_outputs->cols; ++i) { | ||
| 301 | // printf("%.3f ", training_outputs->values[i]); | ||
| 302 | // } | ||
| 303 | // printf("\n"); | ||
| 304 | // } | ||
| 305 | } | ||
| 306 | |||
| 307 | if (params->debug && ((iter % progress_frame) == 0)) { | ||
| 308 | LOGD("Iter: %d/%d, Error: %f\n", | ||
| 309 | iter, params->max_iterations, ComputeMSE(&errors[net->num_layers - 1])); | ||
| 310 | } | ||
| 311 | } | ||
| 312 | |||
| 313 | // Print the final error. | ||
| 314 | if (params->debug) { | ||
| 315 | LOGD("Iter: %d/%d, Error: %f\n", | ||
| 316 | params->max_iterations, params->max_iterations, ComputeMSE(&errors[net->num_layers - 1])); | ||
| 317 | } | ||
| 318 | |||
| 319 | for (int l = 0; l < net->num_layers; ++l) { | ||
| 320 | nnMatrixDel(&errors[l]); | ||
| 321 | nnMatrixDel(&outputs_T[l]); | ||
| 322 | nnMatrixDel(&weights_T[l]); | ||
| 323 | nnMatrixDel(&weight_deltas[l]); | ||
| 324 | |||
| 325 | nnGradientElements* elems = &gradient_elems[l]; | ||
| 326 | switch (elems->type) { | ||
| 327 | case nnIdentity: | ||
| 328 | break; // Gradient vector is borrowed, no need to deallocate. | ||
| 329 | |||
| 330 | case nnSigmoid: | ||
| 331 | nnMatrixDel(&elems->gradient); | ||
| 332 | nnMatrixDel(&elems->sigmoid.ones); | ||
| 333 | break; | ||
| 334 | |||
| 335 | case nnRelu: | ||
| 336 | nnMatrixDel(&elems->gradient); | ||
| 337 | break; | ||
| 338 | } | ||
| 339 | } | ||
| 340 | nnMatrixDel(&training_inputs_T); | ||
| | nnDeleteQueryObject(&query); // Free the query object created for training. | ||
| 341 | free(errors); | ||
| 342 | free(outputs_T); | ||
| 343 | free(weights_T); | ||
| 344 | free(weight_deltas); | ||
| 345 | free(gradient_elems); | ||
| 346 | } | ||
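| | |||
| | // Editor's summary of the update rule implemented above: with e = o - t and | ||
| | // per-activation gradient G, each layer applies | ||
| | //   W <- W - learning_rate * (o_prev^T * G) | ||
| | //   b <- b - learning_rate * G | ||
| | // and the error backpropagated to the previous layer is G * W^T. | ||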
diff --git a/src/lib/test/matrix_test.c b/src/lib/test/matrix_test.c new file mode 100644 index 0000000..8191c97 --- /dev/null +++ b/src/lib/test/matrix_test.c | |||
| @@ -0,0 +1,350 @@ | |||
| 1 | #include <neuralnet/matrix.h> | ||
| 2 | |||
| 3 | #include "test.h" | ||
| 4 | #include "test_util.h" | ||
| 5 | |||
| 6 | #include <assert.h> | ||
| 7 | #include <stdlib.h> | ||
| 8 | |||
| 9 | // static void PrintMatrix(const nnMatrix* matrix) { | ||
| 10 | // assert(matrix); | ||
| 11 | |||
| 12 | // for (int i = 0; i < matrix->rows; ++i) { | ||
| 13 | // for (int j = 0; j < matrix->cols; ++j) { | ||
| 14 | // printf("%f ", nnMatrixAt(matrix, i, j)); | ||
| 15 | // } | ||
| 16 | // printf("\n"); | ||
| 17 | // } | ||
| 18 | // } | ||
| 19 | |||
| 20 | TEST_CASE(nnMatrixMake_1x1) { | ||
| 21 | nnMatrix A = nnMatrixMake(1, 1); | ||
| 22 | TEST_EQUAL(A.rows, 1); | ||
| 23 | TEST_EQUAL(A.cols, 1); | ||
| | nnMatrixDel(&A); | ||
| 24 | } | ||
| 25 | |||
| 26 | TEST_CASE(nnMatrixMake_3x1) { | ||
| 27 | nnMatrix A = nnMatrixMake(3, 1); | ||
| 28 | TEST_EQUAL(A.rows, 3); | ||
| 29 | TEST_EQUAL(A.cols, 1); | ||
| | nnMatrixDel(&A); | ||
| 30 | } | ||
| 31 | |||
| 32 | TEST_CASE(nnMatrixInit_3x1) { | ||
| 33 | nnMatrix A = nnMatrixMake(3, 1); | ||
| 34 | nnMatrixInit(&A, (R[]) { 1, 2, 3 }); | ||
| 35 | TEST_EQUAL(A.values[0], 1); | ||
| 36 | TEST_EQUAL(A.values[1], 2); | ||
| 37 | TEST_EQUAL(A.values[2], 3); | ||
| | nnMatrixDel(&A); | ||
| 38 | } | ||
| 39 | |||
| 40 | TEST_CASE(nnMatrixCopyCol_test) { | ||
| 41 | nnMatrix A = nnMatrixMake(3, 2); | ||
| 42 | nnMatrix B = nnMatrixMake(3, 1); | ||
| 43 | |||
| 44 | nnMatrixInit(&A, (R[]) { | ||
| 45 | 1, 2, | ||
| 46 | 3, 4, | ||
| 47 | 5, 6, | ||
| 48 | }); | ||
| 49 | |||
| 50 | nnMatrixCopyCol(&A, &B, 1, 0); | ||
| 51 | |||
| 52 | TEST_EQUAL(nnMatrixAt(&B, 0, 0), 2); | ||
| 53 | TEST_EQUAL(nnMatrixAt(&B, 1, 0), 4); | ||
| 54 | TEST_EQUAL(nnMatrixAt(&B, 2, 0), 6); | ||
| 55 | |||
| 56 | nnMatrixDel(&A); | ||
| 57 | nnMatrixDel(&B); | ||
| 58 | } | ||
| 59 | |||
| 60 | TEST_CASE(nnMatrixMul_square_3x3) { | ||
| 61 | nnMatrix A = nnMatrixMake(3, 3); | ||
| 62 | nnMatrix B = nnMatrixMake(3, 3); | ||
| 63 | nnMatrix O = nnMatrixMake(3, 3); | ||
| 64 | |||
| 65 | nnMatrixInit(&A, (const R[]){ | ||
| 66 | 1, 2, 3, | ||
| 67 | 4, 5, 6, | ||
| 68 | 7, 8, 9, | ||
| 69 | }); | ||
| 70 | nnMatrixInit(&B, (const R[]){ | ||
| 71 | 2, 4, 3, | ||
| 72 | 6, 8, 5, | ||
| 73 | 1, 7, 9, | ||
| 74 | }); | ||
| 75 | nnMatrixMul(&A, &B, &O); | ||
| 76 | |||
| 77 | const R expected[3][3] = { | ||
| 78 | { 17, 41, 40 }, | ||
| 79 | { 44, 98, 91 }, | ||
| 80 | { 71, 155, 142 }, | ||
| 81 | }; | ||
| 82 | for (int i = 0; i < O.rows; ++i) { | ||
| 83 | for (int j = 0; j < O.cols; ++j) { | ||
| 84 | TEST_TRUE(double_eq(nnMatrixAt(&O, i, j), expected[i][j], EPS)); | ||
| 85 | } | ||
| 86 | } | ||
| 87 | |||
| 88 | nnMatrixDel(&A); | ||
| 89 | nnMatrixDel(&B); | ||
| 90 | nnMatrixDel(&O); | ||
| 91 | } | ||
| 92 | |||
| 93 | TEST_CASE(nnMatrixMul_non_square_2x3_3x1) { | ||
| 94 | nnMatrix A = nnMatrixMake(2, 3); | ||
| 95 | nnMatrix B = nnMatrixMake(3, 1); | ||
| 96 | nnMatrix O = nnMatrixMake(2, 1); | ||
| 97 | |||
| 98 | nnMatrixInit(&A, (const R[]){ | ||
| 99 | 1, 2, 3, | ||
| 100 | 4, 5, 6, | ||
| 101 | }); | ||
| 102 | nnMatrixInit(&B, (const R[]){ | ||
| 103 | 2, | ||
| 104 | 6, | ||
| 105 | 1, | ||
| 106 | }); | ||
| 107 | nnMatrixMul(&A, &B, &O); | ||
| 108 | |||
| 109 | const R expected[2][1] = { | ||
| 110 | { 17 }, | ||
| 111 | { 44 }, | ||
| 112 | }; | ||
| 113 | for (int i = 0; i < O.rows; ++i) { | ||
| 114 | for (int j = 0; j < O.cols; ++j) { | ||
| 115 | TEST_TRUE(double_eq(nnMatrixAt(&O, i, j), expected[i][j], EPS)); | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | nnMatrixDel(&A); | ||
| 120 | nnMatrixDel(&B); | ||
| 121 | nnMatrixDel(&O); | ||
| 122 | } | ||
| 123 | |||
| 124 | TEST_CASE(nnMatrixMulAdd_test) { | ||
| 125 | nnMatrix A = nnMatrixMake(2, 3); | ||
| 126 | nnMatrix B = nnMatrixMake(2, 3); | ||
| 127 | nnMatrix O = nnMatrixMake(2, 3); | ||
| 128 | const R scale = 2; | ||
| 129 | |||
| 130 | nnMatrixInit(&A, (const R[]){ | ||
| 131 | 1, 2, 3, | ||
| 132 | 4, 5, 6, | ||
| 133 | }); | ||
| 134 | nnMatrixInit(&B, (const R[]){ | ||
| 135 | 2, 3, 1, | ||
| 136 | 7, 4, 3 | ||
| 137 | }); | ||
| 138 | nnMatrixMulAdd(&A, &B, scale, &O); // O = A + B * scale | ||
| 139 | |||
| 140 | const R expected[2][3] = { | ||
| 141 | { 5, 8, 5 }, | ||
| 142 | { 18, 13, 12 }, | ||
| 143 | }; | ||
| 144 | for (int i = 0; i < O.rows; ++i) { | ||
| 145 | for (int j = 0; j < O.cols; ++j) { | ||
| 146 | TEST_TRUE(double_eq(nnMatrixAt(&O, i, j), expected[i][j], EPS)); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | |||
| 150 | nnMatrixDel(&A); | ||
| 151 | nnMatrixDel(&B); | ||
| 152 | nnMatrixDel(&O); | ||
| 153 | } | ||
| 154 | |||
| 155 | TEST_CASE(nnMatrixMulSub_test) { | ||
| 156 | nnMatrix A = nnMatrixMake(2, 3); | ||
| 157 | nnMatrix B = nnMatrixMake(2, 3); | ||
| 158 | nnMatrix O = nnMatrixMake(2, 3); | ||
| 159 | const R scale = 2; | ||
| 160 | |||
| 161 | nnMatrixInit(&A, (const R[]){ | ||
| 162 | 1, 2, 3, | ||
| 163 | 4, 5, 6, | ||
| 164 | }); | ||
| 165 | nnMatrixInit(&B, (const R[]){ | ||
| 166 | 2, 3, 1, | ||
| 167 | 7, 4, 3 | ||
| 168 | }); | ||
| 169 | nnMatrixMulSub(&A, &B, scale, &O); // O = A - B * scale | ||
| 170 | |||
| 171 | const R expected[2][3] = { | ||
| 172 | { -3, -4, 1 }, | ||
| 173 | { -10, -3, 0 }, | ||
| 174 | }; | ||
| 175 | for (int i = 0; i < O.rows; ++i) { | ||
| 176 | for (int j = 0; j < O.cols; ++j) { | ||
| 177 | TEST_TRUE(double_eq(nnMatrixAt(&O, i, j), expected[i][j], EPS)); | ||
| 178 | } | ||
| 179 | } | ||
| 180 | |||
| 181 | nnMatrixDel(&A); | ||
| 182 | nnMatrixDel(&B); | ||
| 183 | nnMatrixDel(&O); | ||
| 184 | } | ||
| 185 | |||
| 186 | TEST_CASE(nnMatrixMulPairs_2x3) { | ||
| 187 | nnMatrix A = nnMatrixMake(2, 3); | ||
| 188 | nnMatrix B = nnMatrixMake(2, 3); | ||
| 189 | nnMatrix O = nnMatrixMake(2, 3); | ||
| 190 | |||
| 191 | nnMatrixInit(&A, (const R[]){ | ||
| 192 | 1, 2, 3, | ||
| 193 | 4, 5, 6, | ||
| 194 | }); | ||
| 195 | nnMatrixInit(&B, (const R[]){ | ||
| 196 | 2, 3, 1, | ||
| 197 | 7, 4, 3 | ||
| 198 | }); | ||
| 199 | nnMatrixMulPairs(&A, &B, &O); | ||
| 200 | |||
| 201 | const R expected[2][3] = { | ||
| 202 | { 2, 6, 3 }, | ||
| 203 | { 28, 20, 18 }, | ||
| 204 | }; | ||
| 205 | for (int i = 0; i < O.rows; ++i) { | ||
| 206 | for (int j = 0; j < O.cols; ++j) { | ||
| 207 | TEST_TRUE(double_eq(nnMatrixAt(&O, i, j), expected[i][j], EPS)); | ||
| 208 | } | ||
| 209 | } | ||
| 210 | |||
| 211 | nnMatrixDel(&A); | ||
| 212 | nnMatrixDel(&B); | ||
| 213 | nnMatrixDel(&O); | ||
| 214 | } | ||
| 215 | |||
| 216 | TEST_CASE(nnMatrixAdd_square_2x2) { | ||
| 217 | nnMatrix A = nnMatrixMake(2, 2); | ||
| 218 | nnMatrix B = nnMatrixMake(2, 2); | ||
| 219 | nnMatrix C = nnMatrixMake(2, 2); | ||
| 220 | |||
| 221 | nnMatrixInit(&A, (R[]) { | ||
| 222 | 1, 2, | ||
| 223 | 3, 4, | ||
| 224 | }); | ||
| 225 | nnMatrixInit(&B, (R[]) { | ||
| 226 | 2, 1, | ||
| 227 | 5, 3, | ||
| 228 | }); | ||
| 229 | |||
| 230 | nnMatrixAdd(&A, &B, &C); | ||
| 231 | |||
| 232 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 0), 3, EPS)); | ||
| 233 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 1), 3, EPS)); | ||
| 234 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 0), 8, EPS)); | ||
| 235 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 1), 7, EPS)); | ||
| 236 | |||
| 237 | nnMatrixDel(&A); | ||
| 238 | nnMatrixDel(&B); | ||
| 239 | nnMatrixDel(&C); | ||
| 240 | } | ||
| 241 | |||
| 242 | TEST_CASE(nnMatrixSub_square_2x2) { | ||
| 243 | nnMatrix A = nnMatrixMake(2, 2); | ||
| 244 | nnMatrix B = nnMatrixMake(2, 2); | ||
| 245 | nnMatrix C = nnMatrixMake(2, 2); | ||
| 246 | |||
| 247 | nnMatrixInit(&A, (R[]) { | ||
| 248 | 1, 2, | ||
| 249 | 3, 4, | ||
| 250 | }); | ||
| 251 | nnMatrixInit(&B, (R[]) { | ||
| 252 | 2, 1, | ||
| 253 | 5, 3, | ||
| 254 | }); | ||
| 255 | |||
| 256 | nnMatrixSub(&A, &B, &C); | ||
| 257 | |||
| 258 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 0), -1, EPS)); | ||
| 259 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 1), +1, EPS)); | ||
| 260 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 0), -2, EPS)); | ||
| 261 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 1), +1, EPS)); | ||
| 262 | |||
| 263 | nnMatrixDel(&A); | ||
| 264 | nnMatrixDel(&B); | ||
| 265 | nnMatrixDel(&C); | ||
| 266 | } | ||
| 267 | |||
| 268 | TEST_CASE(nnMatrixAddRow_test) { | ||
| 269 | nnMatrix A = nnMatrixMake(2, 3); | ||
| 270 | nnMatrix B = nnMatrixMake(1, 3); | ||
| 271 | nnMatrix C = nnMatrixMake(2, 3); | ||
| 272 | |||
| 273 | nnMatrixInit(&A, (R[]) { | ||
| 274 | 1, 2, 3, | ||
| 275 | 4, 5, 6, | ||
| 276 | }); | ||
| 277 | nnMatrixInit(&B, (R[]) { | ||
| 278 | 2, 1, 3, | ||
| 279 | }); | ||
| 280 | |||
| 281 | nnMatrixAddRow(&A, &B, &C); | ||
| 282 | |||
| 283 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 0), 3, EPS)); | ||
| 284 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 1), 3, EPS)); | ||
| 285 | TEST_TRUE(double_eq(nnMatrixAt(&C, 0, 2), 6, EPS)); | ||
| 286 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 0), 6, EPS)); | ||
| 287 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 1), 6, EPS)); | ||
| 288 | TEST_TRUE(double_eq(nnMatrixAt(&C, 1, 2), 9, EPS)); | ||
| 289 | |||
| 290 | nnMatrixDel(&A); | ||
| 291 | nnMatrixDel(&B); | ||
| 292 | nnMatrixDel(&C); | ||
| 293 | } | ||
| 294 | |||
| 295 | TEST_CASE(nnMatrixTranspose_square_2x2) { | ||
| 296 | nnMatrix A = nnMatrixMake(2, 2); | ||
| 297 | nnMatrix B = nnMatrixMake(2, 2); | ||
| 298 | |||
| 299 | nnMatrixInit(&A, (R[]) { | ||
| 300 | 1, 2, | ||
| 301 | 3, 4 | ||
| 302 | }); | ||
| 303 | |||
| 304 | nnMatrixTranspose(&A, &B); | ||
| 305 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 0), 1, EPS)); | ||
| 306 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 1), 3, EPS)); | ||
| 307 | TEST_TRUE(double_eq(nnMatrixAt(&B, 1, 0), 2, EPS)); | ||
| 308 | TEST_TRUE(double_eq(nnMatrixAt(&B, 1, 1), 4, EPS)); | ||
| 309 | |||
| 310 | nnMatrixDel(&A); | ||
| 311 | nnMatrixDel(&B); | ||
| 312 | } | ||
| 313 | |||
| 314 | TEST_CASE(nnMatrixTranspose_non_square_2x1) { | ||
| 315 | nnMatrix A = nnMatrixMake(2, 1); | ||
| 316 | nnMatrix B = nnMatrixMake(1, 2); | ||
| 317 | |||
| 318 | nnMatrixInit(&A, (R[]) { | ||
| 319 | 1, | ||
| 320 | 3, | ||
| 321 | }); | ||
| 322 | |||
| 323 | nnMatrixTranspose(&A, &B); | ||
| 324 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 0), 1, EPS)); | ||
| 325 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 1), 3, EPS)); | ||
| 326 | |||
| 327 | nnMatrixDel(&A); | ||
| 328 | nnMatrixDel(&B); | ||
| 329 | } | ||
| 330 | |||
| 331 | TEST_CASE(nnMatrixGt_test) { | ||
| 332 | nnMatrix A = nnMatrixMake(2, 3); | ||
| 333 | nnMatrix B = nnMatrixMake(2, 3); | ||
| 334 | |||
| 335 | nnMatrixInit(&A, (R[]) { | ||
| 336 | -3, 2, 0, | ||
| 337 | 4, -1, 5 | ||
| 338 | }); | ||
| 339 | |||
| 340 | nnMatrixGt(&A, 0, &B); | ||
| 341 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 0), 0, EPS)); | ||
| 342 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 1), 1, EPS)); | ||
| 343 | TEST_TRUE(double_eq(nnMatrixAt(&B, 0, 2), 0, EPS)); | ||
| 344 | TEST_TRUE(double_eq(nnMatrixAt(&B, 1, 0), 1, EPS)); | ||
| 345 | TEST_TRUE(double_eq(nnMatrixAt(&B, 1, 1), 0, EPS)); | ||
| 346 | TEST_TRUE(double_eq(nnMatrixAt(&B, 1, 2), 1, EPS)); | ||
| 347 | |||
| 348 | nnMatrixDel(&A); | ||
| 349 | nnMatrixDel(&B); | ||
| 350 | } | ||
diff --git a/src/lib/test/neuralnet_test.c b/src/lib/test/neuralnet_test.c new file mode 100644 index 0000000..14d9438 --- /dev/null +++ b/src/lib/test/neuralnet_test.c | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | #include <neuralnet/neuralnet.h> | ||
| 2 | |||
| 3 | #include <neuralnet/matrix.h> | ||
| 4 | #include "activation.h" | ||
| 5 | #include "neuralnet_impl.h" | ||
| 6 | |||
| 7 | #include "test.h" | ||
| 8 | #include "test_util.h" | ||
| 9 | |||
| 10 | #include <assert.h> | ||
| 11 | |||
| 12 | TEST_CASE(neuralnet_perceptron_test) { | ||
| 13 | const int num_layers = 1; | ||
| 14 | const int layer_sizes[] = { 1, 1 }; | ||
| 15 | const nnActivation layer_activations[] = { nnSigmoid }; | ||
| 16 | const R weights[] = { 0.3 }; | ||
| 17 | |||
| 18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
| 19 | assert(net); | ||
| 20 | nnSetWeights(net, weights); | ||
| 21 | |||
| 22 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | ||
| 23 | |||
| 24 | const R input[] = { 0.9 }; | ||
| 25 | R output[1]; | ||
| 26 | nnQueryArray(net, query, input, output); | ||
| 27 | |||
| 28 | const R expected_output = sigmoid(input[0] * weights[0]); | ||
| 29 | printf("\nOutput: %f, Expected: %f\n", output[0], expected_output); | ||
| 30 | TEST_TRUE(double_eq(output[0], expected_output, EPS)); | ||
| 31 | |||
| 32 | nnDeleteQueryObject(&query); | ||
| 33 | nnDeleteNet(&net); | ||
| 34 | } | ||
| 35 | |||
| 36 | TEST_CASE(neuralnet_xor_test) { | ||
| 37 | const int num_layers = 2; | ||
| 38 | const int layer_sizes[] = { 2, 2, 1 }; | ||
| 39 | const nnActivation layer_activations[] = { nnRelu, nnIdentity }; | ||
| 40 | const R weights[] = { | ||
| 41 | 1, 1, 1, 1, // First (hidden) layer. | ||
| 42 | 1, -2 // Second (output) layer. | ||
| 43 | }; | ||
| 44 | const R biases[] = { | ||
| 45 | 0, -1, // First (hidden) layer. | ||
| 46 | 0 // Second (output) layer. | ||
| 47 | }; | ||
| 48 | |||
| 49 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
| 50 | assert(net); | ||
| 51 | nnSetWeights(net, weights); | ||
| 52 | nnSetBiases(net, biases); | ||
| 53 | |||
| 54 | // First layer weights. | ||
| 55 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 0), 1); | ||
| 56 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 1), 1); | ||
| 57 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 1, 0), 1); | ||
| 58 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 1, 1), 1); | ||
| 59 | // Second layer weights. | ||
| 60 | TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 0), 1); | ||
| 61 | TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 1), -2); | ||
| 62 | // First layer biases. | ||
| 63 | TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 0), 0); | ||
| 64 | TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 1), -1); | ||
| 65 | // Second layer biases. | ||
| 66 | TEST_EQUAL(nnMatrixAt(&net->biases[1], 0, 0), 0); | ||
| 67 | |||
| 68 | // Test. | ||
| 69 | |||
| 70 | #define M 4 | ||
| 71 | |||
| 72 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M); | ||
| 73 | |||
| 74 | const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } }; | ||
| 75 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); | ||
| 76 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); | ||
| 77 | nnQuery(net, query, &test_inputs_matrix); | ||
| 78 | |||
| 79 | const R expected_outputs[M] = { 0., 1., 1., 0. }; | ||
| 80 | for (int i = 0; i < M; ++i) { | ||
| 81 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | ||
| 82 | printf("\nInput: (%f, %f), Output: %f, Expected: %f\n", | ||
| 83 | test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]); | ||
| 84 | } | ||
| 85 | for (int i = 0; i < M; ++i) { | ||
| 86 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | ||
| 87 | TEST_TRUE(double_eq(test_output, expected_outputs[i], OUTPUT_EPS)); | ||
| 88 | } | ||
| 89 | |||
| 90 | nnDeleteQueryObject(&query); | ||
| 91 | nnDeleteNet(&net); | ||
| 92 | } | ||
diff --git a/src/lib/test/test.h b/src/lib/test/test.h new file mode 100644 index 0000000..fd8dc22 --- /dev/null +++ b/src/lib/test/test.h | |||
| @@ -0,0 +1,185 @@ | |||
| 1 | // SPDX-License-Identifier: MIT | ||
| 2 | #pragma once | ||
| 3 | |||
| 4 | #ifdef UNIT_TEST | ||
| 5 | |||
| 6 | #include <stdbool.h> | ||
| 7 | #include <stdio.h> | ||
| 8 | #include <stdlib.h> | ||
| 9 | #include <string.h> | ||
| 10 | |||
| 11 | #if defined(__DragonFly__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ | ||
| 12 | defined(__NetBSD__) || defined(__OpenBSD__) | ||
| 13 | #define USE_SYSCTL_FOR_ARGS 1 | ||
| 14 | // clang-format off | ||
| 15 | #include <sys/types.h> | ||
| 16 | #include <sys/sysctl.h> | ||
| 17 | // clang-format on | ||
| 18 | #include <unistd.h> // getpid | ||
| 19 | #endif | ||
| 20 | |||
| 21 | struct test_file_metadata; | ||
| 22 | |||
| 23 | struct test_failure { | ||
| 24 | bool present; | ||
| 25 | const char *message; | ||
| 26 | const char *file; | ||
| 27 | int line; | ||
| 28 | }; | ||
| 29 | |||
| 30 | struct test_case_metadata { | ||
| 31 | void (*fn)(struct test_case_metadata *, struct test_file_metadata *); | ||
| 32 | struct test_failure failure; | ||
| 33 | const char *name; | ||
| 34 | struct test_case_metadata *next; | ||
| 35 | }; | ||
| 36 | |||
| 37 | struct test_file_metadata { | ||
| 38 | bool registered; | ||
| 39 | const char *name; | ||
| 40 | struct test_file_metadata *next; | ||
| 41 | struct test_case_metadata *tests; | ||
| 42 | }; | ||
| 43 | |||
| 44 | struct test_file_metadata __attribute__((weak)) * test_file_head; | ||
| 45 | |||
| 46 | #define SET_FAILURE(_message) \ | ||
| 47 | metadata->failure = (struct test_failure) { \ | ||
| 48 | .message = _message, .file = __FILE__, .line = __LINE__, .present = true, \ | ||
| 49 | } | ||
| 50 | |||
| 51 | #define TEST_EQUAL(a, b) \ | ||
| 52 | do { \ | ||
| 53 | if ((a) != (b)) { \ | ||
| 54 | SET_FAILURE(#a " != " #b); \ | ||
| 55 | return; \ | ||
| 56 | } \ | ||
| 57 | } while (0) | ||
| 58 | |||
| 59 | #define TEST_TRUE(a) \ | ||
| 60 | do { \ | ||
| 61 | if (!(a)) { \ | ||
| 62 | SET_FAILURE(#a " is not true"); \ | ||
| 63 | return; \ | ||
| 64 | } \ | ||
| 65 | } while (0) | ||
| 66 | |||
| 67 | #define TEST_STREQUAL(a, b) \ | ||
| 68 | do { \ | ||
| 69 | if (strcmp(a, b) != 0) { \ | ||
| 70 | SET_FAILURE(#a " != " #b); \ | ||
| 71 | return; \ | ||
| 72 | } \ | ||
| 73 | } while (0) | ||
| 74 | |||
| 75 | #define TEST_CASE(_name) \ | ||
| 76 | static void __test_h_##_name(struct test_case_metadata *, \ | ||
| 77 | struct test_file_metadata *); \ | ||
| 78 | static struct test_file_metadata __test_h_file; \ | ||
| 79 | static struct test_case_metadata __test_h_meta_##_name = { \ | ||
| 80 | .name = #_name, \ | ||
| 81 | .fn = __test_h_##_name, \ | ||
| 82 | }; \ | ||
| 83 | static void __attribute__((constructor(101))) __test_h_##_name##_register(void) { \ | ||
| 84 | __test_h_meta_##_name.next = __test_h_file.tests; \ | ||
| 85 | __test_h_file.tests = &__test_h_meta_##_name; \ | ||
| 86 | if (!__test_h_file.registered) { \ | ||
| 87 | __test_h_file.name = __FILE__; \ | ||
| 88 | __test_h_file.next = test_file_head; \ | ||
| 89 | test_file_head = &__test_h_file; \ | ||
| 90 | __test_h_file.registered = true; \ | ||
| 91 | } \ | ||
| 92 | } \ | ||
| 93 | static void __test_h_##_name( \ | ||
| 94 | struct test_case_metadata *metadata __attribute__((unused)), \ | ||
| 95 | struct test_file_metadata *file_metadata __attribute__((unused))) | ||
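| | |||
| | // Usage sketch (editor's addition): | ||
| | // | ||
| | //   TEST_CASE(addition_works) { TEST_EQUAL(1 + 1, 2); } | ||
| | // | ||
| | // The constructor above registers the case at load time; run_tests below | ||
| | // executes all registered cases when the binary is run with --unittest. | ||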
| 96 | |||
| 97 | extern void __attribute__((weak)) (*test_h_unittest_setup)(void); | ||
| 98 | /// Runs the registered tests if --unittest was passed on the command line, | ||
| 99 | /// prints a summary, and exits with a failure status if any test failed. | ||
| 100 | static inline void __attribute__((constructor(102))) run_tests(void) { | ||
| 101 | bool should_run = false; | ||
| 102 | #ifdef USE_SYSCTL_FOR_ARGS | ||
| 103 | int mib[] = { | ||
| 104 | CTL_KERN, | ||
| 105 | #if defined(__NetBSD__) || defined(__OpenBSD__) | ||
| 106 | KERN_PROC_ARGS, | ||
| 107 | getpid(), | ||
| 108 | KERN_PROC_ARGV, | ||
| 109 | #else | ||
| 110 | KERN_PROC, | ||
| 111 | KERN_PROC_ARGS, | ||
| 112 | getpid(), | ||
| 113 | #endif | ||
| 114 | }; | ||
| 115 | char *arg = NULL; | ||
| 116 | size_t arglen; | ||
| 117 | sysctl(mib, sizeof(mib) / sizeof(mib[0]), NULL, &arglen, NULL, 0); | ||
| 118 | arg = malloc(arglen); | ||
| 119 | sysctl(mib, sizeof(mib) / sizeof(mib[0]), arg, &arglen, NULL, 0); | ||
| 120 | #else | ||
| 121 | FILE *cmdlinef = fopen("/proc/self/cmdline", "r"); | ||
| 122 | char *arg = NULL; | ||
| 123 | int arglen; | ||
| 124 | fscanf(cmdlinef, "%ms%n", &arg, &arglen); | ||
| 125 | fclose(cmdlinef); | ||
| 126 | #endif | ||
| 127 | for (char *pos = arg; pos < arg + arglen; pos += strlen(pos) + 1) { | ||
| 128 | if (strcmp(pos, "--unittest") == 0) { | ||
| 129 | should_run = true; | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | free(arg); | ||
| 134 | |||
| 135 | if (!should_run) { | ||
| 136 | return; | ||
| 137 | } | ||
| 138 | |||
| 139 | if (&test_h_unittest_setup) { | ||
| 140 | test_h_unittest_setup(); | ||
| 141 | } | ||
| 142 | |||
| 143 | struct test_file_metadata *i = test_file_head; | ||
| 144 | int failed = 0, success = 0; | ||
| 145 | while (i) { | ||
| 146 | fprintf(stderr, "Running tests from %s:\n", i->name); | ||
| 147 | struct test_case_metadata *j = i->tests; | ||
| 148 | while (j) { | ||
| 149 | fprintf(stderr, "\t%s ... ", j->name); | ||
| 150 | j->failure.present = false; | ||
| 151 | j->fn(j, i); | ||
| 152 | if (j->failure.present) { | ||
| 153 | fprintf(stderr, "failed (%s at %s:%d)\n", j->failure.message, | ||
| 154 | j->failure.file, j->failure.line); | ||
| 155 | failed++; | ||
| 156 | } else { | ||
| 157 | fprintf(stderr, "passed\n"); | ||
| 158 | success++; | ||
| 159 | } | ||
| 160 | j = j->next; | ||
| 161 | } | ||
| 162 | fprintf(stderr, "\n"); | ||
| 163 | i = i->next; | ||
| 164 | } | ||
| 165 | int total = failed + success; | ||
| 166 | fprintf(stderr, "Test results: passed %d/%d, failed %d/%d\n", success, total, | ||
| 167 | failed, total); | ||
| 168 | exit(failed == 0 ? EXIT_SUCCESS : EXIT_FAILURE); | ||
| 169 | } | ||
| 170 | |||
| 171 | #else | ||
| 172 | |||
| 173 | #include <stdbool.h> | ||
| 174 | |||
| 175 | #define TEST_CASE(name) static void __attribute__((unused)) __test_h_##name(void) | ||
| 176 | |||
| 177 | #define TEST_EQUAL(a, b) \ | ||
| 178 | (void)(a); \ | ||
| 179 | (void)(b) | ||
| 180 | #define TEST_TRUE(a) (void)(a) | ||
| 181 | #define TEST_STREQUAL(a, b) \ | ||
| 182 | (void)(a); \ | ||
| 183 | (void)(b) | ||
| 184 | |||
| 185 | #endif | ||
diff --git a/src/lib/test/test_main.c b/src/lib/test/test_main.c new file mode 100644 index 0000000..4cce7f6 --- /dev/null +++ b/src/lib/test/test_main.c | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | int main(void) { | ||
| 2 | return 0; | ||
| 3 | } | ||
diff --git a/src/lib/test/test_util.h b/src/lib/test/test_util.h new file mode 100644 index 0000000..8abb99a --- /dev/null +++ b/src/lib/test/test_util.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <neuralnet/types.h> | ||
| 4 | |||
| 5 | #include <math.h> | ||
| | #include <stdbool.h> // For bool, used by double_eq below. | ||
| 6 | |||
| 7 | // General epsilon for comparing values. | ||
| 8 | static const R EPS = 1e-10; | ||
| 9 | |||
| 10 | // Epsilon for comparing network weights after training. | ||
| 11 | static const R WEIGHT_EPS = 0.01; | ||
| 12 | |||
| 13 | // Epsilon for comparing network outputs after training. | ||
| 14 | static const R OUTPUT_EPS = 0.01; | ||
| 15 | |||
| 16 | static inline bool double_eq(double a, double b, double eps) { | ||
| 17 | return fabs(a - b) <= eps; | ||
| 18 | } | ||
| 19 | |||
| 20 | static inline R lerp(R a, R b, R t) { | ||
| 21 | return a + t*(b-a); | ||
| 22 | } | ||
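| | |||
| | // Examples (editor's note): lerp(-1, +1, 0.5) == 0, and | ||
| | // double_eq(0.1 + 0.2, 0.3, EPS) holds because the double rounding error of | ||
| | // 0.1 + 0.2 (about 4.4e-17) is well below EPS = 1e-10. | ||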
diff --git a/src/lib/test/train_linear_perceptron_non_origin_test.c b/src/lib/test/train_linear_perceptron_non_origin_test.c new file mode 100644 index 0000000..5a320ac --- /dev/null +++ b/src/lib/test/train_linear_perceptron_non_origin_test.c | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | #include <neuralnet/train.h> | ||
| 2 | |||
| 3 | #include <neuralnet/matrix.h> | ||
| 4 | #include <neuralnet/neuralnet.h> | ||
| 5 | #include "activation.h" | ||
| 6 | #include "neuralnet_impl.h" | ||
| 7 | |||
| 8 | #include "test.h" | ||
| 9 | #include "test_util.h" | ||
| 10 | |||
| 11 | #include <assert.h> | ||
| 12 | |||
| 13 | TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) { | ||
| 14 | const int num_layers = 1; | ||
| 15 | const int layer_sizes[] = { 1, 1 }; | ||
| 16 | const nnActivation layer_activations[] = { nnIdentity }; | ||
| 17 | |||
| 18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
| 19 | assert(net); | ||
| 20 | |||
| 21 | // Train. | ||
| 22 | |||
| 23 | // Try to learn the Y = 2X + 1 line. | ||
| 24 | #define N 2 | ||
| 25 | const R inputs[N] = { 0., 1. }; | ||
| 26 | const R targets[N] = { 1., 3. }; | ||
| 27 | |||
| 28 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); | ||
| 29 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | ||
| 30 | nnMatrixInit(&inputs_matrix, inputs); | ||
| 31 | nnMatrixInit(&targets_matrix, targets); | ||
| 32 | |||
| 33 | nnTrainingParams params = { | ||
| 34 | .learning_rate = 0.7, | ||
| 35 | .max_iterations = 20, | ||
| 36 | .seed = 0, | ||
| 37 | .weight_init = nnWeightInit01, | ||
| 38 | .debug = false, | ||
| 39 | }; | ||
| 40 | |||
| 41 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | ||
| 42 | |||
| 43 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | ||
| 44 | const R expected_weight = 2.0; | ||
| 45 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
| 46 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | ||
| 47 | |||
| 48 | const R bias = nnMatrixAt(&net->biases[0], 0, 0); | ||
| 49 | const R expected_bias = 1.0; | ||
| 50 | printf("Trained network bias: %f, Expected: %f\n", bias, expected_bias); | ||
| 51 | TEST_TRUE(double_eq(bias, expected_bias, WEIGHT_EPS)); | ||
| 52 | |||
| 53 | // Test. | ||
| 54 | |||
| 55 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | ||
| 56 | |||
| 57 | const R test_input[] = { 2.3 }; | ||
| 58 | R test_output[1]; | ||
| 59 | nnQueryArray(net, query, test_input, test_output); | ||
| 60 | |||
| 61 | const R expected_output = test_input[0] * expected_weight + expected_bias; | ||
| 62 | printf("Output: %f, Expected: %f\n", test_output[0], expected_output); | ||
| 63 | TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS)); | ||
| 64 | |||
| 65 | nnDeleteQueryObject(&query); | ||
| 66 | nnDeleteNet(&net); | ||
| 67 | } | ||
diff --git a/src/lib/test/train_linear_perceptron_test.c b/src/lib/test/train_linear_perceptron_test.c new file mode 100644 index 0000000..2b1336d --- /dev/null +++ b/src/lib/test/train_linear_perceptron_test.c | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | #include <neuralnet/train.h> | ||
| 2 | |||
| 3 | #include <neuralnet/matrix.h> | ||
| 4 | #include <neuralnet/neuralnet.h> | ||
| 5 | #include "activation.h" | ||
| 6 | #include "neuralnet_impl.h" | ||
| 7 | |||
| 8 | #include "test.h" | ||
| 9 | #include "test_util.h" | ||
| 10 | |||
| 11 | #include <assert.h> | ||
| 12 | |||
| 13 | TEST_CASE(neuralnet_train_linear_perceptron_test) { | ||
| 14 | const int num_layers = 1; | ||
| 15 | const int layer_sizes[] = { 1, 1 }; | ||
| 16 | const nnActivation layer_activations[] = { nnIdentity }; | ||
| 17 | |||
| 18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
| 19 | assert(net); | ||
| 20 | |||
| 21 | // Train. | ||
| 22 | |||
| 23 | // Try to learn the Y=X line. | ||
| 24 | #define N 2 | ||
| 25 | const R inputs[N] = { 0., 1. }; | ||
| 26 | const R targets[N] = { 0., 1. }; | ||
| 27 | |||
| 28 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); | ||
| 29 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | ||
| 30 | nnMatrixInit(&inputs_matrix, inputs); | ||
| 31 | nnMatrixInit(&targets_matrix, targets); | ||
| 32 | |||
| 33 | nnTrainingParams params = { | ||
| 34 | .learning_rate = 0.7, | ||
| 35 | .max_iterations = 10, | ||
| 36 | .seed = 0, | ||
| 37 | .weight_init = nnWeightInit01, | ||
| 38 | .debug = false, | ||
| 39 | }; | ||
| 40 | |||
| 41 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | ||
| 42 | |||
| 43 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | ||
| 44 | const R expected_weight = 1.0; | ||
| 45 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
| 46 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | ||
| 47 | |||
| 48 | // Test. | ||
| 49 | |||
| 50 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | ||
| 51 | |||
| 52 | const R test_input[] = { 2.3 }; | ||
| 53 | R test_output[1]; | ||
| 54 | nnQueryArray(net, query, test_input, test_output); | ||
| 55 | |||
| 56 | const R expected_output = test_input[0]; | ||
| 57 | printf("Output: %f, Expected: %f\n", test_output[0], expected_output); | ||
| 58 | TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS)); | ||
| 59 | |||
| 60 | nnDeleteQueryObject(&query); | ||
| 61 | nnDeleteNet(&net); | ||
| 62 | } | ||
diff --git a/src/lib/test/train_sigmoid_test.c b/src/lib/test/train_sigmoid_test.c new file mode 100644 index 0000000..588e7ca --- /dev/null +++ b/src/lib/test/train_sigmoid_test.c | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | #include <neuralnet/train.h> | ||
| 2 | |||
| 3 | #include <neuralnet/matrix.h> | ||
| 4 | #include <neuralnet/neuralnet.h> | ||
| 5 | #include "activation.h" | ||
| 6 | #include "neuralnet_impl.h" | ||
| 7 | |||
| 8 | #include "test.h" | ||
| 9 | #include "test_util.h" | ||
| 10 | |||
| 11 | #include <assert.h> | ||
| 12 | |||
| 13 | TEST_CASE(neuralnet_train_sigmoid_test) { | ||
| 14 | const int num_layers = 1; | ||
| 15 | const int layer_sizes[] = { 1, 1 }; | ||
| 16 | const nnActivation layer_activations[] = { nnSigmoid }; | ||
| 17 | |||
| 18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
| 19 | assert(net); | ||
| 20 | |||
| 21 | // Train. | ||
| 22 | |||
| 23 | // Try to learn the sigmoid function. | ||
| 24 | #define N 3 | ||
| 25 | R inputs[N]; | ||
| 26 | R targets[N]; | ||
| 27 | for (int i = 0; i < N; ++i) { | ||
| 28 | inputs[i] = lerp(-1, +1, (R)i / (R)(N-1)); | ||
| 29 | targets[i] = sigmoid(inputs[i]); | ||
| 30 | } | ||
| 31 | |||
| 32 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); | ||
| 33 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | ||
| 34 | nnMatrixInit(&inputs_matrix, inputs); | ||
| 35 | nnMatrixInit(&targets_matrix, targets); | ||
| 36 | |||
| 37 | nnTrainingParams params = { | ||
| 38 | .learning_rate = 0.9, | ||
| 39 | .max_iterations = 100, | ||
| 40 | .seed = 0, | ||
| 41 | .weight_init = nnWeightInit01, | ||
| 42 | .debug = false, | ||
| 43 | }; | ||
| 44 | |||
| 45 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | ||
| 46 | |||
| 47 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | ||
| 48 | const R expected_weight = 1.0; | ||
| 49 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
| 50 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | ||
| 51 | |||
| 52 | // Test. | ||
| 53 | |||
| 54 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | ||
| 55 | |||
| 56 | const R test_input[] = { 0.3 }; | ||
| 57 | R test_output[1]; | ||
| 58 | nnQueryArray(net, query, test_input, test_output); | ||
| 59 | |||
| 60 | const R expected_output = 0.574442516811659; // sigmoid(0.3) | ||
| 61 | printf("Output: %f, Expected: %f\n", test_output[0], expected_output); | ||
| 62 | TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS)); | ||
| 63 | |||
| 64 | nnDeleteQueryObject(&query); | ||
| 65 | nnDeleteNet(&net); | ||
| 66 | } | ||
diff --git a/src/lib/test/train_xor_test.c b/src/lib/test/train_xor_test.c new file mode 100644 index 0000000..6ddc6e0 --- /dev/null +++ b/src/lib/test/train_xor_test.c | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | #include <neuralnet/train.h> | ||
| 2 | |||
| 3 | #include <neuralnet/matrix.h> | ||
| 4 | #include <neuralnet/neuralnet.h> | ||
| 5 | #include "activation.h" | ||
| 6 | #include "neuralnet_impl.h" | ||
| 7 | |||
| 8 | #include "test.h" | ||
| 9 | #include "test_util.h" | ||
| 10 | |||
| 11 | #include <assert.h> | ||
| 12 | |||
| 13 | TEST_CASE(neuralnet_train_xor_test) { | ||
| 14 | const int num_layers = 2; | ||
| 15 | const int layer_sizes[] = { 2, 2, 1 }; | ||
| 16 | const nnActivation layer_activations[] = { nnRelu, nnIdentity }; | ||
| 17 | |||
| 18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | ||
| 19 | assert(net); | ||
| 20 | |||
| 21 | // Train. | ||
| 22 | |||
| 23 | #define N 4 | ||
| 24 | const R inputs[N][2] = { { 0., 0. }, { 0., 1. }, { 1., 0. }, { 1., 1. } }; | ||
| 25 | const R targets[N] = { 0., 1., 1., 0. }; | ||
| 26 | |||
| 27 | nnMatrix inputs_matrix = nnMatrixMake(N, 2); | ||
| 28 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | ||
| 29 | nnMatrixInit(&inputs_matrix, (const R*)inputs); | ||
| 30 | nnMatrixInit(&targets_matrix, targets); | ||
| 31 | |||
| 32 | nnTrainingParams params = { | ||
| 33 | .learning_rate = 0.1, | ||
| 34 | .max_iterations = 500, | ||
| 35 | .seed = 0, | ||
| 36 | .weight_init = nnWeightInit01, | ||
| 37 | .debug = false, | ||
| 38 | }; | ||
| 39 | |||
| 40 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | ||
| 41 | |||
| 42 | // Test. | ||
| 43 | |||
| 44 | #define M 4 | ||
| 45 | |||
| 46 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M); | ||
| 47 | |||
| 48 | const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } }; | ||
| 49 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); | ||
| 50 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); | ||
| 51 | nnQuery(net, query, &test_inputs_matrix); | ||
| 52 | |||
| 53 | const R expected_outputs[M] = { 0., 1., 1., 0. }; | ||
| 54 | for (int i = 0; i < M; ++i) { | ||
| 55 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | ||
| 56 | printf("\nInput: (%f, %f), Output: %f, Expected: %f\n", | ||
| 57 | test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]); | ||
| 58 | } | ||
| 59 | for (int i = 0; i < M; ++i) { | ||
| 60 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | ||
| 61 | TEST_TRUE(double_eq(test_output, expected_outputs[i], OUTPUT_EPS)); | ||
| 62 | } | ||
| 63 | |||
| 64 | nnDeleteQueryObject(&query); | ||
| 65 | nnDeleteNet(&net); | ||
| 66 | } | ||
