author:    3gg <3gg@shellblade.net>  2023-12-16 10:21:16 -0800
committer: 3gg <3gg@shellblade.net>  2023-12-16 10:21:16 -0800
commit:    653e98e029a0d0f110b0ac599e50406060bb0f87
tree:      6f909215218f6720266bde1b3f49aeddad8b1da3
parent:    3df7b6fb0c65295eed4590e6f166d60e89b3c68e
Decouple activations from linear layer.
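(Editorial note.) The old API described a network as parallel arrays of layer sizes and per-layer activations; after this change, an activation is a layer in its own right and `nnMakeNet` takes an explicit layer list plus the network's input size. A minimal before/after sketch of the construction call, distilled from the MNIST driver changes below — the sizes are illustrative:

```c
// Before (removed by this commit):
//   const int          layer_sizes[3]       = {input_size, 100, 10};
//   const nnActivation layer_activations[2] = {nnSigmoid, nnSigmoid};
//   nnNeuralNetwork*   net = nnMakeNet(2, layer_sizes, layer_activations);

// After: activations are layers, so the same sigmoid MLP has 4 layers.
const nnLayer layers[4] = {
    {.type = nnLinear, .linear = {.input_size = 784, .output_size = 100}},
    {.type = nnSigmoid},
    {.type = nnLinear, .linear = {.input_size = 100, .output_size = 10}},
    {.type = nnSigmoid},
};
nnNeuralNetwork* net = nnMakeNet(layers, /*num_layers=*/4, /*input_size=*/784);
```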
-rw-r--r--  src/bin/mnist/src/main.c                                | 195
-rw-r--r--  src/lib/include/neuralnet/matrix.h                      |   3
-rw-r--r--  src/lib/include/neuralnet/neuralnet.h                   |  51
-rw-r--r--  src/lib/src/activation.h                                |   4
-rw-r--r--  src/lib/src/matrix.c                                    |   6
-rw-r--r--  src/lib/src/neuralnet.c                                 | 218
-rw-r--r--  src/lib/src/neuralnet_impl.h                            |  35
-rw-r--r--  src/lib/src/train.c                                     | 182
-rw-r--r--  src/lib/test/neuralnet_test.c                           | 103
-rw-r--r--  src/lib/test/train_linear_perceptron_non_origin_test.c  |  46
-rw-r--r--  src/lib/test/train_linear_perceptron_test.c             |  44
-rw-r--r--  src/lib/test/train_sigmoid_test.c                       |  46
-rw-r--r--  src/lib/test/train_xor_test.c                           |  55

13 files changed, 559 insertions, 429 deletions
diff --git a/src/bin/mnist/src/main.c b/src/bin/mnist/src/main.c
index 9aa3ce5..53e0197 100644
--- a/src/bin/mnist/src/main.c
+++ b/src/bin/mnist/src/main.c
@@ -29,32 +29,35 @@ static const double LABEL_UPPER_BOUND = 0.99;
 // Epsilon used to compare R values.
 static const double EPS = 1e-10;
 
-#define min(a,b) ((a) < (b) ? (a) : (b))
+#define min(a, b) ((a) < (b) ? (a) : (b))
 
 typedef struct ImageSet {
   nnMatrix images; // Images flattened into row vectors of the matrix.
   nnMatrix labels; // One-hot-encoded labels.
   int count;       // Number of images and labels.
   int rows;        // Rows in an image.
   int cols;        // Columns in an image.
 } ImageSet;
 
 static void usage(const char* argv0) {
-  fprintf(stderr, "Usage: %s <path to mnist files directory> [num images]\n", argv0);
+  fprintf(
+      stderr, "Usage: %s <path to mnist files directory> [num images]\n",
+      argv0);
   fprintf(stderr, "\n");
-  fprintf(stderr, "  Use -1 for [num images] to use all the images in the data set\n");
+  fprintf(
+      stderr,
+      "  Use -1 for [num images] to use all the images in the data set\n");
 }
 
-static bool R_eq(R a, R b) {
-  return fabs(a-b) <= EPS;
-}
+static bool R_eq(R a, R b) { return fabs(a - b) <= EPS; }
 
-static void PrintImage(const nnMatrix* images, int rows, int cols, int image_index) {
+static void PrintImage(
+    const nnMatrix* images, int rows, int cols, int image_index) {
   assert(images);
   assert((0 <= image_index) && (image_index < images->rows));
 
   // Top line.
-  for (int j = 0; j < cols/2; ++j) {
+  for (int j = 0; j < cols / 2; ++j) {
     printf(" -");
   }
   printf("\n");
@@ -68,8 +71,7 @@ static void PrintImage(const nnMatrix* images, int rows, int cols, int image_ind
         printf("#");
       } else if (*value > 0.5) {
         printf("*");
-      }
-      else if (*value > PIXEL_LOWER_BOUND) {
+      } else if (*value > PIXEL_LOWER_BOUND) {
         printf(":");
       } else if (*value == 0.0) {
         // Values should not be exactly 0, otherwise they cancel out weights
@@ -84,7 +86,7 @@ static void PrintImage(const nnMatrix* images, int rows, int cols, int image_ind
   }
 
   // Bottom line.
-  for (int j = 0; j < cols/2; ++j) {
+  for (int j = 0; j < cols / 2; ++j) {
     printf(" -");
   }
   printf("\n");
@@ -96,7 +98,7 @@ static void PrintLabel(const nnMatrix* labels, int label_index) {
 
   // Compute the label from the one-hot encoding.
   const R* value = nnMatrixRow(labels, label_index);
-  int label = -1;
+  int      label = -1;
   for (int i = 0; i < 10; ++i) {
     if (R_eq(*value++, LABEL_UPPER_BOUND)) {
       label = i;
@@ -113,13 +115,12 @@ static void PrintLabel(const nnMatrix* labels, int label_index) {
   printf(")\n");
 }
 
-static R lerp(R a, R b, R t) {
-  return a + t*(b-a);
-}
+static R lerp(R a, R b, R t) { return a + t * (b - a); }
 
 /// Rescales a pixel from [0,255] to [PIXEL_LOWER_BOUND, 1.0].
 static R FormatPixel(uint8_t pixel) {
-  const R value = (R)(pixel) / 255.0 * (1.0 - PIXEL_LOWER_BOUND) + PIXEL_LOWER_BOUND;
+  const R value =
+      (R)(pixel) / 255.0 * (1.0 - PIXEL_LOWER_BOUND) + PIXEL_LOWER_BOUND;
   assert(value >= PIXEL_LOWER_BOUND);
   assert(value <= 1.0);
   return value;
@@ -152,7 +153,8 @@ static void ImageToMatrix(
   }
 }
 
-static bool ReadImages(gzFile images_file, int max_num_images, ImageSet* image_set) {
+static bool ReadImages(
+    gzFile images_file, int max_num_images, ImageSet* image_set) {
   assert(images_file != Z_NULL);
   assert(image_set);
 
@@ -161,36 +163,41 @@ static bool ReadImages(gzFile images_file, int max_num_images, ImageSet* image_s
   uint8_t* pixels = 0;
 
   int32_t magic, total_images, rows, cols;
-  if ( (gzread(images_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) ||
-       (gzread(images_file, (char*)&total_images, sizeof(int32_t)) != sizeof(int32_t)) ||
-       (gzread(images_file, (char*)&rows, sizeof(int32_t)) != sizeof(int32_t)) ||
-       (gzread(images_file, (char*)&cols, sizeof(int32_t)) != sizeof(int32_t)) ) {
+  if ((gzread(images_file, (char*)&magic, sizeof(int32_t)) !=
+       sizeof(int32_t)) ||
+      (gzread(images_file, (char*)&total_images, sizeof(int32_t)) !=
+       sizeof(int32_t)) ||
+      (gzread(images_file, (char*)&rows, sizeof(int32_t)) != sizeof(int32_t)) ||
+      (gzread(images_file, (char*)&cols, sizeof(int32_t)) != sizeof(int32_t))) {
     fprintf(stderr, "Failed to read header\n");
     goto cleanup;
   }
 
   magic = ReverseEndian32(magic);
   total_images = ReverseEndian32(total_images);
   rows = ReverseEndian32(rows);
   cols = ReverseEndian32(cols);
 
   if (magic != IMAGE_FILE_MAGIC) {
-    fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n",
-        magic, IMAGE_FILE_MAGIC);
+    fprintf(
+        stderr, "Magic number mismatch. Got %x, expected: %x\n", magic,
+        IMAGE_FILE_MAGIC);
     goto cleanup;
   }
 
-  printf("Magic: %.8x\nTotal images: %d\nRows: %d\nCols: %d\n",
-      magic, total_images, rows, cols);
+  printf(
+      "Magic: %.8x\nTotal images: %d\nRows: %d\nCols: %d\n", magic,
+      total_images, rows, cols);
 
-  total_images = max_num_images >= 0 ? min(total_images, max_num_images) : total_images;
+  total_images =
+      max_num_images >= 0 ? min(total_images, max_num_images) : total_images;
 
   // Images are flattened into single row vectors.
   const int num_pixels = rows * cols;
   image_set->images = nnMatrixMake(total_images, num_pixels);
   image_set->count = total_images;
   image_set->rows = rows;
   image_set->cols = cols;
 
   pixels = calloc(1, num_pixels);
   if (!pixels) {
@@ -219,30 +226,31 @@ cleanup:
   return success;
 }
 
-static void OneHotEncode(const uint8_t* labels_bytes, int num_labels, nnMatrix* labels) {
+static void OneHotEncode(
+    const uint8_t* labels_bytes, int num_labels, nnMatrix* labels) {
   assert(labels_bytes);
   assert(labels);
   assert(labels->rows == num_labels);
   assert(labels->cols == 10);
 
   static const R one_hot[10][10] = {
-    { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-    { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
-    { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
-    { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
-    { 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 },
-    { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 },
-    { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 },
-    { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },
-    { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 },
-    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
+      {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+      {0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+      {0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
+      {0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
+      {0, 0, 0, 0, 1, 0, 0, 0, 0, 0},
+      {0, 0, 0, 0, 0, 1, 0, 0, 0, 0},
+      {0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
+      {0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
+      {0, 0, 0, 0, 0, 0, 0, 0, 1, 0},
+      {0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
   };
 
   R* value = labels->values;
 
   for (int i = 0; i < num_labels; ++i) {
     const uint8_t label = labels_bytes[i];
     const R* one_hot_value = one_hot[label];
 
     for (int j = 0; j < 10; ++j) {
       *value++ = FormatLabel(*one_hot_value++);
@@ -255,13 +263,13 @@ static int OneHotDecode(const nnMatrix* label_matrix) {
   assert(label_matrix->cols == 10);
   assert(label_matrix->rows == 1);
 
-  R max_value = 0;
-  int pos_max = 0;
+  R   max_value = 0;
+  int pos_max   = 0;
   for (int i = 0; i < 10; ++i) {
     const R value = nnMatrixAt(label_matrix, 0, i);
     if (value > max_value) {
       max_value = value;
       pos_max = i;
     }
   }
   assert(pos_max >= 0);
@@ -269,7 +277,8 @@ static int OneHotDecode(const nnMatrix* label_matrix) {
   return pos_max;
 }
 
-static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_set) {
+static bool ReadLabels(
+    gzFile labels_file, int max_num_labels, ImageSet* image_set) {
   assert(labels_file != Z_NULL);
   assert(image_set != 0);
 
@@ -278,24 +287,28 @@ static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_s
   uint8_t* labels = 0;
 
   int32_t magic, total_labels;
-  if ( (gzread(labels_file, (char*)&magic, sizeof(int32_t)) != sizeof(int32_t)) ||
-       (gzread(labels_file, (char*)&total_labels, sizeof(int32_t)) != sizeof(int32_t)) ) {
+  if ((gzread(labels_file, (char*)&magic, sizeof(int32_t)) !=
+       sizeof(int32_t)) ||
+      (gzread(labels_file, (char*)&total_labels, sizeof(int32_t)) !=
+       sizeof(int32_t))) {
    fprintf(stderr, "Failed to read header\n");
    goto cleanup;
  }
 
   magic = ReverseEndian32(magic);
   total_labels = ReverseEndian32(total_labels);
 
   if (magic != LABEL_FILE_MAGIC) {
-    fprintf(stderr, "Magic number mismatch. Got %x, expected: %x\n",
-        magic, LABEL_FILE_MAGIC);
+    fprintf(
+        stderr, "Magic number mismatch. Got %x, expected: %x\n", magic,
+        LABEL_FILE_MAGIC);
     goto cleanup;
   }
 
   printf("Magic: %.8x\nTotal labels: %d\n", magic, total_labels);
 
-  total_labels = max_num_labels >= 0 ? min(total_labels, max_num_labels) : total_labels;
+  total_labels =
+      max_num_labels >= 0 ? min(total_labels, max_num_labels) : total_labels;
 
   assert(image_set->count == total_labels);
 
@@ -308,7 +321,8 @@ static bool ReadLabels(gzFile labels_file, int max_num_labels, ImageSet* image_s
     goto cleanup;
   }
 
-  if (gzread(labels_file, labels, total_labels * sizeof(uint8_t)) != total_labels) {
+  if (gzread(labels_file, labels, total_labels * sizeof(uint8_t)) !=
+      total_labels) {
     fprintf(stderr, "Failed to read labels\n");
     goto cleanup;
   }
@@ -335,17 +349,17 @@ int main(int argc, const char** argv) {
 
   bool success = false;
 
   gzFile train_images_file = Z_NULL;
   gzFile train_labels_file = Z_NULL;
   gzFile test_images_file = Z_NULL;
   gzFile test_labels_file = Z_NULL;
-  ImageSet train_set = { 0 };
-  ImageSet test_set = { 0 };
+  ImageSet train_set = {0};
+  ImageSet test_set = {0};
   nnNeuralNetwork* net = 0;
   nnQueryObject* query = 0;
 
   const char* mnist_files_dir = argv[1];
   const int max_num_images = argc > 2 ? atoi(argv[2]) : -1;
 
   char train_labels_path[PATH_MAX];
   char train_images_path[PATH_MAX];
@@ -353,12 +367,12 @@ int main(int argc, const char** argv) {
   char test_images_path[PATH_MAX];
   strlcpy(train_labels_path, mnist_files_dir, PATH_MAX);
   strlcpy(train_images_path, mnist_files_dir, PATH_MAX);
   strlcpy(test_labels_path, mnist_files_dir, PATH_MAX);
   strlcpy(test_images_path, mnist_files_dir, PATH_MAX);
   strlcat(train_labels_path, "/train-labels-idx1-ubyte.gz", PATH_MAX);
   strlcat(train_images_path, "/train-images-idx3-ubyte.gz", PATH_MAX);
   strlcat(test_labels_path, "/t10k-labels-idx1-ubyte.gz", PATH_MAX);
   strlcat(test_images_path, "/t10k-images-idx3-ubyte.gz", PATH_MAX);
 
   train_images_file = gzopen(train_images_path, "r");
   if (train_images_file == Z_NULL) {
@@ -406,11 +420,18 @@ int main(int argc, const char** argv) {
   }
 
   // Network definition.
   const int image_size_pixels = train_set.rows * train_set.cols;
-  const int num_layers = 2;
-  const int layer_sizes[3] = { image_size_pixels, 100, 10 };
-  const nnActivation layer_activations[2] = { nnSigmoid, nnSigmoid };
-  if (!(net = nnMakeNet(num_layers, layer_sizes, layer_activations))) {
+  const int num_layers = 4;
+  const int hidden_size = 100;
+  const nnLayer layers[4] = {
+      {.type = nnLinear,
+       .linear = {.input_size = image_size_pixels, .output_size = hidden_size}},
+      {.type = nnSigmoid},
+      {.type = nnLinear,
+       .linear = {.input_size = hidden_size, .output_size = 10}},
+      {.type = nnSigmoid}
+  };
+  if (!(net = nnMakeNet(layers, num_layers, image_size_pixels))) {
     fprintf(stderr, "Failed to create neural network\n");
     goto cleanup;
   }
@@ -418,17 +439,17 @@ int main(int argc, const char** argv) {
   // Train.
   printf("Training with up to %d images from the data set\n\n", max_num_images);
   const nnTrainingParams training_params = {
-      .learning_rate = 0.1,
-      .max_iterations = TRAIN_ITERATIONS,
-      .seed = 0,
-      .weight_init = nnWeightInitNormal,
-      .debug = true,
+      .learning_rate  = 0.1,
+      .max_iterations = TRAIN_ITERATIONS,
+      .seed           = 0,
+      .weight_init    = nnWeightInitNormal,
+      .debug          = true,
   };
   nnTrain(net, &train_set.images, &train_set.labels, &training_params);
 
   // Test.
   int hits = 0;
   query = nnMakeQueryObject(net, /*num_inputs=*/1);
   for (int i = 0; i < test_set.count; ++i) {
     const nnMatrix test_image = nnMatrixBorrowRows(&test_set.images, i, 1);
     const nnMatrix test_label = nnMatrixBorrowRows(&test_set.labels, i, 1);
@@ -444,7 +465,7 @@ int main(int argc, const char** argv) {
   }
   const R hit_ratio = (R)hits / (R)test_set.count;
   printf("Test images: %d\n", test_set.count);
-  printf("Hits: %d/%d (%.3f%%)\n", hits, test_set.count, hit_ratio*100);
+  printf("Hits: %d/%d (%.3f%%)\n", hits, test_set.count, hit_ratio * 100);
 
   success = true;
 
diff --git a/src/lib/include/neuralnet/matrix.h b/src/lib/include/neuralnet/matrix.h
index b7281bf..f80b985 100644
--- a/src/lib/include/neuralnet/matrix.h
+++ b/src/lib/include/neuralnet/matrix.h
@@ -17,6 +17,9 @@ nnMatrix nnMatrixMake(int rows, int cols);
 /// Delete a matrix and free its internal memory.
 void nnMatrixDel(nnMatrix*);
 
+/// Construct a matrix from an array of values.
+nnMatrix nnMatrixFromArray(int rows, int cols, const R values[]);
+
 /// Move a matrix.
 ///
 /// |in| is an empty matrix after the move.
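(Editorial note.) A usage sketch for the new constructor — my example, not part of the patch; values are read in the library's row-major layout, and the caller frees the result:

```c
// Builds the 2x3 matrix:
//   1 2 3
//   4 5 6
const R values[] = {1, 2, 3, 4, 5, 6};
nnMatrix m = nnMatrixFromArray(2, 3, values);
// ... use m ...
nnMatrixDel(&m);
```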
diff --git a/src/lib/include/neuralnet/neuralnet.h b/src/lib/include/neuralnet/neuralnet.h
index 05c9406..f122c2a 100644
--- a/src/lib/include/neuralnet/neuralnet.h
+++ b/src/lib/include/neuralnet/neuralnet.h
@@ -1,32 +1,45 @@
 #pragma once
 
+#include <neuralnet/matrix.h>
 #include <neuralnet/types.h>
 
-typedef struct nnMatrix nnMatrix;
-
 typedef struct nnNeuralNetwork nnNeuralNetwork;
 typedef struct nnQueryObject nnQueryObject;
 
-/// Neuron activation.
-typedef enum nnActivation {
-  nnIdentity,
+/// Linear layer parameters.
+///
+/// Either one of the following must be set:
+/// a) Training: input and output sizes.
+/// b) Inference: weights + biases.
+typedef struct nnLinearParams {
+  int input_size;
+  int output_size;
+  nnMatrix weights;
+  nnMatrix biases;
+} nnLinearParams;
+
+/// Layer type.
+typedef enum nnLayerType {
+  nnLinear,
   nnSigmoid,
   nnRelu,
-} nnActivation;
+} nnLayerType;
+
+/// Neural network layer.
+typedef struct nnLayer {
+  nnLayerType type;
+  union {
+    nnLinearParams linear;
+  };
+} nnLayer;
 
 /// Create a network.
 nnNeuralNetwork* nnMakeNet(
-    int num_layers, const int* layer_sizes, const nnActivation* activations);
+    const nnLayer* layers, int num_layers, int input_size);
 
 /// Delete the network and free its internal memory.
 void nnDeleteNet(nnNeuralNetwork**);
 
-/// Set the network's weights.
-void nnSetWeights(nnNeuralNetwork*, const R* weights);
-
-/// Set the network's biases.
-void nnSetBiases(nnNeuralNetwork*, const R* biases);
-
 /// Query the network.
 ///
 /// |input| is a matrix of inputs, one row per input and as many columns as the
@@ -42,10 +55,10 @@ void nnQueryArray(
 
 /// Create a query object.
 ///
-/// The query object holds all the internal memory required to query a network.
-/// Query objects allocate all memory up front so that network queries can run
-/// without additional memory allocation.
-nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork*, int num_inputs);
+/// The query object holds all the internal memory required to query a network
+/// with batches of the given size. Memory is allocated up front so that network
+/// queries can run without additional memory allocation.
+nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork*, int batch_size);
 
 /// Delete the query object and free its internal memory.
 void nnDeleteQueryObject(nnQueryObject**);
@@ -60,7 +73,7 @@ int nnNetInputSize(const nnNeuralNetwork*);
 int nnNetOutputSize(const nnNeuralNetwork*);
 
 /// Return the layer's input size.
-int nnLayerInputSize(const nnMatrix* weights);
+int nnLayerInputSize(const nnNeuralNetwork*, int layer);
 
 /// Return the layer's output size.
-int nnLayerOutputSize(const nnMatrix* weights);
+int nnLayerOutputSize(const nnNeuralNetwork*, int layer);
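(Editorial note.) `nnLinearParams` documents two construction modes: (a) sizes only, for training, where the library allocates the parameters; and (b) explicit weights and biases, for inference. A sketch of mode (b) under the semantics of `MakeLayerImpl` in neuralnet.c below, where the network borrows rather than owns the matrices — the 1x1 shapes and values are made up:

```c
// Perceptron with fixed parameters: one linear layer plus a sigmoid.
// Because the network does not take ownership (owned == false internally),
// the caller remains responsible for deleting these matrices.
nnMatrix weights = nnMatrixFromArray(1, 1, (const R[]){0.3});
nnMatrix biases  = nnMatrixFromArray(1, 1, (const R[]){0.0});

const nnLayer layers[2] = {
    {.type = nnLinear, .linear = {.weights = weights, .biases = biases}},
    {.type = nnSigmoid},
};
nnNeuralNetwork* net = nnMakeNet(layers, /*num_layers=*/2, /*input_size=*/1);
```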
diff --git a/src/lib/src/activation.h b/src/lib/src/activation.h
index b56a69e..4c8a9e4 100644
--- a/src/lib/src/activation.h
+++ b/src/lib/src/activation.h
@@ -9,8 +9,8 @@ static inline R sigmoid(R x) { return 1. / (1. + exp(-x)); }
 static inline R relu(R x) { return fmax(0, x); }
 
 #define NN_MAP_ARRAY(f, in, out, size) \
-  for (int i = 0; i < size; ++i) {     \
-    out[i] = f(in[i]);                 \
+  for (int ii = 0; ii < size; ++ii) {  \
+    out[ii] = f(in[ii]);               \
   }
 
 #define sigmoid_array(in, out, size) NN_MAP_ARRAY(sigmoid, in, out, size)
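(Editorial note.) The loop-variable rename from `i` to `ii` looks like a macro-hygiene fix: if a caller's arguments themselves mention an `i`, the macro's own counter captures it after textual expansion. A hypothetical caller that the old macro would silently break:

```c
// Old expansion: for (int i = 0; i < row_size; ++i)
//                  rows_out[i][i] = sigmoid(rows_in[i][i]);
// Both subscripts bind to the macro's counter, touching only the diagonal.
// With `ii`, this expands to rows_out[i][ii] as intended.
// (rows_in/rows_out/num_rows/row_size are illustrative names.)
for (int i = 0; i < num_rows; ++i) {
  sigmoid_array(rows_in[i], rows_out[i], row_size);
}
```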
diff --git a/src/lib/src/matrix.c b/src/lib/src/matrix.c
index d98c8bb..d5c3fcc 100644
--- a/src/lib/src/matrix.c
+++ b/src/lib/src/matrix.c
@@ -26,6 +26,12 @@ void nnMatrixDel(nnMatrix* matrix) {
   }
 }
 
+nnMatrix nnMatrixFromArray(int rows, int cols, const R values[]) {
+  nnMatrix m = nnMatrixMake(rows, cols);
+  nnMatrixInit(&m, values);
+  return m;
+}
+
 void nnMatrixMove(nnMatrix* in, nnMatrix* out) {
   assert(in);
   assert(out);
diff --git a/src/lib/src/neuralnet.c b/src/lib/src/neuralnet.c
index a5fc59b..4322b8c 100644
--- a/src/lib/src/neuralnet.c
+++ b/src/lib/src/neuralnet.c
@@ -7,11 +7,65 @@
 #include <assert.h>
 #include <stdlib.h>
 
+static void MakeLayerImpl(
+    int prev_layer_output_size, const nnLayer* layer, nnLayerImpl* impl) {
+  impl->type = layer->type;
+
+  switch (layer->type) {
+  case nnLinear: {
+    const nnLinearParams* params = &layer->linear;
+    nnLinearImpl* linear = &impl->linear;
+
+    if ((params->input_size > 0) && (params->output_size > 0)) {
+      const int rows = params->input_size;
+      const int cols = params->output_size;
+      linear->weights = nnMatrixMake(rows, cols);
+      linear->biases = nnMatrixMake(1, cols);
+      linear->owned = true;
+    } else {
+      linear->weights = params->weights;
+      linear->biases = params->biases;
+      linear->owned = false;
+    }
+
+    impl->input_size = linear->weights.rows;
+    impl->output_size = linear->weights.cols;
+
+    break;
+  }
+
+  // Activation layers.
+  case nnRelu:
+  case nnSigmoid:
+    impl->input_size = prev_layer_output_size;
+    impl->output_size = prev_layer_output_size;
+    break;
+  }
+}
+
+static void DeleteLayer(nnLayerImpl* layer) {
+  switch (layer->type) {
+  case nnLinear: {
+    nnLinearImpl* linear = &layer->linear;
+    if (linear->owned) {
+      nnMatrixDel(&linear->weights);
+      nnMatrixDel(&linear->biases);
+    }
+    break;
+  }
+
+  // No parameters for these layers.
+  case nnRelu:
+  case nnSigmoid:
+    break;
+  }
+}
+
 nnNeuralNetwork* nnMakeNet(
-    int num_layers, const int* layer_sizes, const nnActivation* activations) {
+    const nnLayer* layers, int num_layers, int input_size) {
+  assert(layers);
   assert(num_layers > 0);
-  assert(layer_sizes);
-  assert(activations);
+  assert(input_size > 0);
 
   nnNeuralNetwork* net = calloc(1, sizeof(nnNeuralNetwork));
   if (net == 0) {
@@ -20,84 +74,38 @@ nnNeuralNetwork* nnMakeNet(
 
   net->num_layers = num_layers;
 
-  net->weights = calloc(num_layers, sizeof(nnMatrix));
-  net->biases = calloc(num_layers, sizeof(nnMatrix));
-  net->activations = calloc(num_layers, sizeof(nnActivation));
-  if ((net->weights == 0) || (net->biases == 0) || (net->activations == 0)) {
+  net->layers = calloc(num_layers, sizeof(nnLayerImpl));
+  if (net->layers == 0) {
     nnDeleteNet(&net);
     return 0;
   }
 
+  int prev_layer_output_size = input_size;
   for (int l = 0; l < num_layers; ++l) {
-    // layer_sizes = { input layer size, first hidden layer size, ...}
-    const int layer_input_size = layer_sizes[l];
-    const int layer_output_size = layer_sizes[l + 1];
-
-    // We store the transpose of the weight matrix as written in textbooks.
-    // Our vectors are row vectors and the matrices row-major.
-    const int rows = layer_input_size;
-    const int cols = layer_output_size;
-
-    net->weights[l] = nnMatrixMake(rows, cols);
-    net->biases[l] = nnMatrixMake(1, cols);
-    net->activations[l] = activations[l];
+    MakeLayerImpl(prev_layer_output_size, &layers[l], &net->layers[l]);
+    prev_layer_output_size = net->layers[l].output_size;
   }
 
   return net;
 }
 
-void nnDeleteNet(nnNeuralNetwork** net) {
-  if ((!net) || (!(*net))) {
+void nnDeleteNet(nnNeuralNetwork** ppNet) {
+  if ((!ppNet) || (!(*ppNet))) {
     return;
   }
-  if ((*net)->weights != 0) {
-    for (int l = 0; l < (*net)->num_layers; ++l) {
-      nnMatrixDel(&(*net)->weights[l]);
-    }
-    free((*net)->weights);
-    (*net)->weights = 0;
-  }
-  if ((*net)->biases != 0) {
-    for (int l = 0; l < (*net)->num_layers; ++l) {
-      nnMatrixDel(&(*net)->biases[l]);
-    }
-    free((*net)->biases);
-    (*net)->biases = 0;
-  }
-  if ((*net)->activations) {
-    free((*net)->activations);
-    (*net)->activations = 0;
-  }
-  free(*net);
-  *net = 0;
-}
-
-void nnSetWeights(nnNeuralNetwork* net, const R* weights) {
-  assert(net);
-  assert(weights);
+  nnNeuralNetwork* net = *ppNet;
 
   for (int l = 0; l < net->num_layers; ++l) {
-    nnMatrix* layer_weights = &net->weights[l];
-    R* layer_values = layer_weights->values;
-
-    for (int j = 0; j < layer_weights->rows * layer_weights->cols; ++j) {
-      *layer_values++ = *weights++;
-    }
+    DeleteLayer(&net->layers[l]);
   }
-}
-
-void nnSetBiases(nnNeuralNetwork* net, const R* biases) {
-  assert(net);
-  assert(biases);
-
-  for (int l = 0; l < net->num_layers; ++l) {
-    nnMatrix* layer_biases = &net->biases[l];
-    R* layer_values = layer_biases->values;
 
-    for (int j = 0; j < layer_biases->rows * layer_biases->cols; ++j) {
-      *layer_values++ = *biases++;
-    }
+  if (net->layers) {
+    free(net->layers);
+    net->layers = 0;
   }
+
+  free(net);
+  *ppNet = 0;
 }
 
 void nnQuery(
@@ -114,35 +122,40 @@ void nnQuery(
     nnMatrix input_vector = nnMatrixBorrowRows((nnMatrix*)input, i, 1);
 
     for (int l = 0; l < net->num_layers; ++l) {
-      const nnMatrix* layer_weights = &net->weights[l];
-      const nnMatrix* layer_biases = &net->biases[l];
-      // Y^T = (W*X)^T = X^T*W^T
-      //
-      // TODO: If we had a row-row matrix multiplication, we could compute:
-      //   Y^T = W ** X^T
-      // The row-row multiplication could be more cache-friendly. We just need
-      // to store W as is, without transposing.
-      // We could also rewrite the original Mul function to go row x row,
-      // decomposing the multiplication. Preserving the original meaning of Mul
-      // makes everything clearer.
       nnMatrix output_vector =
           nnMatrixBorrowRows(&query->layer_outputs[l], i, 1);
-      nnMatrixMul(&input_vector, layer_weights, &output_vector);
-      nnMatrixAddRow(&output_vector, layer_biases, &output_vector);
 
-      switch (net->activations[l]) {
-      case nnIdentity:
-        break; // Nothing to do for the identity function.
-      case nnSigmoid:
-        sigmoid_array(
-            output_vector.values, output_vector.values, output_vector.cols);
+      switch (net->layers[l].type) {
+      case nnLinear: {
+        const nnLinearImpl* linear = &net->layers[l].linear;
+        const nnMatrix* layer_weights = &linear->weights;
+        const nnMatrix* layer_biases = &linear->biases;
+
+        // Y^T = (W*X)^T = X^T*W^T
+        //
+        // TODO: If we had a row-row matrix multiplication, we could compute:
+        //   Y^T = W ** X^T
+        //
+        // The row-row multiplication could be more cache-friendly. We just need
+        // to store W as is, without transposing.
+        //
+        // We could also rewrite the original Mul function to go row x row,
+        // decomposing the multiplication. Preserving the original meaning of
+        // Mul makes everything clearer.
+        nnMatrixMul(&input_vector, layer_weights, &output_vector);
+        nnMatrixAddRow(&output_vector, layer_biases, &output_vector);
         break;
+      }
       case nnRelu:
+        assert(input_vector.cols == output_vector.cols);
         relu_array(
-            output_vector.values, output_vector.values, output_vector.cols);
+            input_vector.values, output_vector.values, output_vector.cols);
+        break;
+      case nnSigmoid:
+        assert(input_vector.cols == output_vector.cols);
+        sigmoid_array(
+            input_vector.values, output_vector.values, output_vector.cols);
         break;
-      default:
-        assert(0);
       }
 
       input_vector = output_vector; // Borrow.
@@ -159,15 +172,15 @@ void nnQueryArray(
   assert(output);
   assert(net->num_layers > 0);
 
-  nnMatrix input_vector = nnMatrixMake(net->weights[0].cols, 1);
+  nnMatrix input_vector = nnMatrixMake(1, nnNetInputSize(net));
   nnMatrixInit(&input_vector, input);
   nnQuery(net, query, &input_vector);
   nnMatrixRowToArray(query->network_outputs, 0, output);
 }
 
-nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) {
+nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int batch_size) {
   assert(net);
-  assert(num_inputs > 0);
+  assert(batch_size > 0);
   assert(net->num_layers > 0);
 
   nnQueryObject* query = calloc(1, sizeof(nnQueryObject));
@@ -183,11 +196,12 @@ nnQueryObject* nnMakeQueryObject(const nnNeuralNetwork* net, int num_inputs) {
     free(query);
     return 0;
   }
+
   for (int l = 0; l < net->num_layers; ++l) {
-    const nnMatrix* layer_weights = &net->weights[l];
-    const int layer_output_size = nnLayerOutputSize(layer_weights);
-    query->layer_outputs[l] = nnMatrixMake(num_inputs, layer_output_size);
+    const int layer_output_size = nnLayerOutputSize(net, l);
+    query->layer_outputs[l] = nnMatrixMake(batch_size, layer_output_size);
   }
+
   query->network_outputs = &query->layer_outputs[net->num_layers - 1];
 
   return query;
@@ -213,23 +227,19 @@ const nnMatrix* nnNetOutputs(const nnQueryObject* query) {
 }
 
 int nnNetInputSize(const nnNeuralNetwork* net) {
-  assert(net);
-  assert(net->num_layers > 0);
-  return net->weights[0].rows;
+  return nnLayerInputSize(net, 0);
 }
 
 int nnNetOutputSize(const nnNeuralNetwork* net) {
-  assert(net);
-  assert(net->num_layers > 0);
-  return net->weights[net->num_layers - 1].cols;
+  return nnLayerOutputSize(net, net->num_layers - 1);
 }
 
-int nnLayerInputSize(const nnMatrix* weights) {
-  assert(weights);
-  return weights->rows;
+int nnLayerInputSize(const nnNeuralNetwork* net, int layer) {
+  assert(net);
+  return net->layers[layer].input_size;
 }
 
-int nnLayerOutputSize(const nnMatrix* weights) {
-  assert(weights);
-  return weights->cols;
+int nnLayerOutputSize(const nnNeuralNetwork* net, int layer) {
+  assert(net);
+  return net->layers[layer].output_size;
 }
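(Editorial note.) The `Y^T = (W*X)^T = X^T*W^T` comment is the library's row-vector convention spelled out; in my notation, with column vectors a linear layer computes

    y = W x + b,

but inputs here are row vectors and each weight matrix is stored pre-transposed, as an input_size x output_size matrix, so the forward pass evaluates

    y^T = x^T W^T + b^T

directly — `nnMatrixMul(&input_vector, layer_weights, &output_vector)` followed by `nnMatrixAddRow` — with no transposition at query time.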
diff --git a/src/lib/src/neuralnet_impl.h b/src/lib/src/neuralnet_impl.h
index f5a9c63..935c5ea 100644
--- a/src/lib/src/neuralnet_impl.h
+++ b/src/lib/src/neuralnet_impl.h
@@ -2,22 +2,29 @@
 
 #include <neuralnet/matrix.h>
 
+#include <stdbool.h>
+
+/// Linear layer parameters.
+typedef struct nnLinearImpl {
+  nnMatrix weights;
+  nnMatrix biases;
+  bool owned; /// Whether the library owns the weights and biases.
+} nnLinearImpl;
+
+/// Neural network layer.
+typedef struct nnLayerImpl {
+  nnLayerType type;
+  int input_size;
+  int output_size;
+  union {
+    nnLinearImpl linear;
+  };
+} nnLayerImpl;
+
 /// Neural network object.
-///
-/// We store the transposes of the weight matrices so that we can do forward
-/// passes with a minimal amount of work. That is, if in paper we write:
-///
-///   [w11 w21]
-///   [w12 w22]
-///
-/// then the weight matrix in memory is stored as the following array:
-///
-///   w11 w12 w21 w22
 typedef struct nnNeuralNetwork {
   int num_layers; // Number of non-input layers (hidden + output).
-  nnMatrix* weights; // One matrix per non-input layer.
-  nnMatrix* biases; // One vector per non-input layer.
-  nnActivation* activations; // One per non-input layer.
+  nnLayerImpl* layers; // One per non-input layer.
 } nnNeuralNetwork;
 
 /// A query object that holds all the memory necessary to query a network.
diff --git a/src/lib/src/train.c b/src/lib/src/train.c index dc93f0f..98f58ad 100644 --- a/src/lib/src/train.c +++ b/src/lib/src/train.c | |||
| @@ -38,7 +38,7 @@ typedef struct nnSigmoidGradientElements { | |||
| 38 | /// each layer. A data type is defined for these because we allocate all the | 38 | /// each layer. A data type is defined for these because we allocate all the |
| 39 | /// required memory up front before entering the training loop. | 39 | /// required memory up front before entering the training loop. |
| 40 | typedef struct nnGradientElements { | 40 | typedef struct nnGradientElements { |
| 41 | nnActivation type; | 41 | nnLayerType type; |
| 42 | // Gradient vector, same size as the layer. | 42 | // Gradient vector, same size as the layer. |
| 43 | // This will contain the gradient expression except for the output value of | 43 | // This will contain the gradient expression except for the output value of |
| 44 | // the previous layer. | 44 | // the previous layer. |
| @@ -57,10 +57,27 @@ void nnInitNet( | |||
| 57 | mt19937_64_init(&rng, seed); | 57 | mt19937_64_init(&rng, seed); |
| 58 | 58 | ||
| 59 | for (int l = 0; l < net->num_layers; ++l) { | 59 | for (int l = 0; l < net->num_layers; ++l) { |
| 60 | nnMatrix* weights = &net->weights[l]; | 60 | // Get the layer's weights and biases, if any. |
| 61 | nnMatrix* biases = &net->biases[l]; | 61 | nnMatrix* weights = 0; |
| 62 | nnMatrix* biases = 0; | ||
| 63 | switch (net->layers[l].type) { | ||
| 64 | case nnLinear: { | ||
| 65 | nnLinearImpl* linear = &net->layers[l].linear; | ||
| 66 | |||
| 67 | weights = &linear->weights; | ||
| 68 | biases = &linear->biases; | ||
| 69 | break; | ||
| 70 | } | ||
| 71 | // Activations. | ||
| 72 | case nnRelu: | ||
| 73 | case nnSigmoid: | ||
| 74 | break; | ||
| 75 | } | ||
| 76 | if (!weights || !biases) { | ||
| 77 | continue; | ||
| 78 | } | ||
| 62 | 79 | ||
| 63 | const R layer_size = (R)nnLayerInputSize(weights); | 80 | const R layer_size = (R)nnLayerInputSize(net, l); |
| 64 | const R scale = 1. / layer_size; | 81 | const R scale = 1. / layer_size; |
| 65 | const R stdev = 1. / sqrt((R)layer_size); | 82 | const R stdev = 1. / sqrt((R)layer_size); |
| 66 | const R sigma = stdev * stdev; | 83 | const R sigma = stdev * stdev; |
| @@ -128,9 +145,6 @@ void nnTrain( | |||
| 128 | // with one sample at a time. | 145 | // with one sample at a time. |
| 129 | nnMatrix* errors = calloc(net->num_layers, sizeof(nnMatrix)); | 146 | nnMatrix* errors = calloc(net->num_layers, sizeof(nnMatrix)); |
| 130 | 147 | ||
| 131 | // Allocate the weight transpose matrices up front for backpropagation. | ||
| 132 | // nnMatrix* weights_T = calloc(net->num_layers, sizeof(nnMatrix)); | ||
| 133 | |||
| 134 | // Allocate the weight delta matrices. | 148 | // Allocate the weight delta matrices. |
| 135 | nnMatrix* weight_deltas = calloc(net->num_layers, sizeof(nnMatrix)); | 149 | nnMatrix* weight_deltas = calloc(net->num_layers, sizeof(nnMatrix)); |
| 136 | 150 | ||
| @@ -144,30 +158,24 @@ void nnTrain( | |||
| 144 | nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix)); | 158 | nnMatrix* outputs_T = calloc(net->num_layers, sizeof(nnMatrix)); |
| 145 | 159 | ||
| 146 | assert(errors != 0); | 160 | assert(errors != 0); |
| 147 | // assert(weights_T != 0); | ||
| 148 | assert(weight_deltas != 0); | 161 | assert(weight_deltas != 0); |
| 149 | assert(gradient_elems); | 162 | assert(gradient_elems); |
| 150 | assert(outputs_T); | 163 | assert(outputs_T); |
| 151 | 164 | ||
| 152 | for (int l = 0; l < net->num_layers; ++l) { | 165 | for (int l = 0; l < net->num_layers; ++l) { |
| 153 | const nnMatrix* layer_weights = &net->weights[l]; | 166 | const int layer_input_size = nnLayerInputSize(net, l); |
| 154 | const int layer_output_size = net->weights[l].cols; | 167 | const int layer_output_size = nnLayerOutputSize(net, l); |
| 155 | const nnActivation activation = net->activations[l]; | 168 | const nnLayerImpl* layer = &net->layers[l]; |
| 156 | |||
| 157 | errors[l] = nnMatrixMake(1, layer_weights->cols); | ||
| 158 | |||
| 159 | // weights_T[l] = nnMatrixMake(layer_weights->cols, layer_weights->rows); | ||
| 160 | // nnMatrixTranspose(layer_weights, &weights_T[l]); | ||
| 161 | |||
| 162 | weight_deltas[l] = nnMatrixMake(layer_weights->rows, layer_weights->cols); | ||
| 163 | 169 | ||
| 164 | outputs_T[l] = nnMatrixMake(layer_output_size, 1); | 170 | errors[l] = nnMatrixMake(1, layer_output_size); |
| 171 | weight_deltas[l] = nnMatrixMake(layer_input_size, layer_output_size); | ||
| 172 | outputs_T[l] = nnMatrixMake(layer_output_size, 1); | ||
| 165 | 173 | ||
| 166 | // Allocate the gradient elements and vectors for weight delta calculation. | 174 | // Allocate the gradient elements and vectors for weight delta calculation. |
| 167 | nnGradientElements* elems = &gradient_elems[l]; | 175 | nnGradientElements* elems = &gradient_elems[l]; |
| 168 | elems->type = activation; | 176 | elems->type = layer->type; |
| 169 | switch (activation) { | 177 | switch (layer->type) { |
| 170 | case nnIdentity: | 178 | case nnLinear: |
| 171 | break; // Gradient vector will be borrowed, no need to allocate. | 179 | break; // Gradient vector will be borrowed, no need to allocate. |
| 172 | 180 | ||
| 173 | case nnSigmoid: | 181 | case nnSigmoid: |
| @@ -208,6 +216,7 @@ void nnTrain( | |||
| 208 | 216 | ||
| 209 | // For now, we train with one sample at a time. | 217 | // For now, we train with one sample at a time. |
| 210 | for (int sample = 0; sample < inputs->rows; ++sample) { | 218 | for (int sample = 0; sample < inputs->rows; ++sample) { |
| 219 | // TODO: Introduce a BorrowMut. | ||
| 211 | // Slice the input and target matrices with the batch size. | 220 | // Slice the input and target matrices with the batch size. |
| 212 | // We are not mutating the inputs, but we need the cast to borrow. | 221 | // We are not mutating the inputs, but we need the cast to borrow. |
| 213 | nnMatrix training_inputs = | 222 | nnMatrix training_inputs = |
| @@ -219,15 +228,16 @@ void nnTrain( | |||
| 219 | // Assuming one training input per iteration for now. | 228 | // Assuming one training input per iteration for now. |
| 220 | nnMatrixTranspose(&training_inputs, &training_inputs_T); | 229 | nnMatrixTranspose(&training_inputs, &training_inputs_T); |
| 221 | 230 | ||
| 222 | // Run a forward pass and compute the output layer error relevant to the | 231 | // Forward pass. |
| 223 | // derivative: o-t. | 232 | nnQuery(net, query, &training_inputs); |
| 224 | // Error: (t-o)^2 | 233 | |
| 225 | // dE/do = -2(t-o) | 234 | // Compute the error derivative: o-t. |
| 226 | // = +2(o-t) | 235 | // Error: 1/2 (t-o)^2 |
| 236 | // dE/do = -(t-o) | ||
| 237 | // = +(o-t) | ||
| 227 | // Note that we compute o-t instead to remove that outer negative sign. | 238 | // Note that we compute o-t instead to remove that outer negative sign. |
| 228 | // The 2 is dropped because we are only interested in the direction of the | 239 | // The 2 is dropped because we are only interested in the direction of the |
| 229 | // gradient. The learning rate controls the magnitude. | 240 | // gradient. The learning rate controls the magnitude. |
| 230 | nnQuery(net, query, &training_inputs); | ||
| 231 | nnMatrixSub( | 241 | nnMatrixSub( |
| 232 | training_outputs, &training_targets, &errors[net->num_layers - 1]); | 242 | training_outputs, &training_targets, &errors[net->num_layers - 1]); |
| 233 | 243 | ||
| @@ -236,68 +246,86 @@ void nnTrain( | |||
| 236 | nnMatrixTranspose(&query->layer_outputs[l], &outputs_T[l]); | 246 | nnMatrixTranspose(&query->layer_outputs[l], &outputs_T[l]); |
| 237 | } | 247 | } |
| 238 | 248 | ||
| 239 | // Update weights and biases for each internal layer, backpropagating | 249 | // Update weights and biases for each internal layer, back-propagating |
| 240 | // errors along the way. | 250 | // errors along the way. |
| 241 | for (int l = net->num_layers - 1; l >= 0; --l) { | 251 | for (int l = net->num_layers - 1; l >= 0; --l) { |
| 242 | const nnMatrix* layer_output = &query->layer_outputs[l]; | 252 | const nnMatrix* layer_output = &query->layer_outputs[l]; |
| 243 | nnMatrix* layer_weights = &net->weights[l]; | 253 | nnGradientElements* elems = &gradient_elems[l]; |
| 244 | nnMatrix* layer_biases = &net->biases[l]; | 254 | nnMatrix* gradient = &elems->gradient; |
| 245 | nnGradientElements* elems = &gradient_elems[l]; | 255 | nnLayerImpl* layer = &net->layers[l]; |
| 246 | nnMatrix* gradient = &elems->gradient; | 256 | |
| 247 | const nnActivation activation = net->activations[l]; | 257 | // Compute this layer's gradient. |
| 248 | 258 | // | |
| 249 | // Compute the gradient (the part of the expression that does not | 259 | // By "gradient" we mean the expression common to the weights and bias |
| 250 | // contain the output of the previous layer). | 260 | // gradients. This is the part of the expression that does not contain |
| 261 | // this layer's input. | ||
| 251 | // | 262 | // |
| 252 | // Identity: G = error_k | 263 | // Linear: G = id |
| 253 | // Sigmoid: G = error_k * output_k * (1 - output_k). | 264 | // Relu: G = (output_k > 0 ? 1 : 0) |
| 254 | // Relu: G = error_k * (output_k > 0 ? 1 : 0) | 265 | // Sigmoid: G = output_k * (1 - output_k) |
| 255 | switch (activation) { | 266 | switch (layer->type) { |
| 256 | case nnIdentity: | 267 | case nnLinear: { |
| 257 | // TODO: Just copy the pointer? | 268 | // TODO: Just copy the pointer? |
| 258 | *gradient = nnMatrixBorrow(&errors[l]); | 269 | *gradient = nnMatrixBorrow(&errors[l]); |
| 259 | break; | 270 | break; |
| 271 | } | ||
| 272 | case nnRelu: | ||
| 273 | nnMatrixGt(layer_output, 0, gradient); | ||
| 274 | break; | ||
| 260 | case nnSigmoid: | 275 | case nnSigmoid: |
| 261 | nnMatrixSub(&elems->sigmoid.ones, layer_output, gradient); | 276 | nnMatrixSub(&elems->sigmoid.ones, layer_output, gradient); |
| 262 | nnMatrixMulPairs(layer_output, gradient, gradient); | 277 | nnMatrixMulPairs(layer_output, gradient, gradient); |
| 263 | nnMatrixMulPairs(&errors[l], gradient, gradient); | ||
| 264 | break; | ||
| 265 | case nnRelu: | ||
| 266 | nnMatrixGt(layer_output, 0, gradient); | ||
| 267 | nnMatrixMulPairs(&errors[l], gradient, gradient); | ||
| 268 | break; | 278 | break; |
| 269 | } | 279 | } |
| 270 | 280 | ||
| 271 | // Outer product to compute the weight deltas. | 281 | // Back-propagate the error. |
| 272 | const nnMatrix* output_T = | 282 | // |
| 273 | (l == 0) ? &training_inputs_T : &outputs_T[l - 1]; | 283 | // This combines this layer's gradient with the back-propagated error, |
| 274 | nnMatrixMul(output_T, gradient, &weight_deltas[l]); | 284 | // which is the combination of the gradients of subsequent layers down |
| 275 | 285 | // to the output layer error. | |
| 276 | // Backpropagate the error before updating weights. | 286 | // |
| 287 | // Note that this step uses the layer's original weights. | ||
| 277 | if (l > 0) { | 288 | if (l > 0) { |
| 278 | // G * W^T == G *^T W. | 289 | switch (layer->type) { |
| 279 | // nnMatrixMul(gradient, &weights_T[l], &errors[l-1]); | 290 | case nnLinear: { |
| 280 | nnMatrixMulRows(gradient, layer_weights, &errors[l - 1]); | 291 | const nnMatrix* layer_weights = &layer->linear.weights; |
| 292 | // E * W^T == E *^T W. | ||
| 293 | // Using nnMatrixMulRows, we avoid having to transpose the weight | ||
| 294 | // matrix. | ||
| 295 | nnMatrixMulRows(&errors[l], layer_weights, &errors[l - 1]); | ||
| 296 | break; | ||
| 297 | } | ||
| 298 | // For activations, the error back-propagates as is but multiplied by | ||
| 299 | // the layer's gradient. | ||
| 300 | case nnRelu: | ||
| 301 | case nnSigmoid: | ||
| 302 | nnMatrixMulPairs(&errors[l], gradient, &errors[l - 1]); | ||
| 303 | break; | ||
| 304 | } | ||
| 281 | } | 305 | } |
| 282 | 306 | ||
| 283 | // Update weights. | 307 | // Update layer weights. |
| 284 | nnMatrixScale(&weight_deltas[l], params->learning_rate); | 308 | if (layer->type == nnLinear) { |
| 285 | // The gradient has a negative sign from -(t - o), but we have computed | 309 | nnLinearImpl* linear = &layer->linear; |
| 286 | // e = o - t instead, so we can subtract directly. | 310 | nnMatrix* layer_weights = &linear->weights; |
| 287 | // nnMatrixAdd(layer_weights, &weight_deltas[l], layer_weights); | 311 | nnMatrix* layer_biases = &linear->biases; |
| 288 | nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights); | 312 | |
| 289 | 313 | // Outer product to compute the weight deltas. | |
| 290 | // Update weight transpose matrix for the next training iteration. | 314 | // This layer's input is the previous layer's output. |
| 291 | // nnMatrixTranspose(layer_weights, &weights_T[l]); | 315 | const nnMatrix* input_T = |
| 292 | 316 | (l == 0) ? &training_inputs_T : &outputs_T[l - 1]; | |
| 293 | // Update biases. | 317 | nnMatrixMul(input_T, gradient, &weight_deltas[l]); |
| 294 | // This is the same formula as for weights, except that the o_j term is | 318 | |
| 295 | // just 1. We can simply re-use the gradient that we have already | 319 | // Update weights. |
| 296 | // computed for the weight update. | 320 | nnMatrixScale(&weight_deltas[l], params->learning_rate); |
| 297 | // nnMatrixMulAdd(layer_biases, gradient, params->learning_rate, | 321 | nnMatrixSub(layer_weights, &weight_deltas[l], layer_weights); |
| 298 | // layer_biases); | 322 | |
| 299 | nnMatrixMulSub( | 323 | // Update biases. |
| 300 | layer_biases, gradient, params->learning_rate, layer_biases); | 324 | // This is the same formula as for weights, except that the o_j term |
| 325 | // is just 1. | ||
| 326 | nnMatrixMulSub( | ||
| 327 | layer_biases, gradient, params->learning_rate, layer_biases); | ||
| 328 | } | ||
| 301 | } | 329 | } |
| 302 | 330 | ||
| 303 | // TODO: Add this under a verbose debugging mode. | 331 | // TODO: Add this under a verbose debugging mode. |
| @@ -334,12 +362,11 @@ void nnTrain( | |||
| 334 | for (int l = 0; l < net->num_layers; ++l) { | 362 | for (int l = 0; l < net->num_layers; ++l) { |
| 335 | nnMatrixDel(&errors[l]); | 363 | nnMatrixDel(&errors[l]); |
| 336 | nnMatrixDel(&outputs_T[l]); | 364 | nnMatrixDel(&outputs_T[l]); |
| 337 | // nnMatrixDel(&weights_T[l]); | ||
| 338 | nnMatrixDel(&weight_deltas[l]); | 365 | nnMatrixDel(&weight_deltas[l]); |
| 339 | 366 | ||
| 340 | nnGradientElements* elems = &gradient_elems[l]; | 367 | nnGradientElements* elems = &gradient_elems[l]; |
| 341 | switch (elems->type) { | 368 | switch (elems->type) { |
| 342 | case nnIdentity: | 369 | case nnLinear: |
| 343 | break; // Gradient vector is borrowed, no need to deallocate. | 370 | break; // Gradient vector is borrowed, no need to deallocate. |
| 344 | 371 | ||
| 345 | case nnSigmoid: | 372 | case nnSigmoid: |
| @@ -355,7 +382,6 @@ void nnTrain( | |||
| 355 | nnMatrixDel(&training_inputs_T); | 382 | nnMatrixDel(&training_inputs_T); |
| 356 | free(errors); | 383 | free(errors); |
| 357 | free(outputs_T); | 384 | free(outputs_T); |
| 358 | // free(weights_T); | ||
| 359 | free(weight_deltas); | 385 | free(weight_deltas); |
| 360 | free(gradient_elems); | 386 | free(gradient_elems); |
| 361 | } | 387 | } |
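
The backward pass above is easier to follow outside the matrix machinery. Below is a minimal scalar sketch (plain C, not the library's API) of the same decoupled scheme for a linear layer followed by a sigmoid layer: each layer contributes only its local gradient, the error picks up those gradients as it flows back, and parameter updates happen only at the linear layer.

```c
#include <math.h>
#include <stdio.h>

int main(void) {
  const double x = 0.9, t = 0.5; // One training sample and its target.
  const double lr = 0.7;         // Learning rate.
  double w = 0.3, b = 0.0;       // Linear layer parameters.

  for (int i = 0; i < 20; ++i) {
    // Forward pass: linear layer, then sigmoid activation layer.
    const double o = 1.0 / (1.0 + exp(-(w * x + b)));

    // Output error e = o - t, as in nnTrain.
    double e = o - t;

    // Sigmoid layer: local gradient G = o * (1 - o); the error
    // back-propagates multiplied by G (the nnMatrixMulPairs step).
    e *= o * (1.0 - o);

    // Linear layer: the weight delta is input * error (the outer
    // product in the matrix version); the bias uses the same error
    // with an implicit input of 1.
    w -= lr * x * e;
    b -= lr * e;
  }
  printf("w = %f, b = %f\n", w, b);
  return 0;
}
```
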
diff --git a/src/lib/test/neuralnet_test.c b/src/lib/test/neuralnet_test.c index 14d9438..0f8d7b8 100644 --- a/src/lib/test/neuralnet_test.c +++ b/src/lib/test/neuralnet_test.c | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | #include <neuralnet/neuralnet.h> | 1 | #include <neuralnet/neuralnet.h> |
| 2 | 2 | ||
| 3 | #include <neuralnet/matrix.h> | ||
| 4 | #include "activation.h" | 3 | #include "activation.h" |
| 5 | #include "neuralnet_impl.h" | 4 | #include "neuralnet_impl.h" |
| 5 | #include <neuralnet/matrix.h> | ||
| 6 | 6 | ||
| 7 | #include "test.h" | 7 | #include "test.h" |
| 8 | #include "test_util.h" | 8 | #include "test_util.h" |
| @@ -10,23 +10,31 @@ | |||
| 10 | #include <assert.h> | 10 | #include <assert.h> |
| 11 | 11 | ||
| 12 | TEST_CASE(neuralnet_perceptron_test) { | 12 | TEST_CASE(neuralnet_perceptron_test) { |
| 13 | const int num_layers = 1; | 13 | const int num_layers = 2; |
| 14 | const int layer_sizes[] = { 1, 1 }; | 14 | const int input_size = 1; |
| 15 | const nnActivation layer_activations[] = { nnSigmoid }; | 15 | const R weights[] = {0.3}; |
| 16 | const R weights[] = { 0.3 }; | 16 | const R biases[] = {0.0}; |
| 17 | const nnLayer layers[] = { | ||
| 18 | {.type = nnLinear, | ||
| 19 | .linear = | ||
| 20 | {.weights = nnMatrixFromArray(1, 1, weights), | ||
| 21 | .biases = nnMatrixFromArray(1, 1, biases)}}, | ||
| 22 | {.type = nnSigmoid}, | ||
| 23 | }; | ||
| 17 | 24 | ||
| 18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 25 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
| 19 | assert(net); | 26 | assert(net); |
| 20 | nnSetWeights(net, weights); | ||
| 21 | 27 | ||
| 22 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | 28 | nnQueryObject* query = nnMakeQueryObject(net, 1); |
| 23 | 29 | ||
| 24 | const R input[] = { 0.9 }; | 30 | const R input[] = {0.9}; |
| 25 | R output[1]; | 31 | R output[1]; |
| 26 | nnQueryArray(net, query, input, output); | 32 | nnQueryArray(net, query, input, output); |
| 27 | 33 | ||
| 28 | const R expected_output = sigmoid(input[0] * weights[0]); | 34 | const R expected_output = sigmoid(input[0] * weights[0]); |
| 29 | printf("\nOutput: %f, Expected: %f\n", output[0], expected_output); | 35 | printf( |
| 36 | "\n[neuralnet_perceptron_test] Output: %f, Expected: %f\n", output[0], | ||
| 37 | expected_output); | ||
| 30 | TEST_TRUE(double_eq(output[0], expected_output, EPS)); | 38 | TEST_TRUE(double_eq(output[0], expected_output, EPS)); |
| 31 | 39 | ||
| 32 | nnDeleteQueryObject(&query); | 40 | nnDeleteQueryObject(&query); |
| @@ -34,53 +42,66 @@ TEST_CASE(neuralnet_perceptron_test) { | |||
| 34 | } | 42 | } |
| 35 | 43 | ||
| 36 | TEST_CASE(neuralnet_xor_test) { | 44 | TEST_CASE(neuralnet_xor_test) { |
| 37 | const int num_layers = 2; | 45 | // First (hidden) layer. |
| 38 | const int layer_sizes[] = { 2, 2, 1 }; | 46 | const R weights0[] = {1, 1, 1, 1}; |
| 39 | const nnActivation layer_activations[] = { nnRelu, nnIdentity }; | 47 | const R biases0[] = {0, -1}; |
| 40 | const R weights[] = { | 48 | // Second (output) layer. |
| 41 | 1, 1, 1, 1, // First (hidden) layer. | 49 | const R weights1[] = {1, -2}; |
| 42 | 1, -2 // Second (output) layer. | 50 | const R biases1[] = {0}; |
| 43 | }; | 51 | // Network. |
| 44 | const R biases[] = { | 52 | const int num_layers = 3; |
| 45 | 0, -1, // First (hidden) layer. | 53 | const int input_size = 2; |
| 46 | 0 // Second (output) layer. | 54 | const nnLayer layers[] = { |
| 55 | {.type = nnLinear, | ||
| 56 | .linear = | ||
| 57 | {.weights = nnMatrixFromArray(2, 2, weights0), | ||
| 58 | .biases = nnMatrixFromArray(1, 2, biases0)}}, | ||
| 59 | {.type = nnRelu}, | ||
| 60 | {.type = nnLinear, | ||
| 61 | .linear = | ||
| 62 | {.weights = nnMatrixFromArray(2, 1, weights1), | ||
| 63 | .biases = nnMatrixFromArray(1, 1, biases1)}}, | ||
| 47 | }; | 64 | }; |
| 48 | 65 | ||
| 49 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 66 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
| 50 | assert(net); | 67 | assert(net); |
| 51 | nnSetWeights(net, weights); | ||
| 52 | nnSetBiases(net, biases); | ||
| 53 | 68 | ||
| 54 | // First layer weights. | 69 | // First layer weights. |
| 55 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 0), 1); | 70 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 0), 1); |
| 56 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 1), 1); | 71 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 1), 1); |
| 57 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 2), 1); | 72 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 2), 1); |
| 58 | TEST_EQUAL(nnMatrixAt(&net->weights[0], 0, 3), 1); | 73 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.weights, 0, 3), 1); |
| 59 | // Second layer weights. | 74 | // Second linear layer (third layer) weights. |
| 60 | TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 0), 1); | 75 | TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.weights, 0, 0), 1); |
| 61 | TEST_EQUAL(nnMatrixAt(&net->weights[1], 0, 1), -2); | 76 | TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.weights, 0, 1), -2); |
| 62 | // First layer biases. | 77 | // First layer biases. |
| 63 | TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 0), 0); | 78 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.biases, 0, 0), 0); |
| 64 | TEST_EQUAL(nnMatrixAt(&net->biases[0], 0, 1), -1); | 79 | TEST_EQUAL(nnMatrixAt(&net->layers[0].linear.biases, 0, 1), -1); |
| 65 | // Second layer biases. | 80 | // Second linear layer (third layer) biases. |
| 66 | TEST_EQUAL(nnMatrixAt(&net->biases[1], 0, 0), 0); | 81 | TEST_EQUAL(nnMatrixAt(&net->layers[2].linear.biases, 0, 0), 0); |
| 67 | 82 | ||
| 68 | // Test. | 83 | // Test. |
| 69 | 84 | ||
| 70 | #define M 4 | 85 | #define M 4 |
| 71 | 86 | ||
| 72 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M); | 87 | nnQueryObject* query = nnMakeQueryObject(net, M); |
| 73 | 88 | ||
| 74 | const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } }; | 89 | const R test_inputs[M][2] = { |
| 90 | {0., 0.}, | ||
| 91 | {1., 0.}, | ||
| 92 | {0., 1.}, | ||
| 93 | {1., 1.} | ||
| 94 | }; | ||
| 75 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); | 95 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); |
| 76 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); | 96 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); |
| 77 | nnQuery(net, query, &test_inputs_matrix); | 97 | nnQuery(net, query, &test_inputs_matrix); |
| 78 | 98 | ||
| 79 | const R expected_outputs[M] = { 0., 1., 1., 0. }; | 99 | const R expected_outputs[M] = {0., 1., 1., 0.}; |
| 80 | for (int i = 0; i < M; ++i) { | 100 | for (int i = 0; i < M; ++i) { |
| 81 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | 101 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); |
| 82 | printf("\nInput: (%f, %f), Output: %f, Expected: %f\n", | 102 | printf( |
| 83 | test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]); | 103 | "\nInput: (%f, %f), Output: %f, Expected: %f\n", test_inputs[i][0], |
| 104 | test_inputs[i][1], test_output, expected_outputs[i]); | ||
| 84 | } | 105 | } |
| 85 | for (int i = 0; i < M; ++i) { | 106 | for (int i = 0; i < M; ++i) { |
| 86 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | 107 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); |
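
As a sanity check on neuralnet_xor_test, the hand-picked parameters compute XOR exactly. With the row-vector convention $h = \mathrm{relu}(x W_0 + b_0)$ and the values above:

$$
h_1 = \max(0,\, x_1 + x_2), \qquad
h_2 = \max(0,\, x_1 + x_2 - 1), \qquad
y = h_1 - 2\,h_2,
$$

which evaluates to $0, 1, 1, 0$ on the inputs $(0,0), (1,0), (0,1), (1,1)$, matching expected_outputs.
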
diff --git a/src/lib/test/train_linear_perceptron_non_origin_test.c b/src/lib/test/train_linear_perceptron_non_origin_test.c index 5a320ac..40a42e0 100644 --- a/src/lib/test/train_linear_perceptron_non_origin_test.c +++ b/src/lib/test/train_linear_perceptron_non_origin_test.c | |||
| @@ -1,9 +1,8 @@ | |||
| 1 | #include <neuralnet/train.h> | 1 | #include <neuralnet/train.h> |
| 2 | 2 | ||
| 3 | #include "neuralnet_impl.h" | ||
| 3 | #include <neuralnet/matrix.h> | 4 | #include <neuralnet/matrix.h> |
| 4 | #include <neuralnet/neuralnet.h> | 5 | #include <neuralnet/neuralnet.h> |
| 5 | #include "activation.h" | ||
| 6 | #include "neuralnet_impl.h" | ||
| 7 | 6 | ||
| 8 | #include "test.h" | 7 | #include "test.h" |
| 9 | #include "test_util.h" | 8 | #include "test_util.h" |
| @@ -11,19 +10,21 @@ | |||
| 11 | #include <assert.h> | 10 | #include <assert.h> |
| 12 | 11 | ||
| 13 | TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) { | 12 | TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) { |
| 14 | const int num_layers = 1; | 13 | const int num_layers = 1; |
| 15 | const int layer_sizes[] = { 1, 1 }; | 14 | const int input_size = 1; |
| 16 | const nnActivation layer_activations[] = { nnIdentity }; | 15 | const nnLayer layers[] = { |
| 16 | {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}} | ||
| 17 | }; | ||
| 17 | 18 | ||
| 18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 19 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
| 19 | assert(net); | 20 | assert(net); |
| 20 | 21 | ||
| 21 | // Train. | 22 | // Train. |
| 22 | 23 | ||
| 23 | // Try to learn the Y = 2X + 1 line. | 24 | // Try to learn the Y = 2X + 1 line. |
| 24 | #define N 2 | 25 | #define N 2 |
| 25 | const R inputs[N] = { 0., 1. }; | 26 | const R inputs[N] = {0., 1.}; |
| 26 | const R targets[N] = { 1., 3. }; | 27 | const R targets[N] = {1., 3.}; |
| 27 | 28 | ||
| 28 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); | 29 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); |
| 29 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | 30 | nnMatrix targets_matrix = nnMatrixMake(N, 1); |
| @@ -31,31 +32,32 @@ TEST_CASE(neuralnet_train_linear_perceptron_non_origin_test) { | |||
| 31 | nnMatrixInit(&targets_matrix, targets); | 32 | nnMatrixInit(&targets_matrix, targets); |
| 32 | 33 | ||
| 33 | nnTrainingParams params = { | 34 | nnTrainingParams params = { |
| 34 | .learning_rate = 0.7, | 35 | .learning_rate = 0.7, |
| 35 | .max_iterations = 20, | 36 | .max_iterations = 20, |
| 36 | .seed = 0, | 37 | .seed = 0, |
| 37 | .weight_init = nnWeightInit01, | 38 | .weight_init = nnWeightInit01, |
| 38 | .debug = false, | 39 | .debug = false, |
| 39 | }; | 40 | }; |
| 40 | 41 | ||
| 41 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | 42 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); |
| 42 | 43 | ||
| 43 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | 44 | const R weight = nnMatrixAt(&net->layers[0].linear.weights, 0, 0); |
| 44 | const R expected_weight = 2.0; | 45 | const R expected_weight = 2.0; |
| 45 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | 46 | printf( |
| 47 | "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
| 46 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | 48 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); |
| 47 | 49 | ||
| 48 | const R bias = nnMatrixAt(&net->biases[0], 0, 0); | 50 | const R bias = nnMatrixAt(&net->layers[0].linear.biases, 0, 0); |
| 49 | const R expected_bias = 1.0; | 51 | const R expected_bias = 1.0; |
| 50 | printf("Trained network bias: %f, Expected: %f\n", bias, expected_bias); | 52 | printf("Trained network bias: %f, Expected: %f\n", bias, expected_bias); |
| 51 | TEST_TRUE(double_eq(bias, expected_bias, WEIGHT_EPS)); | 53 | TEST_TRUE(double_eq(bias, expected_bias, WEIGHT_EPS)); |
| 52 | 54 | ||
| 53 | // Test. | 55 | // Test. |
| 54 | 56 | ||
| 55 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | 57 | nnQueryObject* query = nnMakeQueryObject(net, 1); |
| 56 | 58 | ||
| 57 | const R test_input[] = { 2.3 }; | 59 | const R test_input[] = {2.3}; |
| 58 | R test_output[1]; | 60 | R test_output[1]; |
| 59 | nnQueryArray(net, query, test_input, test_output); | 61 | nnQueryArray(net, query, test_input, test_output); |
| 60 | 62 | ||
| 61 | const R expected_output = test_input[0] * expected_weight + expected_bias; | 63 | const R expected_output = test_input[0] * expected_weight + expected_bias; |
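
This test also exercises the second construction style the commit introduces: a linear layer can be given explicit parameter matrices, as in neuralnet_test.c, or only its dimensions, leaving initialization to nnTrain via params->weight_init. A minimal sketch of both, assuming the nnLayer definitions from this commit are visible through the public header:

```c
#include <neuralnet/matrix.h>
#include <neuralnet/neuralnet.h>

void make_layers(void) {
  // Hand-crafted layer: parameters supplied up front, for networks
  // that are queried without training.
  const R weights[] = {0.3};
  const R biases[]  = {0.0};
  const nnLayer explicit_layer = {
      .type   = nnLinear,
      .linear = {
          .weights = nnMatrixFromArray(1, 1, weights),
          .biases  = nnMatrixFromArray(1, 1, biases)}};

  // Trainable layer: dimensions only; nnTrain initializes the
  // parameters according to params->weight_init.
  const nnLayer trainable_layer = {
      .type = nnLinear, .linear = {.input_size = 1, .output_size = 1}};

  (void)explicit_layer;
  (void)trainable_layer;
}
```
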
diff --git a/src/lib/test/train_linear_perceptron_test.c b/src/lib/test/train_linear_perceptron_test.c index 2b1336d..667643b 100644 --- a/src/lib/test/train_linear_perceptron_test.c +++ b/src/lib/test/train_linear_perceptron_test.c | |||
| @@ -1,9 +1,8 @@ | |||
| 1 | #include <neuralnet/train.h> | 1 | #include <neuralnet/train.h> |
| 2 | 2 | ||
| 3 | #include "neuralnet_impl.h" | ||
| 3 | #include <neuralnet/matrix.h> | 4 | #include <neuralnet/matrix.h> |
| 4 | #include <neuralnet/neuralnet.h> | 5 | #include <neuralnet/neuralnet.h> |
| 5 | #include "activation.h" | ||
| 6 | #include "neuralnet_impl.h" | ||
| 7 | 6 | ||
| 8 | #include "test.h" | 7 | #include "test.h" |
| 9 | #include "test_util.h" | 8 | #include "test_util.h" |
| @@ -11,19 +10,21 @@ | |||
| 11 | #include <assert.h> | 10 | #include <assert.h> |
| 12 | 11 | ||
| 13 | TEST_CASE(neuralnet_train_linear_perceptron_test) { | 12 | TEST_CASE(neuralnet_train_linear_perceptron_test) { |
| 14 | const int num_layers = 1; | 13 | const int num_layers = 1; |
| 15 | const int layer_sizes[] = { 1, 1 }; | 14 | const int input_size = 1; |
| 16 | const nnActivation layer_activations[] = { nnIdentity }; | 15 | const nnLayer layers[] = { |
| 16 | {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}} | ||
| 17 | }; | ||
| 17 | 18 | ||
| 18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 19 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
| 19 | assert(net); | 20 | assert(net); |
| 20 | 21 | ||
| 21 | // Train. | 22 | // Train. |
| 22 | 23 | ||
| 23 | // Try to learn the Y=X line. | 24 | // Try to learn the Y=X line. |
| 24 | #define N 2 | 25 | #define N 2 |
| 25 | const R inputs[N] = { 0., 1. }; | 26 | const R inputs[N] = {0., 1.}; |
| 26 | const R targets[N] = { 0., 1. }; | 27 | const R targets[N] = {0., 1.}; |
| 27 | 28 | ||
| 28 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); | 29 | nnMatrix inputs_matrix = nnMatrixMake(N, 1); |
| 29 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | 30 | nnMatrix targets_matrix = nnMatrixMake(N, 1); |
| @@ -31,26 +32,27 @@ TEST_CASE(neuralnet_train_linear_perceptron_test) { | |||
| 31 | nnMatrixInit(&targets_matrix, targets); | 32 | nnMatrixInit(&targets_matrix, targets); |
| 32 | 33 | ||
| 33 | nnTrainingParams params = { | 34 | nnTrainingParams params = { |
| 34 | .learning_rate = 0.7, | 35 | .learning_rate = 0.7, |
| 35 | .max_iterations = 10, | 36 | .max_iterations = 10, |
| 36 | .seed = 0, | 37 | .seed = 0, |
| 37 | .weight_init = nnWeightInit01, | 38 | .weight_init = nnWeightInit01, |
| 38 | .debug = false, | 39 | .debug = false, |
| 39 | }; | 40 | }; |
| 40 | 41 | ||
| 41 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | 42 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); |
| 42 | 43 | ||
| 43 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | 44 | const R weight = nnMatrixAt(&net->layers[0].linear.weights, 0, 0); |
| 44 | const R expected_weight = 1.0; | 45 | const R expected_weight = 1.0; |
| 45 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | 46 | printf( |
| 47 | "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
| 46 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | 48 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); |
| 47 | 49 | ||
| 48 | // Test. | 50 | // Test. |
| 49 | 51 | ||
| 50 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | 52 | nnQueryObject* query = nnMakeQueryObject(net, 1); |
| 51 | 53 | ||
| 52 | const R test_input[] = { 2.3 }; | 54 | const R test_input[] = {2.3}; |
| 53 | R test_output[1]; | 55 | R test_output[1]; |
| 54 | nnQueryArray(net, query, test_input, test_output); | 56 | nnQueryArray(net, query, test_input, test_output); |
| 55 | 57 | ||
| 56 | const R expected_output = test_input[0]; | 58 | const R expected_output = test_input[0]; |
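
The updates these perceptron tests converge under fall out of the usual squared-error gradient. For a linear unit $o = wx + b$ with target $t$ and error $e = o - t$:

$$
E = \tfrac{1}{2}\,(o - t)^2, \qquad
\frac{\partial E}{\partial w} = e\,x, \qquad
\frac{\partial E}{\partial b} = e,
$$

so each iteration subtracts $\eta\,e\,x$ from the weight and $\eta\,e$ from the bias, which is exactly the nnMatrixSub / nnMatrixMulSub update in nnTrain.
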
diff --git a/src/lib/test/train_sigmoid_test.c b/src/lib/test/train_sigmoid_test.c index 588e7ca..39a84b0 100644 --- a/src/lib/test/train_sigmoid_test.c +++ b/src/lib/test/train_sigmoid_test.c | |||
| @@ -1,9 +1,9 @@ | |||
| 1 | #include <neuralnet/train.h> | 1 | #include <neuralnet/train.h> |
| 2 | 2 | ||
| 3 | #include <neuralnet/matrix.h> | ||
| 4 | #include <neuralnet/neuralnet.h> | ||
| 5 | #include "activation.h" | 3 | #include "activation.h" |
| 6 | #include "neuralnet_impl.h" | 4 | #include "neuralnet_impl.h" |
| 5 | #include <neuralnet/matrix.h> | ||
| 6 | #include <neuralnet/neuralnet.h> | ||
| 7 | 7 | ||
| 8 | #include "test.h" | 8 | #include "test.h" |
| 9 | #include "test_util.h" | 9 | #include "test_util.h" |
| @@ -11,21 +11,24 @@ | |||
| 11 | #include <assert.h> | 11 | #include <assert.h> |
| 12 | 12 | ||
| 13 | TEST_CASE(neuralnet_train_sigmoid_test) { | 13 | TEST_CASE(neuralnet_train_sigmoid_test) { |
| 14 | const int num_layers = 1; | 14 | const int num_layers = 2; |
| 15 | const int layer_sizes[] = { 1, 1 }; | 15 | const int input_size = 1; |
| 16 | const nnActivation layer_activations[] = { nnSigmoid }; | 16 | const nnLayer layers[] = { |
| 17 | {.type = nnLinear, .linear = {.input_size = 1, .output_size = 1}}, | ||
| 18 | {.type = nnSigmoid}, | ||
| 19 | }; | ||
| 17 | 20 | ||
| 18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 21 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
| 19 | assert(net); | 22 | assert(net); |
| 20 | 23 | ||
| 21 | // Train. | 24 | // Train. |
| 22 | 25 | ||
| 23 | // Try to learn the sigmoid function. | 26 | // Try to learn the sigmoid function. |
| 24 | #define N 3 | 27 | #define N 3 |
| 25 | R inputs[N]; | 28 | R inputs[N]; |
| 26 | R targets[N]; | 29 | R targets[N]; |
| 27 | for (int i = 0; i < N; ++i) { | 30 | for (int i = 0; i < N; ++i) { |
| 28 | inputs[i] = lerp(-1, +1, (R)i / (R)(N-1)); | 31 | inputs[i] = lerp(-1, +1, (R)i / (R)(N - 1)); |
| 29 | targets[i] = sigmoid(inputs[i]); | 32 | targets[i] = sigmoid(inputs[i]); |
| 30 | } | 33 | } |
| 31 | 34 | ||
| @@ -35,29 +38,30 @@ TEST_CASE(neuralnet_train_sigmoid_test) { | |||
| 35 | nnMatrixInit(&targets_matrix, targets); | 38 | nnMatrixInit(&targets_matrix, targets); |
| 36 | 39 | ||
| 37 | nnTrainingParams params = { | 40 | nnTrainingParams params = { |
| 38 | .learning_rate = 0.9, | 41 | .learning_rate = 0.9, |
| 39 | .max_iterations = 100, | 42 | .max_iterations = 100, |
| 40 | .seed = 0, | 43 | .seed = 0, |
| 41 | .weight_init = nnWeightInit01, | 44 | .weight_init = nnWeightInit01, |
| 42 | .debug = false, | 45 | .debug = false, |
| 43 | }; | 46 | }; |
| 44 | 47 | ||
| 45 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | 48 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); |
| 46 | 49 | ||
| 47 | const R weight = nnMatrixAt(&net->weights[0], 0, 0); | 50 | const R weight = nnMatrixAt(&net->layers[0].linear.weights, 0, 0); |
| 48 | const R expected_weight = 1.0; | 51 | const R expected_weight = 1.0; |
| 49 | printf("\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | 52 | printf( |
| 53 | "\nTrained network weight: %f, Expected: %f\n", weight, expected_weight); | ||
| 50 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); | 54 | TEST_TRUE(double_eq(weight, expected_weight, WEIGHT_EPS)); |
| 51 | 55 | ||
| 52 | // Test. | 56 | // Test. |
| 53 | 57 | ||
| 54 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/1); | 58 | nnQueryObject* query = nnMakeQueryObject(net, 1); |
| 55 | 59 | ||
| 56 | const R test_input[] = { 0.3 }; | 60 | const R test_input[] = {0.3}; |
| 57 | R test_output[1]; | 61 | R test_output[1]; |
| 58 | nnQueryArray(net, query, test_input, test_output); | 62 | nnQueryArray(net, query, test_input, test_output); |
| 59 | 63 | ||
| 60 | const R expected_output = 0.574442516811659; // sigmoid(0.3) | 64 | const R expected_output = 0.574442516811659; // sigmoid(0.3) |
| 61 | printf("Output: %f, Expected: %f\n", test_output[0], expected_output); | 65 | printf("Output: %f, Expected: %f\n", test_output[0], expected_output); |
| 62 | TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS)); | 66 | TEST_TRUE(double_eq(test_output[0], expected_output, OUTPUT_EPS)); |
| 63 | 67 | ||
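
The sigmoid case relies on the derivative identity that lets nnTrain compute the gradient from the layer output alone:

$$
\sigma(z) = \frac{1}{1 + e^{-z}}, \qquad
\sigma'(z) = \sigma(z)\,\bigl(1 - \sigma(z)\bigr),
$$

hence the `G = output_k * (1 - output_k)` comment in the training code, with no need to keep the pre-activation input around.
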
diff --git a/src/lib/test/train_xor_test.c b/src/lib/test/train_xor_test.c index 6ddc6e0..78695a3 100644 --- a/src/lib/test/train_xor_test.c +++ b/src/lib/test/train_xor_test.c | |||
| @@ -1,9 +1,9 @@ | |||
| 1 | #include <neuralnet/train.h> | 1 | #include <neuralnet/train.h> |
| 2 | 2 | ||
| 3 | #include <neuralnet/matrix.h> | ||
| 4 | #include <neuralnet/neuralnet.h> | ||
| 5 | #include "activation.h" | 3 | #include "activation.h" |
| 6 | #include "neuralnet_impl.h" | 4 | #include "neuralnet_impl.h" |
| 5 | #include <neuralnet/matrix.h> | ||
| 6 | #include <neuralnet/neuralnet.h> | ||
| 7 | 7 | ||
| 8 | #include "test.h" | 8 | #include "test.h" |
| 9 | #include "test_util.h" | 9 | #include "test_util.h" |
| @@ -11,18 +11,27 @@ | |||
| 11 | #include <assert.h> | 11 | #include <assert.h> |
| 12 | 12 | ||
| 13 | TEST_CASE(neuralnet_train_xor_test) { | 13 | TEST_CASE(neuralnet_train_xor_test) { |
| 14 | const int num_layers = 2; | 14 | const int num_layers = 3; |
| 15 | const int layer_sizes[] = { 2, 2, 1 }; | 15 | const int input_size = 2; |
| 16 | const nnActivation layer_activations[] = { nnRelu, nnIdentity }; | 16 | const nnLayer layers[] = { |
| 17 | {.type = nnLinear, .linear = {.input_size = 2, .output_size = 2}}, | ||
| 18 | {.type = nnRelu}, | ||
| 19 | {.type = nnLinear, .linear = {.input_size = 2, .output_size = 1}} | ||
| 20 | }; | ||
| 17 | 21 | ||
| 18 | nnNeuralNetwork* net = nnMakeNet(num_layers, layer_sizes, layer_activations); | 22 | nnNeuralNetwork* net = nnMakeNet(layers, num_layers, input_size); |
| 19 | assert(net); | 23 | assert(net); |
| 20 | 24 | ||
| 21 | // Train. | 25 | // Train. |
| 22 | 26 | ||
| 23 | #define N 4 | 27 | #define N 4 |
| 24 | const R inputs[N][2] = { { 0., 0. }, { 0., 1. }, { 1., 0. }, { 1., 1. } }; | 28 | const R inputs[N][2] = { |
| 25 | const R targets[N] = { 0., 1., 1., 0. }; | 29 | {0., 0.}, |
| 30 | {0., 1.}, | ||
| 31 | {1., 0.}, | ||
| 32 | {1., 1.} | ||
| 33 | }; | ||
| 34 | const R targets[N] = {0., 1., 1., 0.}; | ||
| 26 | 35 | ||
| 27 | nnMatrix inputs_matrix = nnMatrixMake(N, 2); | 36 | nnMatrix inputs_matrix = nnMatrixMake(N, 2); |
| 28 | nnMatrix targets_matrix = nnMatrixMake(N, 1); | 37 | nnMatrix targets_matrix = nnMatrixMake(N, 1); |
| @@ -30,31 +39,37 @@ TEST_CASE(neuralnet_train_xor_test) { | |||
| 30 | nnMatrixInit(&targets_matrix, targets); | 39 | nnMatrixInit(&targets_matrix, targets); |
| 31 | 40 | ||
| 32 | nnTrainingParams params = { | 41 | nnTrainingParams params = { |
| 33 | .learning_rate = 0.1, | 42 | .learning_rate = 0.1, |
| 34 | .max_iterations = 500, | 43 | .max_iterations = 500, |
| 35 | .seed = 0, | 44 | .seed = 0, |
| 36 | .weight_init = nnWeightInit01, | 45 | .weight_init = nnWeightInit01, |
| 37 | .debug = false, | 46 | .debug = false, |
| 38 | }; | 47 | }; |
| 39 | 48 | ||
| 40 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); | 49 | nnTrain(net, &inputs_matrix, &targets_matrix, ¶ms); |
| 41 | 50 | ||
| 42 | // Test. | 51 | // Test. |
| 43 | 52 | ||
| 44 | #define M 4 | 53 | #define M 4 |
| 45 | 54 | ||
| 46 | nnQueryObject* query = nnMakeQueryObject(net, /*num_inputs=*/M); | 55 | nnQueryObject* query = nnMakeQueryObject(net, M); |
| 47 | 56 | ||
| 48 | const R test_inputs[M][2] = { { 0., 0. }, { 1., 0. }, { 0., 1. }, { 1., 1. } }; | 57 | const R test_inputs[M][2] = { |
| 58 | {0., 0.}, | ||
| 59 | {1., 0.}, | ||
| 60 | {0., 1.}, | ||
| 61 | {1., 1.} | ||
| 62 | }; | ||
| 49 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); | 63 | nnMatrix test_inputs_matrix = nnMatrixMake(M, 2); |
| 50 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); | 64 | nnMatrixInit(&test_inputs_matrix, (const R*)test_inputs); |
| 51 | nnQuery(net, query, &test_inputs_matrix); | 65 | nnQuery(net, query, &test_inputs_matrix); |
| 52 | 66 | ||
| 53 | const R expected_outputs[M] = { 0., 1., 1., 0. }; | 67 | const R expected_outputs[M] = {0., 1., 1., 0.}; |
| 54 | for (int i = 0; i < M; ++i) { | 68 | for (int i = 0; i < M; ++i) { |
| 55 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | 69 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); |
| 56 | printf("\nInput: (%f, %f), Output: %f, Expected: %f\n", | 70 | printf( |
| 57 | test_inputs[i][0], test_inputs[i][1], test_output, expected_outputs[i]); | 71 | "\nInput: (%f, %f), Output: %f, Expected: %f\n", test_inputs[i][0], |
| 72 | test_inputs[i][1], test_output, expected_outputs[i]); | ||
| 58 | } | 73 | } |
| 59 | for (int i = 0; i < M; ++i) { | 74 | for (int i = 0; i < M; ++i) { |
| 60 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); | 75 | const R test_output = nnMatrixAt(nnNetOutputs(query), i, 0); |
