diff options
| -rw-r--r-- | CMakeLists.txt | 6 | ||||
| -rw-r--r-- | hello/CMakeLists.txt | 11 | ||||
| -rw-r--r-- | hello/hello.cu | 59 | ||||
| -rw-r--r-- | julia/CMakeLists.txt | 11 | ||||
| -rw-r--r-- | julia/julia.cu | 108 | ||||
| -rw-r--r-- | vector_sum/CMakeLists.txt | 11 | ||||
| -rw-r--r-- | vector_sum/main.cu | 62 | 
7 files changed, 268 insertions, 0 deletions
| diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..c3ae680 --- /dev/null +++ b/CMakeLists.txt | |||
| @@ -0,0 +1,6 @@ | |||
cmake_minimum_required(VERSION 3.28)

# A top-level CMakeLists.txt must call project() directly; without it CMake
# warns and pretends an implicit project() call was made.
project(cuda_examples LANGUAGES CUDA CXX)

add_subdirectory(hello)
add_subdirectory(julia)
# Only descend into ptracer when the directory is actually present, so that
# configuring does not fail on trees that do not contain it.
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ptracer/CMakeLists.txt")
  add_subdirectory(ptracer)
endif()
add_subdirectory(vector_sum)
| diff --git a/hello/CMakeLists.txt b/hello/CMakeLists.txt new file mode 100644 index 0000000..e4b4acc --- /dev/null +++ b/hello/CMakeLists.txt | |||
| @@ -0,0 +1,11 @@ | |||
cmake_minimum_required(VERSION 3.28)

project(cuda_hello LANGUAGES CUDA CXX)

add_executable(cuda_hello hello.cu)

# Turn on the common warning sets but keep pedantic mode off: the host code
# that nvcc emits is non-standard (gcc-specific) and would trigger -Wpedantic.
# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
target_compile_options(cuda_hello
  PRIVATE
    -Wall
    -Wextra
    -Wno-pedantic)
| diff --git a/hello/hello.cu b/hello/hello.cu new file mode 100644 index 0000000..691b18c --- /dev/null +++ b/hello/hello.cu | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | #include <cstdio> | ||
| 2 | |||
// Prints how many CUDA devices the runtime reports, followed by the name of
// each device. Devices whose properties cannot be queried are skipped.
void logDevices() {
  int num_devices = 0;
  const cudaError_t status = cudaGetDeviceCount(&num_devices);
  if (status != cudaSuccess) {
    printf("No CUDA devices found\n");
    return;
  }

  printf("CUDA devices found: %d\n", num_devices);

  for (int device = 0; device < num_devices; ++device) {
    cudaDeviceProp info;
    if (cudaGetDeviceProperties(&info, device) != cudaSuccess) {
      continue;
    }
    printf("Device [%d]: %s\n", device, info.name);
  }
}
| 18 | |||
// Writes array[i] = i for every i in [0, N).
//
// Launch layout: any 1-D grid/block configuration. Each thread starts at its
// global index and advances by the total number of launched threads, so a
// <<<1, 1>>> launch makes the single thread fill the whole array while larger
// launches split the work. `array` must hold at least N ints.
__global__ void kernel(int* array, int N) {
  // 64-bit index arithmetic so the stride addition cannot overflow near
  // INT_MAX.
  const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
  const long long first =
      static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
  for (long long i = first; i < N; i += stride) {
    array[i] = static_cast<int>(i);
  }
}
| 24 | |||
// Logs the available devices, fills a device array with 0..N-1 via `kernel`,
// copies it back to the host and prints it.
//
// Returns 0 on success and 1 if any CUDA call fails; the failing call is
// reported on stderr.
int main() {
  logDevices();

  constexpr int N = 100;
  constexpr size_t kBytes = N * sizeof(int);

  // All locals are declared before the first `goto` so that no jump crosses
  // an initialization.
  int* host_array = new int[N];
  int* device_array = nullptr;
  bool success = false;
  cudaError_t err = cudaSuccess;

  err = cudaMalloc(&device_array, kBytes);
  if (err != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
    goto cleanup;
  }

  kernel<<<1, 1>>>(device_array, N);
  // A kernel launch returns nothing; configuration errors are only visible
  // through cudaGetLastError().
  err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
    goto cleanup;
  }

  // The blocking copy also waits for the kernel, so execution errors surface
  // here.
  err = cudaMemcpy(host_array, device_array, kBytes, cudaMemcpyDeviceToHost);
  if (err != cudaSuccess) {
    fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(err));
    goto cleanup;
  }

  for (int i = 0; i < N; ++i) {
    printf("%d ", host_array[i]);
  }
  printf("\n");

  success = true;

cleanup:
  delete[] host_array;
  if (device_array != nullptr) {
    cudaFree(device_array);
  }
  return success ? 0 : 1;
}
| diff --git a/julia/CMakeLists.txt b/julia/CMakeLists.txt new file mode 100644 index 0000000..e5428fb --- /dev/null +++ b/julia/CMakeLists.txt | |||
| @@ -0,0 +1,11 @@ | |||
cmake_minimum_required(VERSION 3.28)

project(cuda_julia LANGUAGES CUDA CXX)

add_executable(cuda_julia julia.cu)

# Turn on the common warning sets but keep pedantic mode off: the host code
# that nvcc emits is non-standard (gcc-specific) and would trigger -Wpedantic.
# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
target_compile_options(cuda_julia
  PRIVATE
    -Wall
    -Wextra
    -Wno-pedantic)
| diff --git a/julia/julia.cu b/julia/julia.cu new file mode 100644 index 0000000..f3ecb80 --- /dev/null +++ b/julia/julia.cu | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | #include <cstdint> | ||
| 2 | #include <cstdio> | ||
| 3 | #include <cstdlib> | ||
| 4 | |||
// One RGB pixel with a single byte per channel.
struct Pixel {
  uint8_t r;
  uint8_t g;
  uint8_t b;
};
| 8 | |||
// Single-precision complex number.
struct Complex {
  float r;  // real part
  float i;  // imaginary part

  // Squared magnitude r^2 + i^2 (no square root is taken).
  __device__ float norm2() const {
    return r * r + i * i;
  }
};
| 14 | |||
// Complex product: (a.r + a.i*i) * (b.r + b.i*i).
__device__ Complex operator*(Complex a, Complex b) {
  const float real = (a.r * b.r) - (a.i * b.i);
  const float imag = (a.i * b.r) + (a.r * b.i);
  return Complex{real, imag};
}
| 18 | |||
// Component-wise complex sum.
__device__ Complex operator+(Complex a, Complex b) {
  Complex sum = a;
  sum.r += b.r;
  sum.i += b.i;
  return sum;
}
| 22 | |||
// Tests whether pixel (x, y) of a width x height image belongs to the Julia
// set for c = -0.8 + 0.156i.
//
// The pixel is mapped to the complex plane by measuring its offset from
// (width / 2, height / 2), dividing by that same half extent and scaling by
// 1.5. The map z -> z*z + c is then iterated.
//
// Returns 0 as soon as |z|^2 exceeds 1000, or 1 if that never happens within
// 200 iterations.
__device__ int julia(int width, int height, int x, int y) {
  constexpr float kScale = 1.5f;
  constexpr int kMaxIterations = 200;
  constexpr float kEscapeNorm2 = 1000.0f;

  const int centre_x = width / 2;
  const int centre_y = height / 2;
  // A dimension below 2 has a half extent of 0; fall back to a divisor of 1 so
  // the coordinates stay finite instead of becoming NaN.
  const int div_x = centre_x != 0 ? centre_x : 1;
  const int div_y = centre_y != 0 ? centre_y : 1;

  const float jx = kScale * (centre_x - x) / div_x;
  const float jy = kScale * (centre_y - y) / div_y;

  const Complex c{-0.8f, 0.156f};
  Complex a{jx, jy};

  for (int i = 0; i < kMaxIterations; ++i) {
    a = a * a + c;
    if (a.norm2() > kEscapeNorm2) {
      return 0;
    }
  }
  return 1;
}
| 41 | |||
// Colours one pixel of a width x height image according to julia().
//
// Launch layout: a 2-D grid whose threads cover at least width x height
// positions. A launch of one single-thread block per pixel works, and so does
// any multi-thread 2-D block shape; threads that fall outside the image exit
// without writing. `image` must hold width * height pixels in row-major order.
__global__ void juliaMain(int width, int height, Pixel* image) {
  const int x = blockIdx.x * blockDim.x + threadIdx.x;
  const int y = blockIdx.y * blockDim.y + threadIdx.y;

  // The grid rarely divides the image evenly; never write past the buffer.
  if (x >= width || y >= height) {
    return;
  }

  constexpr Pixel background{41, 95, 152};
  constexpr Pixel juliaColour{228, 192, 135};

  const Pixel pixel =
      julia(width, height, x, y) == 1 ? juliaColour : background;

  // Widen before multiplying so large images cannot overflow the row offset.
  image[static_cast<size_t>(y) * width + x] = pixel;
}
| 54 | |||
// Writes `image` (width * height pixels, row-major) to `path` as a binary
// Netpbm file with a "P6" header and a maximum channel value of 255.
//
// Returns true only if the header, every pixel and the final flush were all
// written successfully. Returns false for a null pointer, a non-positive
// dimension, or any I/O failure; a partially written file may be left behind
// after an I/O failure.
bool write_pbm(const Pixel* image, int width, int height, const char* path) {
  // The pixel buffer is written verbatim, so Pixel must be exactly 3 bytes.
  static_assert(sizeof(Pixel) == 3, "Pixel must be tightly packed RGB");

  if (image == nullptr || path == nullptr || width <= 0 || height <= 0) {
    return false;
  }

  // Widen before multiplying so large images cannot overflow int.
  const size_t num_pixels =
      static_cast<size_t>(width) * static_cast<size_t>(height);

  FILE* file = fopen(path, "wb");
  if (!file) {
    return false;
  }

  bool ok = fprintf(file, "P6\n%d %d\n255\n", width, height) > 0;
  ok = ok && fwrite(image, sizeof(Pixel), num_pixels, file) == num_pixels;

  // fclose flushes buffered data, so a late write error is reported here.
  // Always close, even after an earlier failure.
  if (fclose(file) != 0) {
    ok = false;
  }
  return ok;
}
| 72 | |||
// Renders a Julia set on the GPU and writes it to "julia.pbm".
//
// Usage: cuda_julia [width] [height]   (defaults: 1920 x 1080)
//
// Returns 0 on success and 1 on invalid arguments, a CUDA failure or a file
// write failure; the cause is reported on stderr.
int main(int argc, const char** argv) {
  const int width = argc > 1 ? atoi(argv[1]) : 1920;
  const int height = argc > 2 ? atoi(argv[2]) : 1080;

  // atoi yields 0 for non-numeric input and may yield negatives; neither is a
  // usable image dimension.
  if (width <= 0 || height <= 0) {
    fprintf(stderr, "width and height must be positive integers\n");
    return 1;
  }

  // All locals are declared before the first `goto` so that no jump crosses
  // an initialization.
  bool success = false;
  cudaError_t err = cudaSuccess;

  const dim3 dim(width, height);
  // Widen before multiplying so large images cannot overflow int.
  const size_t num_pixels =
      static_cast<size_t>(width) * static_cast<size_t>(height);
  const size_t image_size_bytes = num_pixels * sizeof(Pixel);
  Pixel* image_host = new Pixel[num_pixels];
  Pixel* image_dev = nullptr;

  err = cudaMalloc(&image_dev, image_size_bytes);
  if (err != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
    goto cleanup;
  }

  // One single-thread block per pixel.
  juliaMain<<<dim, 1>>>(width, height, image_dev);
  // A kernel launch returns nothing; configuration errors (for example a grid
  // dimension beyond the device limit) are only visible through
  // cudaGetLastError().
  err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
    goto cleanup;
  }

  // The blocking copy also waits for the kernel, so execution errors surface
  // here.
  err = cudaMemcpy(
      image_host, image_dev, image_size_bytes, cudaMemcpyDeviceToHost);
  if (err != cudaSuccess) {
    fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(err));
    goto cleanup;
  }

  if (!write_pbm(image_host, width, height, "julia.pbm")) {
    fprintf(stderr, "failed to write julia.pbm\n");
    goto cleanup;
  }

  success = true;

cleanup:
  delete[] image_host;
  if (image_dev) {
    cudaFree(image_dev);
  }
  return success ? 0 : 1;
}
| diff --git a/vector_sum/CMakeLists.txt b/vector_sum/CMakeLists.txt new file mode 100644 index 0000000..1eea51b --- /dev/null +++ b/vector_sum/CMakeLists.txt | |||
| @@ -0,0 +1,11 @@ | |||
cmake_minimum_required(VERSION 3.28)

project(vector_sum LANGUAGES CUDA CXX)

add_executable(vector_sum main.cu)

# Turn on the common warning sets but keep pedantic mode off: the host code
# that nvcc emits is non-standard (gcc-specific) and would trigger -Wpedantic.
# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
target_compile_options(vector_sum
  PRIVATE
    -Wall
    -Wextra
    -Wno-pedantic)
| diff --git a/vector_sum/main.cu b/vector_sum/main.cu new file mode 100644 index 0000000..ba2e964 --- /dev/null +++ b/vector_sum/main.cu | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | #include <cstdio> | ||
| 2 | |||
// Element-wise vector addition: out[i] = a[i] + b[i] for every i in [0, N).
//
// Launch layout: any 1-D grid/block configuration with at least N threads in
// total. Threads whose global index is >= N do nothing, so the grid does not
// have to divide N evenly. `a`, `b` and `out` must each hold at least N ints.
__global__ void add(int N, const int* a, const int* b, int* out) {
  const int id = blockIdx.x * blockDim.x + threadIdx.x;
  if (id < N) {
    out[id] = a[id] + b[id];
  }
}
| 7 | |||
// Adds two length-N vectors, each holding 0..N-1, on the GPU and prints the
// element-wise sums.
//
// Returns 0 on success and 1 if any CUDA call fails; the failing call is
// reported on stderr.
int main() {
  constexpr int N = 100;
  constexpr size_t kBytes = N * sizeof(int);
  constexpr int kNumArrays = 3;  // two inputs + one output

  // All locals are declared before the first `goto` so that no jump crosses
  // an initialization.
  bool success = false;
  cudaError_t err = cudaSuccess;
  int host_array[N] = {0};
  int* dev_arrays[kNumArrays] = {nullptr};

  // Allocate device arrays.
  for (int i = 0; i < kNumArrays; ++i) {
    err = cudaMalloc(&dev_arrays[i], kBytes);
    if (err != cudaSuccess) {
      fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
      goto cleanup;
    }
  }

  // Fill the host array with values 0..N-1.
  for (int i = 0; i < N; ++i) {
    host_array[i] = i;
  }

  // Copy the host array to each of the first two device arrays.
  for (int i = 0; i < 2; ++i) {
    err = cudaMemcpy(dev_arrays[i], host_array, kBytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
      fprintf(
          stderr, "cudaMemcpy (host to device) failed: %s\n",
          cudaGetErrorString(err));
      goto cleanup;
    }
  }

  // Add the first two arrays.
  // N blocks, 1 thread per block.
  add<<<N, 1>>>(N, dev_arrays[0], dev_arrays[1], dev_arrays[2]);
  // A kernel launch returns nothing; configuration errors are only visible
  // through cudaGetLastError().
  err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
    goto cleanup;
  }

  // Copy the result from the third array to the host. The blocking copy also
  // waits for the kernel, so execution errors surface here.
  err = cudaMemcpy(host_array, dev_arrays[2], kBytes, cudaMemcpyDeviceToHost);
  if (err != cudaSuccess) {
    fprintf(
        stderr, "cudaMemcpy (device to host) failed: %s\n",
        cudaGetErrorString(err));
    goto cleanup;
  }

  // Print the result.
  for (int i = 0; i < N; ++i) {
    printf("%d ", host_array[i]);
  }
  printf("\n");

  success = true;

cleanup:
  for (int i = 0; i < kNumArrays; ++i) {
    if (dev_arrays[i] != nullptr) {
      cudaFree(dev_arrays[i]);
    }
  }
  return success ? 0 : 1;
}
