diff options
| author | 3gg <3gg@shellblade.net> | 2024-11-27 13:41:09 -0800 |
|---|---|---|
| committer | 3gg <3gg@shellblade.net> | 2024-11-27 13:41:09 -0800 |
| commit | c8be8496c8a15d0ede8338939a7512109b8e5e46 (patch) | |
| tree | 1e60112652e9f3c3a20e6bf4cc0b8bef0ebc81fd /vector_sum | |
Initial commit.
Diffstat (limited to 'vector_sum')
| -rw-r--r-- | vector_sum/CMakeLists.txt | 11 | ||||
| -rw-r--r-- | vector_sum/main.cu | 62 |
2 files changed, 73 insertions, 0 deletions
diff --git a/vector_sum/CMakeLists.txt b/vector_sum/CMakeLists.txt new file mode 100644 index 0000000..1eea51b --- /dev/null +++ b/vector_sum/CMakeLists.txt | |||
| @@ -0,0 +1,11 @@ | |||
cmake_minimum_required(VERSION 3.28)

project(vector_sum LANGUAGES CUDA CXX)

add_executable(vector_sum main.cu)

# Turn on the common warning sets, but keep pedantic warnings disabled: nvcc
# emits non-standard (gcc-specific) host code, which -Wpedantic complains about.
# See:
# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
target_compile_options(vector_sum
  PRIVATE
    -Wall
    -Wextra
    -Wno-pedantic)
diff --git a/vector_sum/main.cu b/vector_sum/main.cu new file mode 100644 index 0000000..ba2e964 --- /dev/null +++ b/vector_sum/main.cu | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | #include <cstdio> | ||
| 2 | |||
// Element-wise sum of two integer arrays: out[i] = a[i] + b[i] for 0 <= i < N.
//
// Launch layout: 1D grid of 1D blocks. Each thread handles the single element
// at its flat global index (blockIdx.x * blockDim.x + threadIdx.x), so the
// launch must provide at least N threads in total to cover every element.
// Threads whose index falls at or beyond N do nothing, which makes it safe to
// round the grid size up. No shared memory is used.
//
// Parameters:
//   N   - number of elements to process; N <= 0 results in no work.
//   a   - device pointer to the first input array (at least N ints).
//   b   - device pointer to the second input array (at least N ints).
//   out - device pointer to the output array (at least N ints).
__global__ void add(int N, const int* a, const int* b, int* out) {
  // Compute the index in 64-bit so that blockIdx.x * blockDim.x cannot wrap
  // around for very large grids before it is compared against N.
  const long long id =
      static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

  // Guard against surplus threads: the grid rarely matches N exactly.
  if (id < N) {
    out[id] = a[id] + b[id];
  }
}
| 7 | |||
// Demo driver: fills a host array with 0..N-1, uploads it into two device
// arrays, adds them element-wise on the GPU into a third device array, copies
// the result back and prints it on one line.
//
// Returns 0 on success. On any CUDA failure a diagnostic is written to stderr,
// all device allocations made so far are released, and 1 is returned.
int main() {
  constexpr int N = 100;
  constexpr int kNumDevArrays = 3;  // Two inputs plus one output.

  bool success = false;
  // Declared before the first `goto cleanup` so that no jump bypasses an
  // initialization (which would be ill-formed C++).
  cudaError_t err = cudaSuccess;
  int host_array[N] = {0};
  int* dev_arrays[kNumDevArrays] = {nullptr};
  constexpr size_t bytes = sizeof(host_array);

  // Allocate device arrays.
  for (int i = 0; i < kNumDevArrays; ++i) {
    err = cudaMalloc(&dev_arrays[i], bytes);
    if (err != cudaSuccess) {
      fprintf(
          stderr, "cudaMalloc failed for device array %d: %s\n", i,
          cudaGetErrorString(err));
      goto cleanup;
    }
  }

  // Fill the host array with values 0..N-1.
  for (int i = 0; i < N; ++i) {
    host_array[i] = i;
  }

  // Copy the host array to each of the first two device arrays.
  for (int i = 0; i < 2; ++i) {
    err = cudaMemcpy(dev_arrays[i], host_array, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
      fprintf(
          stderr, "Host-to-device copy failed for device array %d: %s\n", i,
          cudaGetErrorString(err));
      goto cleanup;
    }
  }

  // Add the first two arrays.
  // N blocks, 1 thread per block.
  add<<<N, 1>>>(N, dev_arrays[0], dev_arrays[1], dev_arrays[2]);

  // A kernel launch returns no status of its own; launch-configuration errors
  // must be fetched explicitly.
  err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(err));
    goto cleanup;
  }

  // Copy the result from the third array to the host. This blocking copy also
  // waits for the kernel to finish and reports any error raised while it ran.
  err = cudaMemcpy(host_array, dev_arrays[2], bytes, cudaMemcpyDeviceToHost);
  if (err != cudaSuccess) {
    fprintf(
        stderr, "Device-to-host copy failed: %s\n", cudaGetErrorString(err));
    goto cleanup;
  }

  // Print the result.
  for (int i = 0; i < N; ++i) {
    printf("%d ", host_array[i]);
  }
  printf("\n");

  success = true;

cleanup:
  // Release whatever was allocated; entries that were never allocated are
  // still nullptr and are skipped.
  for (int i = 0; i < kNumDevArrays; ++i) {
    if (dev_arrays[i] != nullptr) {
      const cudaError_t free_err = cudaFree(dev_arrays[i]);
      if (free_err != cudaSuccess) {
        fprintf(
            stderr, "cudaFree failed for device array %d: %s\n", i,
            cudaGetErrorString(free_err));
        success = false;
      }
    }
  }
  return success ? 0 : 1;
}
