diff options
Diffstat (limited to 'hello')
| -rw-r--r-- | hello/CMakeLists.txt | 11 | ||||
| -rw-r--r-- | hello/hello.cu | 59 |
2 files changed, 70 insertions, 0 deletions
diff --git a/hello/CMakeLists.txt b/hello/CMakeLists.txt new file mode 100644 index 0000000..e4b4acc --- /dev/null +++ b/hello/CMakeLists.txt | |||
| @@ -0,0 +1,11 @@ | |||
# Build script for the CUDA hello example: compiles hello.cu into the
# cuda_hello executable.
cmake_minimum_required(VERSION 3.28)

project(cuda_hello LANGUAGES CUDA CXX)

add_executable(cuda_hello hello.cu)

# Turn on the usual warning sets, but keep pedantic diagnostics disabled:
# nvcc emits non-standard (gcc-specific) host code, so -Wpedantic would warn
# about generated code rather than about this project's sources.
# https://stackoverflow.com/questions/31000996/warning-when-compiling-cu-with-wpedantic-style-of-line-directive-is-a-gcc-ex
target_compile_options(cuda_hello
  PRIVATE
    -Wall
    -Wextra
    -Wno-pedantic)
diff --git a/hello/hello.cu b/hello/hello.cu new file mode 100644 index 0000000..691b18c --- /dev/null +++ b/hello/hello.cu | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | #include <cstdio> | ||
| 2 | |||
// Prints the number of CUDA devices and the name of each one to stdout.
//
// If the device count cannot be queried, the function prints
// "No CUDA devices found" to stdout, writes the CUDA runtime's description of
// the failure to stderr, and returns. A device whose properties cannot be read
// is reported on stderr instead of being skipped silently.
void logDevices() {
  int count = 0;
  const cudaError_t count_status = cudaGetDeviceCount(&count);
  if (count_status != cudaSuccess) {
    printf("No CUDA devices found\n");
    // The failure may be something other than "no device"; surface the
    // runtime's own explanation so the cause is not hidden.
    fprintf(stderr, "cudaGetDeviceCount failed: %s\n",
            cudaGetErrorString(count_status));
    return;
  }

  printf("CUDA devices found: %d\n", count);
  for (int i = 0; i < count; ++i) {
    cudaDeviceProp properties;
    const cudaError_t status = cudaGetDeviceProperties(&properties, i);
    if (status != cudaSuccess) {
      fprintf(stderr, "Device [%d]: failed to query properties: %s\n", i,
              cudaGetErrorString(status));
      continue;
    }
    printf("Device [%d]: %s\n", i, properties.name);
  }
}
| 18 | |||
// Writes array[i] = i for every i in [0, N).
//
// Launch layout: any 1-D grid of 1-D blocks. The index range is divided among
// all launched threads using a stride of gridDim.x * blockDim.x, so a
// <<<1, 1>>> launch behaves as a plain sequential loop over the whole array.
// Uses no shared memory.
//
// array: device-accessible buffer with room for at least N ints.
// N:     number of elements to write; nothing is written when N <= 0.
__global__ void kernel(int* array, int N) {
  // 64-bit arithmetic keeps the flat index and the stride from overflowing
  // when the grid is large.
  const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
  const long long first =
      static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

  for (long long i = first; i < N; i += stride) {
    array[i] = static_cast<int>(i);
  }
}
| 24 | |||
// Logs the available CUDA devices, fills a 100-element device buffer with the
// values 0..99 using `kernel`, copies it back to the host, and prints it.
//
// Returns 0 when every step succeeded and 1 otherwise. Each failing CUDA call
// is reported on stderr with the runtime's error description.
int main() {
  logDevices();

  constexpr int N = 100;
  constexpr size_t kBytes = N * sizeof(int);

  int* host_array = new int[N];
  int* device_array = nullptr;
  bool success = false;
  // Declared ahead of the first `goto` so that no jump to `cleanup` crosses a
  // variable initialization.
  cudaError_t status = cudaSuccess;

  status = cudaMalloc(&device_array, kBytes);
  if (status != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(status));
    goto cleanup;
  }

  kernel<<<1, 1>>>(device_array, N);
  // A kernel launch returns no status of its own; a rejected launch is only
  // visible through cudaGetLastError().
  status = cudaGetLastError();
  if (status != cudaSuccess) {
    fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(status));
    goto cleanup;
  }

  // The blocking copy also reports errors raised while the kernel was running.
  status = cudaMemcpy(host_array, device_array, kBytes, cudaMemcpyDeviceToHost);
  if (status != cudaSuccess) {
    fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(status));
    goto cleanup;
  }

  for (int i = 0; i < N; ++i) {
    printf("%d ", host_array[i]);
  }
  printf("\n");

  success = true;

cleanup:
  delete[] host_array;
  if (device_array != nullptr) {
    status = cudaFree(device_array);
    if (status != cudaSuccess) {
      // Reported for diagnosis only; the exit code still reflects whether the
      // computation itself succeeded.
      fprintf(stderr, "cudaFree failed: %s\n", cudaGetErrorString(status));
    }
  }
  return success ? 0 : 1;
}
