Hi, I have a CUDA program that runs successfully. Here is the code for the CUDA program:
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>

/* Evaluate a CUDA runtime call; on failure print file, line and the CUDA
 * error string to stderr, then terminate the process. */
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t err_ = (call);                                           \
        if (err_ != cudaSuccess) {                                           \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,    \
                    cudaGetErrorString(err_));                               \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)

/*
 * Squares the first N elements of `a` in place.
 *
 * Each thread derives one flat index from its x-dimension block and thread
 * coordinates and handles at most one element, so the launch must supply at
 * least N threads in total along x.
 *
 * a: array to square in place (main passes a cudaMalloc'd device buffer).
 * N: number of elements to process.
 */
__global__ void square_array(float *a, int N)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    /* The grid is rounded up to whole blocks, so trailing threads must skip. */
    if (idx < N)
        a[idx] = a[idx] * a[idx];
}

/*
 * Fills a host array with 0..N-1, squares it on the device with
 * square_array, copies the result back and prints one "index value" pair
 * per line.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if host allocation or any
 * CUDA call fails.
 */
int main(void)
{
    const int N = 10;
    const size_t size = N * sizeof(float);

    float *a_h = (float *)malloc(size);   /* host copy   */
    float *a_d = NULL;                    /* device copy */
    if (a_h == NULL) {
        fprintf(stderr, "host allocation of %zu bytes failed\n", size);
        return EXIT_FAILURE;
    }
    CUDA_CHECK(cudaMalloc((void **)&a_d, size));

    for (int i = 0; i < N; i++)
        a_h[i] = (float)i;
    CUDA_CHECK(cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice));

    /* Ceiling division: enough blocks to cover N even when N is not a
     * multiple of block_size. */
    const int block_size = 4;
    const int n_blocks = (N + block_size - 1) / block_size;
    square_array<<<n_blocks, block_size>>>(a_d, N);
    /* A launch returns no status itself; invalid configurations are
     * reported through cudaGetLastError(). */
    CUDA_CHECK(cudaGetLastError());

    /* Blocking copy: waits for the kernel to finish, and reports any error
     * raised while it was executing. */
    CUDA_CHECK(cudaMemcpy(a_h, a_d, size, cudaMemcpyDeviceToHost));

    /* Print results */
    for (int i = 0; i < N; i++)
        printf("%d %f\n", i, a_h[i]);

    free(a_h);
    CUDA_CHECK(cudaFree(a_d));
    return 0;
}

/*
 * Now I want to split this code into two files, meaning there should be two
 * files: one for the C++ or C code and another .cu file for the kernel. I
 * just want to do it for learning, and I don't want to write the same kernel
 * code again and again. Can anyone tell me how to do this? How do I split
 * this code into two different files? Then how do I compile it? How do I
 * write a makefile for it? How to
 */