I have a float array that needs to be referenced many times on the device, so I believe the best place to store it is in __ constant __ memory (using this reference). The array (or vector) will need to be written once at run-time when initializing, but read by multiple different functions many millions of times, so constant copying to the kernel each function call seems like A Bad Idea.
const int n = 32; __constant__ float dev_x[n]; //the array in question struct struct_max : public thrust::unary_function<float,float> { float C; struct_max(float _C) : C(_C) {} __host__ __device__ float operator()(const float& x) const { return fmax(x,C);} }; void foo(const thrust::host_vector<float> &, const float &); int main() { thrust::host_vector<float> x(n); //magic happens populate x cudaMemcpyToSymbol(dev_x,x.data(),n*sizeof(float)); foo(x,0.0); return(0); } void foo(const thrust::host_vector<float> &input_host_x, const float &x0) { thrust::device_vector<float> dev_sol(n); thrust::host_vector<float> host_sol(n); //this method works fine, but the memory transfer is unacceptable thrust::device_vector<float> input_dev_vec(n); input_dev_vec = input_host_x; //I want to avoid this thrust::transform(input_dev_vec.begin(),input_dev_vec.end(),dev_sol.begin(),struct_max(x0)); host_sol = dev_sol; //this memory transfer for debugging //this method compiles fine, but crashes at runtime thrust::device_ptr<float> dev_ptr = thrust::device_pointer_cast(dev_x); thrust::transform(dev_ptr,dev_ptr+n,dev_sol.begin(),struct_max(x0)); host_sol = dev_sol; //this line crashes } I tried adding a global thrust::device_vector dev_x(n), but that also crashed at run-time, and would be in __ global __ memory rather than __ constant__ memory
This can all be made to work if I just discard the thrust library, but is there a way to use the thrust library with globals and device constant memory?