1

I am trying to compute the Euclidean distance from a set of 5D points (pixels) to a single 5D point (the center) and store the results in another vector. I want to use flat indexing to keep all the data in a single vector, so for the i-th pixel the 5 dimensions are at indices (5i), (5i+1), ..., (5i+4). I am new to OpenCL and I just edited some sample code from the internet for my own purposes. The theory is right, but the code doesn't produce the right answers! Here is the kernel:

// d_kernel.cl

// Double precision is an optional feature; on OpenCL 1.0/1.1 devices it must be
// enabled explicitly before the `double` type may be used in a kernel.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Computes the SQUARED Euclidean distance between each 5-D pixel and the
 * first 5-D cluster centre.
 *
 * pixelInfo                  flat array; pixel i occupies elements
 *                            [5*i, 5*i + 4].
 * clusterCentres             flat array; only elements [0, 4] (the first
 *                            centre) are read.
 * distanceFromClusterCentre  output; element i receives the squared distance
 *                            for pixel i.
 *
 * One work-item handles one pixel, so the global work size must equal the
 * number of pixels (there is no bounds check here).
 */
__kernel void distance_kernel(__global double *pixelInfo,
                              __global double *clusterCentres,
                              __global double *distanceFromClusterCentre)
{
    const int index = get_global_id(0);
    const int base = 5 * index;

    // Accumulate in double: the original used int temporaries, which truncated
    // every difference and could overflow when squared.
    double sum = 0.0;
    for (int k = 0; k < 5; ++k) {
        const double diff = pixelInfo[base + k] - clusterCentres[k];
        sum += diff * diff;
    }

    distanceFromClusterCentre[index] = sum;
}

and here is the HOST CODE:

#include <iostream> #include <CL/cl.h> #include <vector> using namespace std; #define MAX_SOURCE_SIZE (0x100000) int main(int argc, char **argv) { // Create the two input vectors int i; const int pixelsNumber = 1024; const int clustersNumber = 1; std::vector<double> pixelInfo; pixelInfo.resize(5 * pixelsNumber); std::fill(pixelInfo.begin(), pixelInfo.end(), 500); std::vector<double> clusterCentres; clusterCentres.resize(5 * clustersNumber); std::fill(clusterCentres.begin(), clusterCentres.end(), 200); std::vector<double> distanceFromClusterCentre; distanceFromClusterCentre.resize(pixelsNumber); std::fill(distanceFromClusterCentre.begin(), distanceFromClusterCentre.end(), 0); // Load the kernel source code into the array source_str FILE *fp; char *source_str; size_t source_size; fp = fopen("d_kernel.cl", "r"); if (!fp) { fprintf(stderr, "Failed to load kernel.\n"); exit(1); } source_str = (char*)malloc(MAX_SOURCE_SIZE); source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp); fclose(fp); // Get platform and device information cl_platform_id platform_id = NULL; cl_device_id device_id = NULL; cl_uint ret_num_devices; cl_uint ret_num_platforms; cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices); // Create an OpenCL context cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); // Create a command queue cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); // Create memory buffers on the device for each vector cl_mem pixelInfo_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, 5 * pixelsNumber * sizeof(int), NULL, &ret); cl_mem clusterCentres_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, 5 * clustersNumber * sizeof(int), NULL, &ret); cl_mem distanceFromClusterCentre_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, pixelsNumber * sizeof(int), NULL, &ret); // Copy the vectors to their respective memory buffers ret = 
clEnqueueWriteBuffer(command_queue, pixelInfo_mem, CL_TRUE, 0, 5 * pixelsNumber * sizeof(int), pixelInfo.data(), 0, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue, clusterCentres_mem, CL_TRUE, 0, 5 * clustersNumber * sizeof(int), clusterCentres.data(), 0, NULL, NULL); // Create a program from the kernel source cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); // Build the program ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); // Create the OpenCL kernel cl_kernel kernel = clCreateKernel(program, "vector_add", &ret); // Set the arguments of the kernel ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&pixelInfo_mem); ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&clusterCentres_mem); ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&distanceFromClusterCentre_mem); // Execute the OpenCL kernel on the list size_t global_item_size = pixelsNumber; // Process the entire lists size_t local_item_size = 64; // Divide work items into groups of 64 ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL); // Read the memory buffer result on the device to the local vector result ret = clEnqueueReadBuffer(command_queue, distanceFromClusterCentre_mem, CL_TRUE, 0, pixelsNumber * sizeof(int), distanceFromClusterCentre.data(), 0, NULL, NULL); // Display the result to the screen for (i = 0; i < pixelsNumber; i++) { cout << "Pixel " << i << ": " << distanceFromClusterCentre[i] << endl; //system("PAUSE"); } // Clean up ret = clFlush(command_queue); ret = clFinish(command_queue); ret = clReleaseKernel(kernel); ret = clReleaseProgram(program); ret = clReleaseMemObject(pixelInfo_mem); ret = clReleaseMemObject(clusterCentres_mem); ret = clReleaseMemObject(distanceFromClusterCentre_mem); ret = clReleaseCommandQueue(command_queue); ret = clReleaseContext(context); free(pixelInfo.data()); free(clusterCentres.data()); 
free(distanceFromClusterCentre.data()); system("PAUSE"); return 0; } 

and a part of the RESULT is:

. . . Pixel 501: -1.11874e+306 Pixel 502: -1.16263e+306 Pixel 503: -1.07485e+306 Pixel 504: -1.03079e+306 Pixel 505: -9.42843e+305 Pixel 506: -9.86903e+305 Pixel 507: -8.98954e+305 Pixel 508: -9.86903e+305 Pixel 509: -8.98954e+305 Pixel 510: -9.43014e+305 Press any key to continue . . . Pixel 511: -8.55065e+305 Pixel 512: 0 Pixel 513: 0 Pixel 514: 0 Pixel 515: 0 Pixel 516: 0 Pixel 517: 0 Pixel 518: 0 Pixel 519: 0 Pixel 520: 0 . . . 

After index 511, the rest of the vector is zero!

8
  • What result do you get from this code? i.e. What is wrong about it? Commented May 22, 2015 at 12:21
  • @tobi303 I have updated the post with a part of result ! Commented May 22, 2015 at 12:30
  • rename nearest_neighbour to euclidian_distance_squared Commented May 22, 2015 at 12:47
  • Try to learn the OpenCL vector datatypes. Your code can be shortened to 1 or 2 lines with 'float8' points (projecting your 5D-points to a higher level) and constructs like distanceFromClusterCentre[index] = distance(pixelInfo[index], clusterCentres). Commented May 22, 2015 at 13:01
  • 1
    You're allocating OpenCL buffers sized for ints, but your data vectors are doubles. Commented May 22, 2015 at 13:18

1 Answer 1

1

You created vectors of doubles, but then you treat them as if they were ints (the buffers are created with int sizes, the data is written to them with int sizes, and the results are read back with int sizes). To avoid such mistakes you could write your code this way:

// Note the size argument: it is derived from the host container itself
// (element count * size of one element), so the byte count can never
// disagree with the vector's element type.
cl_mem pixelInfo_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
                                      pixelInfo.size() * sizeof(pixelInfo[0]),
                                      NULL, &ret);
Sign up to request clarification or add additional context in comments.

3 Comments

I am using int index = get_global_id(0) in my kernel! This should handle the indexing correctly, I guess. I also followed your suggestion and replaced my own ID with int index = get_group_id(0), but the results are still wrong and the same as before.
you know that a simple get_global_id will easily replace a combination of get_group_id and get_local_id ?
@AmirHosseinF Yep, the problem is not with the indexes but with the vectors, which are created to hold doubles and which you then treat as if they were ints. Updated my answer.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.