I am using SSE to implement matrix multiplication, but I found there exists memory leak(see the picture below), the memory usage is increasing from 400M to 1G or more.
But, I free the memory in the code.
The following are codes
main.cpp
#include "sse_matrix.h" #include <ctime> int main(int argc, char* argv[]) { vector<float> left(size, 0); vector<float> right(size, 0); vector<float> result(size, 0); // initialize value for (int i = 0; i < dim; i ++) { for (int j = 0; j < dim; j ++) { left[i*dim + j] = j; right[i*dim + j] = j; } } cout << "1. INFO: value initialized, starting matrix multiplication" << endl; // calculate the result clock_t my_time = clock(); SSE_Matrix_Multiply(&left, &right, &result); cout << "2. INFO: SSE matrix multiplication result has got" << endl; /*for (int i = 0; i < dim; i ++) { for (int j = 0; j < dim; j ++) { cout << result[i * dim + j] << " "; } cout << endl; }*/ cout << "3. INFO: " << float(clock() - my_time)/1000.0 << endl; system("pause"); return 0; } sse_matrix.h
#ifndef __SSE_MATRIX_H #define __SSE_MATRIX_H #include <vector> #include <iostream> using std::cin; using std::cout; using std::endl; using std::vector; //#define dim 8 //#define size (dim * dim) const int dim = 4096; const int size = dim * dim; struct Matrix_Info { vector<float> * A; int ax, ay; vector<float> * B; int bx, by; vector<float> * C; int cx, cy; int m; int n; }; void Transpose_Matrix_SSE(float * matrix) { __m128 row1 = _mm_loadu_ps(&matrix[0*4]); __m128 row2 = _mm_loadu_ps(&matrix[1*4]); __m128 row3 = _mm_loadu_ps(&matrix[2*4]); __m128 row4 = _mm_loadu_ps(&matrix[3*4]); _MM_TRANSPOSE4_PS(row1, row2, row3, row4); _mm_storeu_ps(&matrix[0*4], row1); _mm_storeu_ps(&matrix[1*4], row2); _mm_storeu_ps(&matrix[2*4], row3); _mm_storeu_ps(&matrix[3*4], row4); } float * Shuffle_Matrix_Multiply(float * left, float * right) { __m128 _t1, _t2, _sum; _sum = _mm_setzero_ps(); // set all value of _sum to zero float * _result = new float[size]; float _res[4] = {0}; for (int i = 0; i < 4; i ++) { for (int j = 0; j < 4; j ++) { _t1 = _mm_loadu_ps(left + i * 4); _t2 = _mm_loadu_ps(right + j * 4); _sum = _mm_mul_ps(_t1, _t2); _mm_storeu_ps(_res, _sum); _result[i * 4 + j] = _res[0] + _res[1] + _res[2] + _res[3]; } } return _result; } float * SSE_4_Matrix(struct Matrix_Info * my_info) { int m = my_info->m; int n = my_info->n; int ax = my_info->ax; int ay = my_info->ay; int bx = my_info->bx; int by = my_info->by; //1. split Matrix A and Matrix B float * _a = new float[16]; float * _b = new float[16]; for (int i = 0; i < m; i ++) { for (int j = 0; j < m; j ++) { _a[i*m + j] = (*my_info->A)[(i + ax) * n + j + ay]; _b[i*m + j] = (*my_info->B)[(i + bx) * n + j + by]; } } //2. transpose Matrix B Transpose_Matrix_SSE(_b); //3. calculate result and return a float pointer return Shuffle_Matrix_Multiply(_a, _b); } int Matrix_Multiply(struct Matrix_Info * my_info) { int m = my_info->m; int n = my_info->n; int cx = my_info->cx; int cy = my_info->cy; for (int i = 0; i < m; i ++) { for (int j = 0; j < m; j ++) { float * temp = SSE_4_Matrix(my_info); (*my_info->C)[(i + cx) * n + j + cy] += temp[i*m + j]; delete [] temp; } } return 0; } void SSE_Matrix_Multiply(vector<float> * left, vector<float> * right, vector<float> * result) { struct Matrix_Info my_info; my_info.A = left; my_info.B = right; my_info.C = result; my_info.n = dim; my_info.m = 4; // Matrix A row:i, column:j for (int i = 0; i < dim; i += 4) { for (int j = 0; j < dim; j += 4) { // Matrix B row:j column:k for (int k = 0; k < dim; k += 4) { my_info.ax = i; my_info.ay = j; my_info.bx = j; my_info.by = k; my_info.cx = i; my_info.cy = k; Matrix_Multiply(&my_info); } } } } #endif And I guess maybe the memory leak is in Shuffle_Matrix_Multiply function in sse_matrix.h file. But, I am not sure, and now, the memory usage is increasing and my system will crash.
Hope someone can help to figure out and thanks in advance.
