I am trying to understand what kind of magic optimizations gcc/clang perform on this code:
```cpp
#include <random>
#include <iostream>

int main() {
    std::random_device rd;
    std::mt19937 mt(rd());

    const unsigned arraySize = 100000;
    int data[arraySize];
    for (unsigned c = 0; c < arraySize; ++c)
        data[c] = mt() % 256;

    long long sum = 0;
    for (unsigned i = 0; i < 100000; ++i) {
        for (unsigned c = 0; c < arraySize; ++c) {
            if (data[c] >= 128)
                sum += data[c];
        }
    }
    std::cout << sum << std::endl;
}
```

and this code:
```cpp
#include <random>
#include <iostream>
#include <algorithm>

int main() {
    std::random_device rd;
    std::mt19937 mt(rd());

    const unsigned arraySize = 100000;
    int data[arraySize];
    for (unsigned c = 0; c < arraySize; ++c)
        data[c] = mt() % 256;

    std::sort(data, data + arraySize);

    long long sum = 0;
    for (unsigned i = 0; i < 100000; ++i) {
        for (unsigned c = 0; c < arraySize; ++c) {
            if (data[c] >= 128)
                sum += data[c];
        }
    }
    std::cout << sum << std::endl;
}
```

Basically, when I compiled and ran these about three years ago, the second version was roughly 4x faster because of much better branch prediction on the sorted data. When I compile and run them now, they take almost the same time, and I have no idea what kind of sorcery gcc/clang is doing.
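My guess (which I'd like confirmed) is that newer gcc/clang turn the branch in the inner loop into branchless code, either a conditional move or an auto-vectorized masked sum, so there is nothing left for the branch predictor to mispredict. Here is a minimal sketch of the kind of rewrite I suspect the compiler performs; the function name `branchlessSum` is mine, and this is an illustration, not actual compiler output:

```cpp
#include <cstddef>

// Hypothetical branchless rewrite of the inner loop, similar in spirit
// to what gcc/clang may emit (a cmov or SIMD masking) at -O2/-O3.
long long branchlessSum(const int* data, std::size_t n) {
    long long sum = 0;
    for (std::size_t c = 0; c < n; ++c) {
        // The ternary typically compiles to a conditional move,
        // so there is no branch for the predictor to miss, and
        // sorted and unsorted data run at the same speed.
        sum += (data[c] >= 128) ? data[c] : 0;
    }
    return sum;
}
```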
Regarding the `+=` operator: that operation should be staying cached, since you perform it on every iteration. I have read a very nice tutorial that explains the benefits of keeping memory and operations "hot" to improve performance.
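If I understand the "hot" argument correctly, the point is that the accumulator can live in a register across iterations instead of being written back to memory each time. A minimal sketch of that contrast, with hypothetical function names (an illustration only, not the code from the question):

```cpp
#include <cstddef>

// Accumulating through a pointer may force a load/store each iteration
// (unless the compiler can prove it is safe to hoist the value).
void sumToMemory(const int* data, std::size_t n, long long* out) {
    for (std::size_t c = 0; c < n; ++c)
        *out += data[c];
}

// A local accumulator can stay "hot" in a register for the whole loop
// and is written out only once at the end.
long long sumInRegister(const int* data, std::size_t n) {
    long long sum = 0;
    for (std::size_t c = 0; c < n; ++c)
        sum += data[c];
    return sum;
}
```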