perf 性能分析案例
1 分支预测
#include <algorithm>
#include <cstdlib>
#include <ctime>
#include <iostream>
/**
 * Branch-prediction case study: times a conditional sum over random bytes.
 *
 * Fills a 32768-element array with values in [0, 255], then scans it
 * 100000 times, adding every element that is >= 128 to a running total.
 * The processor time spent in the scan (in seconds) and the total are
 * written to standard output, one per line.
 *
 * std::srand is never called, so std::rand produces its default sequence.
 * Un-commenting the std::sort call below is the experiment: per the original
 * note, the timed loop runs faster once the data is sorted.
 */
int main() {
    // Generate data
    const unsigned arraySize = 32768;
    int data[arraySize];
    for (unsigned c = 0; c < arraySize; ++c)
        data[c] = std::rand() % 256;

    // !!! With this, the next loop runs faster.
    // std::sort(data, data + arraySize);

    // Test: only the scan is timed, not the data generation above.
    // std::clock / std::clock_t are the names <ctime> is guaranteed to
    // declare; the unqualified global names are not.
    const std::clock_t start = std::clock();
    long long sum = 0;
    for (unsigned i = 0; i < 100000; ++i) {
        for (unsigned c = 0; c < arraySize; ++c) {  // Primary loop.
            // Data-dependent branch: its outcome depends on the array
            // contents, which is what this case study varies by sorting.
            if (data[c] >= 128)
                sum += data[c];
        }
    }
    double elapsedTime = static_cast<double>(std::clock() - start) / CLOCKS_PER_SEC;
    std::cout << elapsedTime << '\n';
    std::cout << "sum = " << sum << '\n';
}
2 缓存
#include <cstdint>
#include <cstdio>
#include <string.h>
/// Running total updated by sum_array(); starts at zero.
uint32_t g_num = 0;

/**
 * @brief Adds the first @p num elements of both arrays to the global g_num.
 *
 * The total is accumulated in unsigned 32-bit arithmetic, so it wraps
 * modulo 2^32. g_num is not reset here; each call adds to whatever value
 * it already holds.
 *
 * @param a   First input array; read only, at least @p num elements.
 * @param b   Second input array; read only, at least @p num elements.
 * @param num Number of elements to consume from each array.
 */
void sum_array(uint32_t *a, uint32_t *b, uint32_t num) {
    const uint32_t *lhs = a;
    const uint32_t *rhs = b;
    const uint32_t *const lhs_end = a + num;
    while (lhs != lhs_end) {
        // Update the global on every step, exactly like the indexed form.
        g_num += *lhs;
        g_num += *rhs;
        ++lhs;
        ++rhs;
    }
}
/**
 * Cache case study driver: fills two 1024-element arrays, sums them into
 * the global g_num via sum_array(), and prints the result.
 *
 * @return 0 always.
 */
int main() {
    constexpr uint32_t num = 1024;
    uint32_t a[num];
    uint32_t b[num];
    // memset takes a size in BYTES. Passing the element count would fill
    // only the first quarter of each array and leave the rest uninitialized,
    // which sum_array() would then read. Every byte is set to 1, so each
    // element holds 0x01010101.
    memset(a, 1, sizeof(a));
    memset(b, 1, sizeof(b));
    sum_array(a, b, num);
    // %u expects unsigned int; the cast keeps the call correct even where
    // uint32_t is a different underlying type.
    printf("sum= [%u]", static_cast<unsigned>(g_num));
    return 0;
}