跳转至

perf 性能分析案例

1 分支预测

#include <algorithm>
#include <ctime>
#include <iostream>

// Micro-benchmark: fills an array with pseudo-random values in [0, 255],
// then repeatedly sums every element that is >= 128 and prints the time
// reported by clock() for the summing loops, followed by the sum itself.
// Uncommenting the std::sort call below makes the timed loops run over
// sorted data instead of unsorted data.
//
// NOTE(review): std::rand is declared in <cstdlib>, which is not among the
// headers included above; it presumably arrives transitively - confirm.
int main() {
  // Generate data: arraySize ints, each reduced to the range [0, 255].
  // std::srand is never called here, so std::rand uses its default seed.
  const unsigned arraySize = 32768;
  int data[arraySize];

  for (unsigned c = 0; c < arraySize; ++c)
    data[c] = std::rand() % 256;

  // !!! With this, the next loop runs faster.
  // std::sort(data, data + arraySize);

  // Test: time 100000 full passes over the array with clock().
  clock_t start = clock();
  long long sum = 0;
  for (unsigned i = 0; i < 100000; ++i) {
    for (unsigned c = 0; c < arraySize; ++c) { // Primary loop.
      // Data-dependent branch: taken only for values in the upper half
      // of the [0, 255] range.
      if (data[c] >= 128)
        sum += data[c];
    }
  }

  // Convert clock ticks to seconds.
  double elapsedTime = static_cast<double>(clock() - start) / CLOCKS_PER_SEC;

  // Report the elapsed time first, then the accumulated sum.
  std::cout << elapsedTime << '\n';
  std::cout << "sum = " << sum << '\n';
}

2 缓存

  • 测试代码
#include <cstdint>
#include <cstdio>
#include <string.h>
// Global running total; sum_array adds into it and never resets it.
uint32_t g_num = 0;

/// Adds the first `num` elements of both `a` and `b` to the global g_num.
///
/// @param a   First input array; must hold at least `num` elements.
/// @param b   Second input array; must hold at least `num` elements.
/// @param num Number of elements to read from each array.
///
/// Arithmetic is unsigned 32-bit, so the total wraps modulo 2^32.
/// g_num is updated once per element pair rather than once at the end.
void sum_array(uint32_t *a, uint32_t *b, uint32_t num) {
  const uint32_t *lhs = a;
  const uint32_t *rhs = b;
  for (uint32_t remaining = num; remaining != 0; --remaining) {
    g_num += *lhs + *rhs;
    ++lhs;
    ++rhs;
  }
}

/// Cache test driver: fills two fixed-size arrays, sums them into the global
/// g_num via sum_array, and prints the result.
///
/// @return 0 on completion.
int main() {
  constexpr uint32_t num = 1024;
  uint32_t a[num];
  uint32_t b[num];
  // memset takes a size in BYTES, not elements. Passing `num` would set only
  // the first quarter of each array and leave the rest uninitialized, so the
  // whole object size is used instead. Every byte becomes 0x01, which means
  // each uint32_t element holds 0x01010101 (not 1).
  memset(a, 1, sizeof(a));
  memset(b, 1, sizeof(b));
  sum_array(a, b, num);
  printf("sum= [%u]",g_num);
  return 0;
}