Menu

[0caea8]: / memcpy-perf / memcpy-perf.cpp  Maximize  Restore  History

Download this file

106 lines (93 with data), 3.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
#include <stdlib.h>

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
using namespace std;
// Smallest buffer size, in bytes, that main() sweeps over.
const size_t size_start{64};
// Largest buffer size, in bytes (16 MiB); main() also allocates its two
// buffers with this many bytes.
const size_t size_end{16ull << 20};
// Number of equal log10 increments the [size_start, size_end] range is cut into.
const size_t samples{2048};
// main() divides this (64 MiB) by the current size to obtain the number of
// timed repetitions for that size.
size_t size_per_test{64ull << 20};
// Running total of the values returned by sum(); main() only ever adds to it.
size_t tot_sum{0};
// Benchmark kernels. Only the declarations are visible here; every one is
// marked noinline so that each call from main() stays an actual call.

// Called by main() as memcpy_noinline(destination, source, byte_count).
__attribute__((noinline)) void memcpy_noinline(void *dst, void *src, size_t size);

// Called by main() as memset_noinline(destination, fill_value, byte_count).
__attribute__((noinline)) void memset_noinline(void *dst, int value, size_t size);

// Called by main() as sum(buffer, byte_count); the returned value is
// accumulated by the caller.
__attribute__((noinline)) uint64_t sum(volatile void *src, size_t size);
// Which routine main() times; chosen from the first command-line argument.
enum BenchType {
    MemcpyBench = 0,  // "--memcpy", and the fallback for any unrecognised option
    MemsetBench = 1,  // "--memset"
    SumBench = 2      // "--sum"
};
int main(int argc, char *argv[])
{
BenchType type;
if (argc <= 1) {
cerr << "memcpy_perf [--memcpy|--memset|--sum]" << endl;
return 0;
}
if (string(argv[1]) == string("--memcpy")) {
type = MemcpyBench;
} else if (string(argv[1]) == string("--memset")) {
type = MemsetBench;
} else if (string(argv[1]) == string("--sum")) {
type = SumBench;
} else {
type = MemcpyBench;
}
unique_ptr<uint8_t[]> src(new uint8_t[size_end]);
unique_ptr<uint8_t[]> dst(new uint8_t[size_end]);
memset(src.get(), 1, size_end);
double start_pow = log10(size_start);
double end_pow = log10(size_end);
double pow_inc = (end_pow - start_pow) / samples;
//cout << "src: " << (uintptr_t)src.get() << endl;
//cout << "dst: " << (uintptr_t)dst.get() << endl;
for (double cur_pow = start_pow; cur_pow <= end_pow; cur_pow += pow_inc) {
chrono::time_point<chrono::high_resolution_clock> copy_start, copy_end;
size_t cur_size = (size_t)pow(10.0, cur_pow);
size_t iter_per_size = size_per_test / cur_size;
// run benchmark
switch (type) {
case MemsetBench: {
memcpy_noinline(src.get(), dst.get(), cur_size);
memset_noinline(dst.get(), 0xdeadbeef, cur_size);
copy_start = chrono::high_resolution_clock::now();
for (int i = 0; i < iter_per_size; i++) {
memset_noinline(dst.get(), 0xdeadbeef, cur_size);
}
copy_end = chrono::high_resolution_clock::now();
break;
}
case MemcpyBench: {
memcpy_noinline(dst.get(), src.get(), cur_size);
memcpy_noinline(src.get(), dst.get(), cur_size);
copy_start = chrono::high_resolution_clock::now();
for (int i = 0; i < iter_per_size; i++) {
memcpy_noinline(dst.get(), src.get(), cur_size);
}
copy_end = chrono::high_resolution_clock::now();
break;
}
case SumBench: {
uint64_t s = 0;
s += sum(src.get(), cur_size);
copy_start = chrono::high_resolution_clock::now();
for (int i = 0; i < iter_per_size; i++) {
s += sum(src.get(), cur_size);
}
copy_end = chrono::high_resolution_clock::now();
tot_sum += s;
break;
}
}
double ns_per_copy = chrono::duration_cast<chrono::nanoseconds>(copy_end - copy_start).count() / double(iter_per_size);
double gb_per_sec = ((double)cur_size / (1ull<<30)) / (ns_per_copy / 1.0E9);
if (type == MemcpyBench)
gb_per_sec *= 2.0;
cout << "size: " << cur_size << ", perf: " << gb_per_sec << "GB/s, iter: " << iter_per_size << endl;
}
return 0;
}
Want the latest updates on software, tech news, and AI?
Get the latest updates about software, tech news, and AI from SourceForge directly in your inbox once a month.