Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save primiano/f468a8d77de9a56e347032adb2bf1fe3 to your computer and use it in GitHub Desktop.
Save primiano/f468a8d77de9a56e347032adb2bf1fe3 to your computer and use it in GitHub Desktop.
std::vector data() vs operator[] vs push_back perftests
$ sudo cpupower frequency-set -g powersave
$ sudo sh -c "echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo"
$ for cpu in /sys/devices/system/cpu/cpu*; do for p in $(seq 4); do sudo sh -c "echo 1 > $cpu/cpuidle/state$p/disable"; done; done
#########################
# F1, use data() directly
#########################
$ perf stat out/offlnx/ctest 1
Performance counter stats for 'out/offlnx/ctest 1':
266.453700 task-clock (msec) # 0.999 CPUs utilized
1 context-switches # 0.004 K/sec
0 cpu-migrations # 0.000 K/sec
98,038 page-faults # 0.368 M/sec
744,248,127 cycles # 2.793 GHz
582,032,556 stalled-cycles-frontend # 78.20% frontend cycles idle
<not supported> stalled-cycles-backend
393,796,470 instructions # 0.53 insns per cycle
# 1.48 stalled cycles per insn
55,991,312 branches # 210.135 M/sec
88,627 branch-misses # 0.16% of all branches
0.266820699 seconds time elapsed
#############################
# F2: use vector's operator[]
#############################
$ perf stat out/offlnx/ctest 2
Performance counter stats for 'out/offlnx/ctest 2':
291.833991 task-clock (msec) # 0.998 CPUs utilized
0 context-switches # 0.000 K/sec
0 cpu-migrations # 0.000 K/sec
98,038 page-faults # 0.336 M/sec
815,173,254 cycles # 2.793 GHz
567,599,853 stalled-cycles-frontend # 69.63% frontend cycles idle
<not supported> stalled-cycles-backend
636,500,963 instructions # 0.78 insns per cycle
# 0.89 stalled cycles per insn
65,117,962 branches # 223.134 M/sec
88,464 branch-misses # 0.14% of all branches
0.292292323 seconds time elapsed
############################
# F3: use vector's push_back
############################
$ perf stat out/offlnx/ctest 3
Performance counter stats for 'out/offlnx/ctest 3':
521.080049 task-clock (msec) # 0.999 CPUs utilized
0 context-switches # 0.000 K/sec
0 cpu-migrations # 0.000 K/sec
98,034 page-faults # 0.188 M/sec
1,307,208,215 cycles # 2.509 GHz
815,618,071 stalled-cycles-frontend # 62.39% frontend cycles idle
<not supported> stalled-cycles-backend
1,568,229,826 instructions # 1.20 insns per cycle
# 0.52 stalled cycles per insn
346,674,880 branches # 665.301 M/sec
78,168 branch-misses # 0.02% of all branches
0.521667218 seconds time elapsed
#include <stdio.h>
#include <stdlib.h>
#include <vector>
// Number of ints each benchmark variant (F1/F2/F3) writes into the vector.
const int N = 100000000;
// Applied to every benchmark variant: `noinline` stops the compiler from
// inlining the function into its caller, and default visibility keeps the
// symbol exported. NOTE(review): presumably this is so each variant stays a
// separate, individually inspectable function in the optimized binary.
#define EXP __attribute__((noinline)) __attribute__ ((visibility("default")))
// Benchmark variant 1: raw-pointer stores.
// Resizes *v to N elements, fetches the buffer pointer from data() once,
// and then writes the values 0..N-1 through that pointer.
void EXP F1(std::vector<int>* v) {
  v->resize(N);
  int* const buffer = v->data();
  int idx = 0;
  while (idx < N) {
    buffer[idx] = idx;
    ++idx;
  }
}
// Benchmark variant 2: stores through the vector's operator[].
// Resizes *v to N elements and then assigns element k the value k for
// every k in 0..N-1, going through operator[] on each store.
void EXP F2(std::vector<int>* v) {
  v->resize(N);
  std::vector<int>& vec = *v;
  int idx = 0;
  while (idx < N) {
    vec[idx] = idx;
    ++idx;
  }
}
// Benchmark variant 3: growth through push_back.
// Reserves capacity for N elements up front and then appends the values
// 0..N-1 one at a time. Unlike the resize-based variants, this appends
// after whatever *v already holds.
void EXP F3(std::vector<int>* v) {
  v->reserve(N);
  int value = 0;
  while (value < N) {
    v->push_back(value);
    ++value;
  }
}
// Entry point: runs exactly one benchmark variant, selected by argv[1]
// ("1" -> F1, "2" -> F2, "3" -> F3).
//
// When no selector is supplied, prints a usage line to stderr and returns
// EXIT_FAILURE; argv[1] is a null pointer in that case and must not be
// handed to atoi(). Any selector other than 1, 2 or 3 (including
// non-numeric text, which atoi() maps to 0) aborts the process.
//
// On success the return value is the number of elements in the vector.
int main(int argc, char** argv) {
  if (argc < 2) {
    // argv[0] may itself be absent when argc is 0, so fall back to a
    // fixed program name for the message.
    fprintf(stderr, "usage: %s <1|2|3>\n", argc > 0 ? argv[0] : "ctest");
    return EXIT_FAILURE;
  }

  std::vector<int> v;
  switch (atoi(argv[1])) {
    case 1:
      F1(&v);
      break;
    case 2:
      F2(&v);
      break;
    case 3:
      F3(&v);
      break;
    default:
      abort();
  }

  // NOTE(review): returning the size presumably exists to keep the filled
  // vector observable after the call; the narrowing from size_t to int is
  // made explicit here.
  return static_cast<int>(v.size());
}
$ cat out/offlnx/args.gn
is_component_build = false
is_debug = false
symbol_level=2
is_official_build = true
enable_nacl = false
allow_posix_link_time_opt = false
../../third_party/llvm-build/Release+Asserts/bin/clang++ -MMD -MF obj/ctest/ctest.o.d -DV8_DEPRECATION_WARNINGS -DUSE_UDEV -DUSE_AURA=1 -DUSE_PANGO=1 -DUSE_CAIRO=1 -DUSE_GLIB=1 -DUSE_NSS_CERTS=1 -DUSE_X11=1 -DDISABLE_NACL -DFULL_SAFE_BROWSING -DSAFE_BROWSING_CSD -DSAFE_BROWSING_DB_LOCAL -DOFFICIAL_BUILD -DCHROMIUM_BUILD -DFIELDTRIAL_TESTING_ENABLED -DCR_CLANG_REVISION=\"303910-1\" -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D_FORTIFY_SOURCE=2 -DNDEBUG -DNVALGRIND -DDYNAMIC_ANNOTATIONS_ENABLED=0 -DGLIB_VERSION_MAX_ALLOWED=GLIB_VERSION_2_32 -DGLIB_VERSION_MIN_REQUIRED=GLIB_VERSION_2_26 -I../.. -Igen -I../../build/linux/debian_jessie_amd64-sysroot/usr/include/glib-2.0 -I../../build/linux/debian_jessie_amd64-sysroot/usr/lib/x86_64-linux-gnu/glib-2.0/include -fno-strict-aliasing --param=ssp-buffer-size=4 -fstack-protector -funwind-tables -fPIC -pipe -B../../third_party/binutils/Linux_x64/Release/bin -fcolor-diagnostics -m64 -march=x86-64 -pthread -Wall -Werror -Wextra -Wno-missing-field-initializers -Wno-unused-parameter -Wno-c++11-narrowing -Wno-covered-switch-default -Wno-unneeded-internal-declaration -Wno-inconsistent-missing-override -Wno-undefined-var-template -Wno-nonportable-include-path -Wno-address-of-packed-member -Wno-unused-lambda-capture -Wno-user-defined-warnings -O2 -fno-ident -fdata-sections -ffunction-sections -fomit-frame-pointer -g2 --sysroot=../../build/linux/debian_jessie_amd64-sysroot -fvisibility=hidden -Xclang -load -Xclang ../../third_party/llvm-build/Release+Asserts/lib/libFindBadConstructs.so -Xclang -add-plugin -Xclang find-bad-constructs -Xclang -plugin-arg-find-bad-constructs -Xclang check-auto-raw-pointer -Xclang -plugin-arg-find-bad-constructs -Xclang check-ipc -Wheader-hygiene -Wstring-conversion -Wtautological-overlap-compare -std=gnu++11 -fno-rtti -fno-exceptions -fvisibility-inlines-hidden -c ../../ctest.cc -o obj/ctest/ctest.o
Dump of assembler code for function F1(std::vector<int, std::allocator<int> >*):
0x00000000004084a0 <+0>: push %rbx
0x00000000004084a1 <+1>: mov %rdi,%rbx
0x00000000004084a4 <+4>: mov (%rbx),%rax
0x00000000004084a7 <+7>: mov 0x8(%rbx),%rcx
0x00000000004084ab <+11>: sub %rax,%rcx
0x00000000004084ae <+14>: sar $0x2,%rcx
0x00000000004084b2 <+18>: cmp $0x5f5e0ff,%rcx
0x00000000004084b9 <+25>: ja 0x4084d0 <F1(std::vector<int, std::allocator<int> >*)+48>
0x00000000004084bb <+27>: mov $0x5f5e100,%esi
0x00000000004084c0 <+32>: sub %rcx,%rsi
0x00000000004084c3 <+35>: mov %rbx,%rdi
0x00000000004084c6 <+38>: callq 0x4088e0 <std::vector<int, std::allocator<int> >::_M_default_append(unsigned long)>
0x00000000004084cb <+43>: mov (%rbx),%rax
0x00000000004084ce <+46>: jmp 0x4084e7 <F1(std::vector<int, std::allocator<int> >*)+71>
0x00000000004084d0 <+48>: cmp $0x5f5e100,%rcx
0x00000000004084d7 <+55>: je 0x4084e7 <F1(std::vector<int, std::allocator<int> >*)+71>
0x00000000004084d9 <+57>: mov %rax,%rcx
0x00000000004084dc <+60>: add $0x17d78400,%rcx
0x00000000004084e3 <+67>: mov %rcx,0x8(%rbx)
0x00000000004084e7 <+71>: movdqa 0x12981(%rip),%xmm0 # 0x41ae70
0x00000000004084ef <+79>: mov $0x24,%ecx
0x00000000004084f4 <+84>: movdqa 0x12983(%rip),%xmm8 # 0x41ae80
0x00000000004084fd <+93>: movdqa 0x1298a(%rip),%xmm9 # 0x41ae90
0x0000000000408506 <+102>: movdqa 0x12991(%rip),%xmm10 # 0x41aea0
0x000000000040850f <+111>: movdqa 0x12998(%rip),%xmm11 # 0x41aeb0
0x0000000000408518 <+120>: movdqa 0x1299f(%rip),%xmm12 # 0x41aec0
0x0000000000408521 <+129>: movdqa 0x129a7(%rip),%xmm6 # 0x41aed0
0x0000000000408529 <+137>: movdqa 0x129af(%rip),%xmm7 # 0x41aee0
0x0000000000408531 <+145>: movdqa 0x129b7(%rip),%xmm1 # 0x41aef0
0x0000000000408539 <+153>: movdqa 0x129bf(%rip),%xmm2 # 0x41af00
0x0000000000408541 <+161>: movdqa 0x129c7(%rip),%xmm3 # 0x41af10
0x0000000000408549 <+169>: nopl 0x0(%rax)
0x0000000000408550 <+176>: movdqa %xmm0,%xmm4
0x0000000000408554 <+180>: paddd %xmm8,%xmm4
0x0000000000408559 <+185>: movdqu %xmm0,-0x90(%rax,%rcx,4)
0x0000000000408562 <+194>: movdqu %xmm4,-0x80(%rax,%rcx,4)
0x0000000000408568 <+200>: movdqa %xmm0,%xmm4
0x000000000040856c <+204>: paddd %xmm9,%xmm4
0x0000000000408571 <+209>: movdqa %xmm0,%xmm5
0x0000000000408575 <+213>: paddd %xmm10,%xmm5
0x000000000040857a <+218>: movdqu %xmm4,-0x70(%rax,%rcx,4)
0x0000000000408580 <+224>: movdqu %xmm5,-0x60(%rax,%rcx,4)
0x0000000000408586 <+230>: movdqa %xmm0,%xmm4
0x000000000040858a <+234>: paddd %xmm11,%xmm4
0x000000000040858f <+239>: movdqa %xmm0,%xmm5
0x0000000000408593 <+243>: paddd %xmm12,%xmm5
0x0000000000408598 <+248>: movdqu %xmm4,-0x50(%rax,%rcx,4)
0x000000000040859e <+254>: movdqu %xmm5,-0x40(%rax,%rcx,4)
0x00000000004085a4 <+260>: movdqa %xmm0,%xmm4
0x00000000004085a8 <+264>: paddd %xmm6,%xmm4
0x00000000004085ac <+268>: movdqa %xmm0,%xmm5
0x00000000004085b0 <+272>: paddd %xmm7,%xmm5
0x00000000004085b4 <+276>: movdqu %xmm4,-0x30(%rax,%rcx,4)
0x00000000004085ba <+282>: movdqu %xmm5,-0x20(%rax,%rcx,4)
0x00000000004085c0 <+288>: movdqa %xmm0,%xmm4
0x00000000004085c4 <+292>: paddd %xmm1,%xmm4
0x00000000004085c8 <+296>: movdqa %xmm0,%xmm5
0x00000000004085cc <+300>: paddd %xmm2,%xmm5
0x00000000004085d0 <+304>: movdqu %xmm4,-0x10(%rax,%rcx,4)
0x00000000004085d6 <+310>: movdqu %xmm5,(%rax,%rcx,4)
0x00000000004085db <+315>: paddd %xmm3,%xmm0
0x00000000004085df <+319>: add $0x28,%rcx
0x00000000004085e3 <+323>: cmp $0x5f5e124,%rcx
0x00000000004085ea <+330>: jne 0x408550 <F1(std::vector<int, std::allocator<int> >*)+176>
0x00000000004085f0 <+336>: pop %rbx
0x00000000004085f1 <+337>: retq
Dump of assembler code for function F2(std::vector<int, std::allocator<int> >*):
0x0000000000408600 <+0>: push %rbx
0x0000000000408601 <+1>: mov %rdi,%rbx
0x0000000000408604 <+4>: mov (%rbx),%rax
0x0000000000408607 <+7>: mov 0x8(%rbx),%rcx
0x000000000040860b <+11>: sub %rax,%rcx
0x000000000040860e <+14>: sar $0x2,%rcx
0x0000000000408612 <+18>: cmp $0x5f5e0ff,%rcx
0x0000000000408619 <+25>: ja 0x40862d <F2(std::vector<int, std::allocator<int> >*)+45>
0x000000000040861b <+27>: mov $0x5f5e100,%esi
0x0000000000408620 <+32>: sub %rcx,%rsi
0x0000000000408623 <+35>: mov %rbx,%rdi
0x0000000000408626 <+38>: callq 0x4088e0 <std::vector<int, std::allocator<int> >::_M_default_append(unsigned long)>
0x000000000040862b <+43>: jmp 0x408640 <F2(std::vector<int, std::allocator<int> >*)+64>
0x000000000040862d <+45>: cmp $0x5f5e100,%rcx
0x0000000000408634 <+52>: je 0x408640 <F2(std::vector<int, std::allocator<int> >*)+64>
0x0000000000408636 <+54>: add $0x17d78400,%rax
0x000000000040863c <+60>: mov %rax,0x8(%rbx)
0x0000000000408640 <+64>: xor %eax,%eax
0x0000000000408642 <+66>: data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
0x0000000000408650 <+80>: mov (%rbx),%rcx
0x0000000000408653 <+83>: mov %eax,(%rcx,%rax,4)
0x0000000000408656 <+86>: mov (%rbx),%rcx
0x0000000000408659 <+89>: lea 0x1(%rax),%edx
0x000000000040865c <+92>: mov %edx,0x4(%rcx,%rax,4)
0x0000000000408660 <+96>: mov (%rbx),%rcx
0x0000000000408663 <+99>: lea 0x2(%rax),%edx
0x0000000000408666 <+102>: mov %edx,0x8(%rcx,%rax,4)
0x000000000040866a <+106>: mov (%rbx),%rcx
0x000000000040866d <+109>: lea 0x3(%rax),%edx
0x0000000000408670 <+112>: mov %edx,0xc(%rcx,%rax,4)
0x0000000000408674 <+116>: mov (%rbx),%rcx
0x0000000000408677 <+119>: lea 0x4(%rax),%edx
0x000000000040867a <+122>: mov %edx,0x10(%rcx,%rax,4)
0x000000000040867e <+126>: mov (%rbx),%rcx
0x0000000000408681 <+129>: lea 0x5(%rax),%edx
0x0000000000408684 <+132>: mov %edx,0x14(%rcx,%rax,4)
0x0000000000408688 <+136>: mov (%rbx),%rcx
0x000000000040868b <+139>: lea 0x6(%rax),%edx
0x000000000040868e <+142>: mov %edx,0x18(%rcx,%rax,4)
0x0000000000408692 <+146>: mov (%rbx),%rcx
0x0000000000408695 <+149>: lea 0x7(%rax),%edx
0x0000000000408698 <+152>: mov %edx,0x1c(%rcx,%rax,4)
0x000000000040869c <+156>: add $0x8,%rax
0x00000000004086a0 <+160>: cmp $0x5f5e100,%rax
0x00000000004086a6 <+166>: jne 0x408650 <F2(std::vector<int, std::allocator<int> >*)+80>
0x00000000004086a8 <+168>: pop %rbx
0x00000000004086a9 <+169>: retq
Dump of assembler code for function F3(std::vector<int, std::allocator<int> >*):
0x00000000004086b0 <+0>: push %rbp
0x00000000004086b1 <+1>: push %r15
0x00000000004086b3 <+3>: push %r14
0x00000000004086b5 <+5>: push %r13
0x00000000004086b7 <+7>: push %r12
0x00000000004086b9 <+9>: push %rbx
0x00000000004086ba <+10>: sub $0x18,%rsp
0x00000000004086be <+14>: mov %rdi,%r8
0x00000000004086c1 <+17>: lea 0x10(%r8),%r13
0x00000000004086c5 <+21>: mov (%r8),%r15
0x00000000004086c8 <+24>: mov 0x10(%r8),%rbx
0x00000000004086cc <+28>: mov %rbx,%rax
0x00000000004086cf <+31>: sub %r15,%rax
0x00000000004086d2 <+34>: sar $0x2,%rax
0x00000000004086d6 <+38>: cmp $0x5f5e100,%rax
0x00000000004086dc <+44>: mov %r13,0x10(%rsp)
0x00000000004086e1 <+49>: jae 0x408743 <F3(std::vector<int, std::allocator<int> >*)+147>
0x00000000004086e3 <+51>: mov %r8,%r14
0x00000000004086e6 <+54>: mov 0x8(%r8),%r12
0x00000000004086ea <+58>: mov %r12,%rbp
0x00000000004086ed <+61>: sub %r15,%rbp
0x00000000004086f0 <+64>: mov $0x17d78400,%edi
0x00000000004086f5 <+69>: callq 0x4131e0 <operator new(unsigned long)>
0x00000000004086fa <+74>: mov %rax,%rbx
0x00000000004086fd <+77>: sub %r15,%r12
0x0000000000408700 <+80>: je 0x408710 <F3(std::vector<int, std::allocator<int> >*)+96>
0x0000000000408702 <+82>: mov %rbx,%rdi
0x0000000000408705 <+85>: mov %r15,%rsi
0x0000000000408708 <+88>: mov %r12,%rdx
0x000000000040870b <+91>: callq 0x407d70 <memcpy@plt>
0x0000000000408710 <+96>: sar $0x2,%rbp
0x0000000000408714 <+100>: mov %r14,%r8
0x0000000000408717 <+103>: mov (%r8),%rdi
0x000000000040871a <+106>: test %rdi,%rdi
0x000000000040871d <+109>: je 0x408727 <F3(std::vector<int, std::allocator<int> >*)+119>
0x000000000040871f <+111>: callq 0x413270 <(anonymous namespace)::GlibcFreeHook(void*, void const*)>
0x0000000000408724 <+116>: mov %r14,%r8
0x0000000000408727 <+119>: lea 0x8(%r8),%r12
0x000000000040872b <+123>: mov %rbx,(%r8)
0x000000000040872e <+126>: lea (%rbx,%rbp,4),%rax
0x0000000000408732 <+130>: mov %rax,0x8(%r8)
0x0000000000408736 <+134>: add $0x17d78400,%rbx
0x000000000040873d <+141>: mov %rbx,0x10(%r8)
0x0000000000408741 <+145>: jmp 0x408747 <F3(std::vector<int, std::allocator<int> >*)+151>
0x0000000000408743 <+147>: lea 0x8(%r8),%r12
0x0000000000408747 <+151>: xor %ebp,%ebp
0x0000000000408749 <+153>: jmp 0x408756 <F3(std::vector<int, std::allocator<int> >*)+166>
0x000000000040874b <+155>: nopl 0x0(%rax,%rax,1)
0x0000000000408750 <+160>: inc %ebp
0x0000000000408752 <+162>: mov 0x0(%r13),%rbx
0x0000000000408756 <+166>: mov (%r12),%rdx
0x000000000040875a <+170>: cmp %rbx,%rdx
0x000000000040875d <+173>: je 0x408780 <F3(std::vector<int, std::allocator<int> >*)+208>
0x000000000040875f <+175>: mov %ebp,(%rdx)
0x0000000000408761 <+177>: mov (%r12),%rax
0x0000000000408765 <+181>: add $0x4,%rax
0x0000000000408769 <+185>: mov %r12,%rcx
0x000000000040876c <+188>: jmpq 0x408838 <F3(std::vector<int, std::allocator<int> >*)+392>
0x0000000000408771 <+193>: data16 data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
0x0000000000408780 <+208>: mov (%r8),%r15
0x0000000000408783 <+211>: mov %rdx,%rcx
0x0000000000408786 <+214>: sub %r15,%rcx
0x0000000000408789 <+217>: sar $0x2,%rcx
0x000000000040878d <+221>: mov $0x1,%esi
0x0000000000408792 <+226>: cmovne %rcx,%rsi
0x0000000000408796 <+230>: lea (%rsi,%rcx,1),%r13
0x000000000040879a <+234>: mov %r13,%rax
0x000000000040879d <+237>: shr $0x3e,%rax
0x00000000004087a1 <+241>: movabs $0x3fffffffffffffff,%rax
0x00000000004087ab <+251>: cmovne %rax,%r13
0x00000000004087af <+255>: add %rcx,%rsi
0x00000000004087b2 <+258>: cmovb %rax,%r13
0x00000000004087b6 <+262>: test %r13,%r13
0x00000000004087b9 <+265>: mov %r8,0x8(%rsp)
0x00000000004087be <+270>: je 0x4087e7 <F3(std::vector<int, std::allocator<int> >*)+311>
0x00000000004087c0 <+272>: cmp %rax,%r13
0x00000000004087c3 <+275>: ja 0x408856 <F3(std::vector<int, std::allocator<int> >*)+422>
0x00000000004087c9 <+281>: lea 0x0(,%r13,4),%rdi
0x00000000004087d1 <+289>: callq 0x4131e0 <operator new(unsigned long)>
0x00000000004087d6 <+294>: mov %rax,%rbx
0x00000000004087d9 <+297>: mov 0x8(%rsp),%r8
0x00000000004087de <+302>: mov (%r8),%r15
0x00000000004087e1 <+305>: mov 0x8(%r8),%rdx
0x00000000004087e5 <+309>: jmp 0x4087e9 <F3(std::vector<int, std::allocator<int> >*)+313>
0x00000000004087e7 <+311>: xor %ebx,%ebx
0x00000000004087e9 <+313>: sub %r15,%rdx
0x00000000004087ec <+316>: mov %rdx,%rax
0x00000000004087ef <+319>: sar $0x2,%rax
0x00000000004087f3 <+323>: lea (%rbx,%rdx,1),%r14
0x00000000004087f7 <+327>: mov %ebp,(%rbx,%rdx,1)
0x00000000004087fa <+330>: test %rax,%rax
0x00000000004087fd <+333>: je 0x40880f <F3(std::vector<int, std::allocator<int> >*)+351>
0x00000000004087ff <+335>: mov %rbx,%rdi
0x0000000000408802 <+338>: mov %r15,%rsi
0x0000000000408805 <+341>: callq 0x407d80 <memmove@plt>
0x000000000040880a <+346>: mov 0x8(%rsp),%r8
0x000000000040880f <+351>: add $0x4,%r14
0x0000000000408813 <+355>: test %r15,%r15
0x0000000000408816 <+358>: je 0x408825 <F3(std::vector<int, std::allocator<int> >*)+373>
0x0000000000408818 <+360>: mov %r15,%rdi
0x000000000040881b <+363>: callq 0x413270 <(anonymous namespace)::GlibcFreeHook(void*, void const*)>
0x0000000000408820 <+368>: mov 0x8(%rsp),%r8
0x0000000000408825 <+373>: mov %rbx,(%r8)
0x0000000000408828 <+376>: mov %r14,(%r12)
0x000000000040882c <+380>: lea (%rbx,%r13,4),%rax
0x0000000000408830 <+384>: mov 0x10(%rsp),%r13
0x0000000000408835 <+389>: mov %r13,%rcx
0x0000000000408838 <+392>: mov %rax,(%rcx)
0x000000000040883b <+395>: cmp $0x5f5e0fe,%ebp
0x0000000000408841 <+401>: jle 0x408750 <F3(std::vector<int, std::allocator<int> >*)+160>
0x0000000000408847 <+407>: add $0x18,%rsp
0x000000000040884b <+411>: pop %rbx
0x000000000040884c <+412>: pop %r12
0x000000000040884e <+414>: pop %r13
0x0000000000408850 <+416>: pop %r14
0x0000000000408852 <+418>: pop %r15
0x0000000000408854 <+420>: pop %rbp
0x0000000000408855 <+421>: retq
0x0000000000408856 <+422>: callq 0x407d90 <_ZSt17__throw_bad_allocv@plt>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment