Stabilizer: Statistically Sound Performance Evaluation

6,764 views
6,967 views

Published on

0 Comments
1 Like
Statistics
Notes
  • Be the first to comment

No Downloads
Views
Total views
6,764
On SlideShare
0
From Embeds
0
Number of Embeds
5,714
Actions
Shares
0
Downloads
0
Comments
0
Likes
1
Embeds 0
No embeds

No notes for slide

Stabilizer: Statistically Sound Performance Evaluation

  1. 1. STABILIZER [ASPLOS 2013]. Emery Berger, Charlie Curtsinger. Statistically Sound Performance Evaluation
  2. 2. We all care about performance evaluation. We’ve been doing it wrong. STABILIZER. Repeated runs and error bars not enough. We’re not measuring what we thought.
  3. 3. changing a program changes its layout; STABILIZER; Memory layout affects performance; STABILIZER eliminates the effect of layout; no way to measure effect of change in isolation; evaluation of LLVM’s optimizations with STABILIZER; Case Studies; enables sound performance evaluation; We’ve been doing it wrong
  4. 4. STABILIZER; We’ve been doing it wrong; changing a program changes its layout; Memory layout affects performance; STABILIZER eliminates the effect of layout; no way to measure effect of change in isolation; evaluation of LLVM’s optimizations with STABILIZER; Case Studies; enables sound performance evaluation
  5. 5. AUnsound performanceevaluationint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}
  6. 6. int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}AUnsound performanceevaluationstatic void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}
  7. 7. int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}AUnsound performanceevaluationstatic void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;size_t meaning_of_life=42;for (size_t i = 0; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}for (size_t i = 16; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}
  8. 8. int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}AUnsound performanceevaluationstatic void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;size_t meaning_of_life=42;for (size_t i = 0; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}for (size_t i = 16; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}
  9. 9. int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}AUnsound performanceevaluationDataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;size_t meaning_of_life=42;for (size_t i = 0; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}for (size_t i = 16; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}asm("isync");}
  10. 10. int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}AUnsound performanceevaluationDataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;size_t meaning_of_life=42;for (size_t i = 0; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}for (size_t i = 16; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}asm("isync");}
  11. 11. A′int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}AUnsound performanceevaluationDataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;size_t meaning_of_life=42;for (size_t i = 0; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}for (size_t i = 16; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}asm("isync");}
  12. 12. A′AUnsound performanceevaluationint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;size_t meaning_of_life=42;for (size_t i = 0; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}for (size_t i = 16; i < size; i += 32) {asm("icbi 0,%0" : : "r"(p));p += 32;}asm("isync");}
  13. 13. 0.000.250.500.751.0085.0 87.5 90.0 92.5 95.0Time (s)NumberofrunsVersionAAA′A ×1 ×1Which is faster?
  14. 14. 0.000.250.500.751.0085.0 87.5 90.0 92.5 95.0Time (s)NumberofrunsVersionAAIs faster than ?A′ A
  15. 15. 0.000.250.500.751.0085.0 87.5 90.0 92.5 95.0Time (s)NumberofrunsVersionAAIs faster than ?A′ A
  16. 16. 0.000.250.500.751.0085.0 87.5 90.0 92.5 95.0Time (s)NumberofrunsVersionAAIs faster than ?A′ A
  17. 17. 0.000.250.500.751.0085.0 87.5 90.0 92.5 95.0Time (s)NumberofrunsVersionAAIs faster than ?A′ Awhat aboutvariance?2.8% faster
  18. 18. 05101585.0 87.5 90.0 92.5 95.0Time (s)NumberofrunsVersionAAWhich is faster?A′A ×30 ×30
  19. 19. 05101585.0 87.5 90.0 92.5 95.0Time (s)NumberofrunsVersionAAIs faster than ?A′ Astill2.8% faster
  20. 20. Why is A′ faster than A?
  21. 21. Why is faster than ?A′ Aint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}
  22. 22. Why is faster than ?A′ Aint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}Was it thecode change?
  23. 23. Why is faster than ?A′ Aint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}Or was it thenew layout?
  24. 24. Why is faster than ?A′ Aint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}Mytkowicz et al. (ASPLOS’09)Layout biases measurement
  25. 25. Layout biases measurement. Mytkowicz et al. (ASPLOS’09). Link Order; Environment Variable Size. Changes function addresses; Moves the program stack.
  26. 26. Layout biases measurement. Mytkowicz et al. (ASPLOS’09). Link Order; Environment Variable Size. Changes function addresses; Moves the program stack. Larger than impact of -O3.
  27. 27. Blame the cacheint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}A
  28. 28. Blame the cacheAint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}conflictmap to samecache set
  29. 29. A′Blame the cacheAint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}
  30. 30. Blame the cacheAint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}map tosame setA′Nothing hereno conflict
  31. 31. A′Blame the cacheAint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}or branch predictoror TLBor prefetcheror branchtarget predictor
  32. 32. Blame the hashAint main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}int main(int argc, char **argv) {topFrame = (void**)__builtin_frame_address(0);setHandler(Trap::TrapSignal, onTrap);setHandler(SIGALRM, onTimer);setHandler(SIGSEGV, onFault);for(Function* f: functions) {f->setTrap();}setTimer(interval);int r = stabilizer_main(argc, argv);return r;}void setTimer(int msec) {struct itimerval timer;timer.it_value.tv_sec = (msec - msec % 1000) / 1000;timer.it_value.tv_usec = 1000 * (msec % 1000);timer.it_interval.tv_sec = 0;timer.it_interval.tv_usec = 0;setitimer(ITIMER_REAL, &timer, 0);}static void flush_icache(void* begin, size_t size) {uintptr_t p = (uintptr_t)begin & ~15UL;for (size_t i = 0; i < size; i += 16) {asm("icbi 0,%0" : : "r"(p));p += 16;}asm("isync");}DataHeapType* getDataHeap() {static char buf[sizeof(DataHeapType)];static DataHeapType* _theDataHeap = new (buf) DataHeapType;return _theDataHeap;}A′
  33. 33. it’s faster, it’s faster, it’s faster, it’s faster. Is A′ faster than A? Let’s do a poll.
  34. 34. Do we trust this? it’s faster, it’s faster, it’s faster, it’s faster
  35. 35. it’s faster; it’s slower; they’re the same. Is A′ faster than A?
  36. 36. it’s faster it’s slower they’re the sameBut it ran faster!What if we only talk to Bob?
  37. 37. it’s fasterBut it ran faster!What if we only use this layout?it’s slower they’re the same
  38. 38. it’s fasterBut it ran faster!What if we only use this layout?
  39. 39. it’s fasterBut it ran faster!What if we only use this layout?Upgrade libcChanges layout
  40. 40. it’s fasterBut it ran faster!What if we only use this layout?Change UsernameChanges layout
  41. 41. Layout is Brittleit’s fasterBut it ran faster!What if we only use this layout?Run in a newdirectoryChanges layout
  42. 42. Layout is Brittle. But it ran faster! What if we only use this layout? Layout biases measurement. Mytkowicz et al. (ASPLOS’09). Can we eliminate the effect of layout?
  43. 43. But it ran faster!What if we only use this layout?Layout biases measurementCan we eliminate theeffect of layout?YES
  44. 44. STABILIZERMemory layout affects performanceSTABILIZER eliminates the effect of layoutenables sound performance evaluationevaluation of LLVM’s optimizations with STABILIZERCase Studiesmakes performance evaluation difficult
  45. 45. STABILIZERMemory layout affects performanceSTABILIZER eliminates the effect of layoutenables sound performance evaluationevaluation of LLVM’s optimizations with STABILIZERCase Studiesmakes performance evaluation difficult
  46. 46. Layout biases measurementSTABILIZERfunction addresses stack frame sizesheap allocationsrandomizes layout
  47. 47. STABILIZERrandomizes layoutfunction addresses stack frame sizesheap allocationsrepeatedlyLayout biases measurementduringexecution
  48. 48. STABILIZER randomizes layout: function addresses, stack frame sizes, heap allocations; repeatedly. Layout biases measurement; a completely random layout cannot bias results.
  49. 49. A′A ×30 ×30Sound PerformanceEvaluation
  50. 50. A ×30 ×300%10%20%30%40%85.0 87.5 90.0 92.5 95.0Time (s)PercentofObservedRuntimesA′Sound PerformanceEvaluation
  51. 51. Is faster than ?A′ A0%10%20%30%40%85.0 87.5 90.0 92.5 95.0Time (s)PercentofObservedRuntimes
  52. 52. Is faster than ?A0%10%20%30%40%85.0 87.5 90.0 92.5 95.0Time (s)PercentofObservedRuntimesA′
  53. 53. Is faster than ?A0%10%20%30%40%85.0 87.5 90.0 92.5 95.0Time (s)PercentofObservedRuntimesA′
  54. 54. Is faster than ?A0%10%20%30%40%85.0 87.5 90.0 92.5 95.0Time (s)PercentofObservedRuntimesA′
  55. 55. If A′ = A, what is the probability of measuring a speedup this large by chance? The Statistical Approach: hypothesis testing
  56. 56. If A′ = A, what is the probability of measuring a speedup this large by chance? Easy to compute for the normal distribution.
  57. 57. easy to compute forthe normal distributionAA′If =
  58. 58. STABILIZERrandomizes layoutrepeatedly0%10%20%30%40%85.0 87.5 90.0 92.5 95.0Time (s)PercentofObservedRuntimeswhat is the probability of measuringa speedup this large by chance?if there is alow probability
  59. 59. STABILIZERrandomizes layoutrepeatedlythis speedup is real0%10%20%30%40%85.0 87.5 90.0 92.5 95.0Time (s)PercentofObservedRuntimesnot due to the effecton memory layout
  60. 60. this speedup is real0%10%20%30%40%85.0 87.5 90.0 92.5 95.0Time (s)PercentofObservedRuntimesSound PerformanceEvaluationSTABILIZERrandomizes layoutrepeatedlywhat doesre-randomization do?not due to the effecton memory layout
  61. 61. 0.00.20.4350 360 370 380 390 400Time (s)ProbabilityDensitySTABILIZERrandomizes layoutrepeatedlyone randomlayout per-run
  62. 62. STABILIZERrandomizes layoutrepeatedly0.00.20.4350 360 370 380 390 400Time (s)ProbabilityDensitymany randomlayouts in each runone randomlayout per-run
  63. 63. STABILIZER generates a new random layout every ½ second. Total execution time is the sum of all periods. STABILIZER randomizes layout repeatedly.
  64. 64. STABILIZER generates a new random layout every ½ second. Total execution time is the sum of all periods. The sum of a sufficient number of independent, identically distributed random variables is approximately normally distributed. STABILIZER randomizes layout repeatedly.
  65. 65. STABILIZER generates a new random layout every ½ second. Total execution time is the sum of all periods. The sum of a sufficient number of independent, identically distributed random variables is approximately normally distributed. STABILIZER randomizes layout repeatedly.
  66. 66. STABILIZER generates a new random layout every ½ second. Total execution time is the sum of all periods. The sum of a sufficient number of independent, identically distributed random variables is approximately normally distributed. STABILIZER randomizes layout repeatedly.
  67. 67. Central Limit Theorem: execution times are normally distributed. The sum of a sufficient number of independent, identically distributed random variables is approximately normally distributed.
  68. 68. STABILIZERMemory layout affects performanceSTABILIZER eliminates the effect of layoutenables sound performance evaluationevaluation of LLVM’s optimizations with STABILIZERCase Studiesmakes performance evaluation difficult
  69. 69. STABILIZERmakes performance evaluation difficultMemory layout affects performanceSTABILIZER eliminates the effect of layoutenables sound performance evaluationevaluation of LLVM’s optimizations with STABILIZERCase Studies
  70. 70. Case Studies: evaluation of LLVM’s optimizations with STABILIZER (on each benchmark; across the whole benchmark suite). First, build benchmarks with STABILIZER.
  71. 71. Build programs with STABILIZER: > szc main.c
  72. 72. Build programs with STABILIZER: > szc main.c
  73. 73. Build programs with STABILIZER: > szc main.c -Rcode
  74. 74. Build programs with STABILIZER: > szc main.c -Rcode -Rheap -Rstack. Now run the benchmarks.
  75. 75. 0%10%20%30%40%85.0 87.5 90.0 92.5 95.0Time (s)PercentofObservedRuntimesRun benchmarks as usualA′A ×30 ×30drop the results into R
  76. 76. Is faster than ?A0%10%20%30%40%85.0 87.5 90.0 92.5 95.0Time (s)PercentofObservedRuntimesA′
  77. 77. A′0%10%20%30%40%85.0 87.5 90.0 92.5 95.0Time (s)PercentofObservedRuntimesIf = A
  78. 78. If A′ = A
  79. 79. If A′ = A, what is the probability of measuring a difference at least this large?
  80. 80. If A′ = A, what is the probability of measuring a difference at least this large? The Student’s t-test: t.test(times.A′, times.A)
  81. 81. If A′ = A, what is the probability of measuring a difference at least this large? The Student’s t-test: If p-value
  82. 82. If A′ = A, what is the probability of measuring a difference at least this large? The Student’s t-test: If p-value ≤ 5% (95% confidence)
  83. 83. If A′ = A, what is the probability of measuring a difference at least this large? The Student’s t-test: If p-value ≤ 5%, we reject the null hypothesis.
  84. 84. The Student’s t-test: If p-value ≤ 5%, we reject the null hypothesis: A′ ≠ A. Random chance not responsible for the measured difference.
  85. 85. A′ ≠ A: The difference is real
  86. 86. Speedup of -O2 over -O1 [bar chart: Speedup (−10% to 20%) per benchmark: libquantum, milc, bzip2, sphinx3, namd, lbm, perlbench, hmmer, h264ref, cactusADM, wrf, sjeng, gobmk, gromacs, zeusmp, mcf, gcc, astar; legend: Significant (Yes / No)]
  87. 87. Speedup of -O2 over -O1 [bar chart: Speedup (−10% to 20%) per benchmark: libquantum, milc, bzip2, sphinx3, namd, lbm, perlbench, hmmer, h264ref, cactusADM, wrf, sjeng, gobmk, gromacs, zeusmp, mcf, gcc, astar; legend: Significant (Yes / No)]
  88. 88. Speedup of -O2 over -O1 [bar chart: Speedup (−10% to 20%) per benchmark: libquantum, milc, bzip2, sphinx3, namd, lbm, perlbench, hmmer, h264ref, cactusADM, wrf, sjeng, gobmk, gromacs, zeusmp, mcf, gcc, astar; legend: Significant (Yes / No)]
  89. 89. Speedup of -O2 over -O1 [bar chart: Speedup (−10% to 20%) per benchmark: libquantum, milc, bzip2, sphinx3, namd, lbm, perlbench, hmmer, h264ref, cactusADM, wrf, sjeng, gobmk, gromacs, zeusmp, mcf, gcc, astar; legend: Significant (Yes / No)]
  90. 90. Speedup of -O3 over -O2 [bar chart: Speedup (−10% to 20%) per benchmark: bzip2, gobmk, zeusmp, libquantum, wrf, astar, mcf, hmmer, milc, namd, gcc, lbm, gromacs, h264ref, cactusADM, perlbench, sphinx3, sjeng; legend: Significant (Yes / No)]
  91. 91. Speedup of -O3 over -O20.0%0.5%1.0%1.5%bzip2gobmkzeusmplibquantumwrfastarmcfhmmermilcnamdgcclbmgromacsh264refcactusADMperlbenchsphinx3sjengSpeedupSignificantYesNo
  92. 92. 0.0%0.5%1.0%1.5%bzip2gobmkzeusmplibquantumwrfastarmcfhmmermilcnamdgcclbmgromacsh264refcactusADMperlbenchsphinx3sjengSpeedupSignificantYesNoSpeedup of -O3 over -O2
  93. 93. 0.0%0.5%1.0%1.5%bzip2gobmkzeusmplibquantumwrfastarmcfhmmermilcnamdgcclbmgromacsh264refcactusADMperlbenchsphinx3sjengSpeedupSignificantYesNoSpeedup of -O3 over -O2
  94. 94. 0.0%0.5%1.0%1.5%bzip2gobmkzeusmplibquantumwrfastarmcfhmmermilcnamdgcclbmgromacsh264refcactusADMperlbenchsphinx3sjengSpeedupSignificantYesNoSpeedup of -O3 over -O2
  95. 95. 0.0%0.5%1.0%1.5%bzip2gobmkzeusmplibquantumwrfastarmcfhmmermilcnamdgcclbmgromacsh264refcactusADMperlbenchsphinx3sjengSpeedupSignificantYesNoWhat do the results mean?
  96. 96. Comparing optimizations-O2 -O3×30 ×30
  97. 97. -O2 -O3×30 ×30lbm lbm×30 ×30Comparing optimizations
  98. 98. -O2 -O3×30 ×30lbm lbm×30 ×30astar astar×30 ×30Comparing optimizations
  99. 99. -O2 -O3×30 ×30lbm lbm×30 ×30astar astar...×30 ×30Comparing optimizations
  100. 100. -O2 -O3×30 ×300%1%2%3%4%0 25 50 75 100Time (s)PercentofObservedRuntimesVersion−O2−O3Comparing optimizations
  101. 101. -O2-O30%1%2%3%4%0 25 50 75 100Time (s)PercentofObservedRuntimesVersion−O2−O3Is faster than ?
  102. 102. -O2-O30%1%2%3%4%0 25 50 75 100Time (s)PercentofObservedRuntimesVersion−O2−O3Is faster than ?
  103. 103. -O2-O30%1%2%3%4%0 25 50 75 100Time (s)PercentofObservedRuntimesVersion−O2−O3Is faster than ?
  104. 104. -O2-O30%1%2%3%4%0 25 50 75 100Time (s)PercentofObservedRuntimesVersion−O2−O3Is faster than ?
  105. 105. If = -O2-O30%1%2%3%4%0 25 50 75 100Time (s)PercentofObservedRuntimesVersion−O2−O3
  106. 106. If = -O2-O31%2%3%4%PercentofObservedRuntimesVersion−O2−O3
  107. 107. If = -O2-O31%2%3%4%PercentofObservedRuntimesVersion−O2−O3what is the probability of measuringthese differences?
  108. 108. If -O2 = -O3, what is the probability of measuring these differences? Analysis of Variance: aov(time~opt+Error(benchmark/opt), times)
  109. 109. If -O2 = -O3, what is the probability of measuring these differences? Analysis of Variance: If p-value
  110. 110. If -O2 = -O3, what is the probability of measuring these differences? Analysis of Variance: If p-value ≤ 5%
  111. 111. If -O2 = -O3, what is the probability of measuring these differences? Analysis of Variance: If p-value ≤ 5%, we reject the null hypothesis.
  112. 112. Analysis of Variance: If p-value ≤ 5%, we reject the null hypothesis. -O3 vs -O2: p-value = 26.4%. Are we 73.6% confident? One in four experiments will show an effect that does not exist!
  113. 113. Analysis of Variance: If p-value ≤ 5%, we reject the null hypothesis. -O3 vs -O2: p-value = 26.4%; fail to reject the null hypothesis.
  114. 114. Analysis of Variance: If p-value ≤ 5%, we reject the null hypothesis. The effect of -O3 over -O2 is indistinguishable from noise. Did STABILIZER hide the effect?
  115. 115. Runtime with -O2Runtime with -O3Runtime with -O1Runtime with -O0Execution TimeDid STABILIZER hide the effect?
  116. 116. STABILIZERSTABILIZERSTABILIZERSTABILIZERExecution TimeRuntime with -O2Runtime with -O3Runtime with -O1Runtime with -O0speedupsDid STABILIZER hide the effect?
  117. 117. Runtime with -O2Runtime with -O3Runtime with -O1Runtime with -O0STABILIZERSTABILIZERSTABILIZERSTABILIZERExecution TimespeedupsDid STABILIZER hide the effect?
  118. 118. STABILIZER is fast enough [bar chart: % Increase in Execution Time (0%–40%); SPECint: perlbench, gcc, gobmk, h264ref, sjeng, astar, bzip2, libquantum, hmmer, mcf; SPECfp: cactusADM, zeusmp, wrf, gromacs, sphinx3, lbm, milc, namd; Summary: hmean, median]
  119. 119. STABILIZER | Memory layout affects performance: makes performance evaluation difficult | STABILIZER eliminates the effect of layout: random layout enables sound performance evaluation | Case Studies: showed that -O3 does not have a statistically significant effect across our benchmarks
  120. 120. STABILIZER: available at stabilizer-tool.org. PhD Fellowship: Charlie Curtsinger
  121. 121. Backup Slides
  122. 122. LLVM’s Optimizations: • Function Inlining • Unroll Loops • Global Value Numbering • Dead Global Elimination • Merge Duplicate Global Constants • Argument Promotion • Global CSE • Scalar Replacement of Aggregates | -O2 -O3
  123. 123. Mechanisms
  124. 124. STABILIZER randomizesstack frame sizeargvargclocalsargumentsmain
  125. 125. STABILIZER randomizesstack frame sizeargvargclocalsargumentsmainreturn addressframe ptrlocalsargumentsfoo
  126. 126. STABILIZER randomizesstack frame sizeargvargclocalsargumentsmainreturn addressframe ptrlocalsargumentsfooreturn addressframe ptr bar
  127. 127. argvargclocalsargumentsmainreturn addressframe ptrlocalsargumentsfooSTABILIZER randomizesstack frame size
  128. 128. return addressframe ptrlocalslocalsargumentsmainreturn addressframe ptrlocalsargumentsfoobarSTABILIZER randomizesstack frame size
  129. 129. return addressframe ptrlocalslocalsargumentsmainreturn addressframe ptrlocalsargumentsfoobarSTABILIZER randomizesstack frame sizeRandomPadding
  130. 130. foobarbazSTABILIZER randomizesfunction placement
  131. 131. foobarbaztraptraptrapSTABILIZER randomizesfunction placement
  132. 132. foobarbazjmptraptrapfoo′STABILIZER randomizesfunction placement
  133. 133. foobarbazjmptraptrap&bar&baz&gSTABILIZER randomizesfunction placementfoo′
  134. 134. foobarbazjmptraptrapSTABILIZER randomizesfunction placement&bar&baz&gfoo′
  135. 135. foobarbazjmptrapbaz′&foo&barjmpSTABILIZER randomizesfunction placement&bar&baz&gfoo′
  136. 136. Randomizes placement of heap objects, stack frames, functions, and re-randomizes during execution
  137. 137. foobarbazjmptrapbaz′&foo&barjmpSTABILIZER re-randomizesfunction placement&bar&baz&gfoo′
  138. 138. foobarbazjmptrapjmpbaz′&foo&bar&bar&baz&gfoo′STABILIZER re-randomizesfunction placement
  139. 139. foobarbaztrapbaz′&foo&bar&bar&baz&gfoo′traptrapSTABILIZER re-randomizesfunction placement
  140. 140. foobarbaztrapbaz′&foo&bar&bar&baz&gfoo′traptrapSTABILIZER re-randomizesfunction placement
  141. 141. foobarbaztrapbaz′&foo&bar&bar&baz&gfoo′trapjmpbaz″&foo&barSTABILIZER re-randomizesfunction placement
  142. 142. foobarbaztrap &bar&baz&gfoo′trapjmpbaz″&foo&barSTABILIZER re-randomizesfunction placement
  143. 143. 0.00.20.40.6350 360 370 380 390 400Time (s)ProbabilityDensity
  144. 144. 0.00.20.40.6350 360 370 380 390 400Time (s)ProbabilityDensity
  145. 145. 0.00.20.40.6350 360 370 380 390 400Time (s)ProbabilityDensity
  146. 146. 0.00.20.40.6350 360 370 380 390 400Time (s)ProbabilityDensity
  147. 147. 0.00.20.40.6350 360 370 380 390 400Time (s)ProbabilityDensity
  148. 148. Programs with phasesExecution Time
  149. 149. Programs with phasesA B A B AA B A B AExecution Time
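  150. 150. Programs with phases: A B A B A (Execution Time), regrouped as A A A and B B, with $A \sim N(\mu_A, \sigma_A)$ and $B \sim N(\mu_B, \sigma_B)$
  151. 151. Programs with phases: A B A B A (Execution Time), regrouped as A A A and B B, with $A \sim N(\mu_A, \sigma_A)$ and $B \sim N(\mu_B, \sigma_B)$; Execution Time $\sim A + B$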
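  152. 152. Programs with phases: A B A B A (Execution Time) $\sim A + B \sim N(\mu_A, \sigma_A) + N(\mu_B, \sigma_B) = N\left(\mu_A + \mu_B,\ \sqrt{\sigma_A^2 + \sigma_B^2}\right)$ (for independent phases, with $\sigma$ denoting standard deviation: variances add, not standard deviations)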

×