C++: objects, reference to objects, reference to vector elements with and without functions - observed performance difference -
This is a question from a C++ beginner who is getting a headache in the early hours of the morning. Skip to the code at the bottom of the page if you want to have a look. I am applying operations to several variables that have different identifiers but the same type (i.e. double). The operations are done either through an external function call or directly within main.
I consider 6 scenarios:
(1) local objects, not calling a function
(2) references to objects, not calling a function
(3) references to elements in a vector, not calling a function
(4) local objects, calling a function
(5) references to objects, calling a function
(6) references to elements in a vector, calling a function
I got interesting results (to me anyway): (1) and (2) took 574 ms on average, whereas (3), (4), (5) and (6) took approximately 2.77 seconds.
I'll admit that the times for (4), (5) and (6) may be due to the overhead arising from the function call and from passing the elements in. The questions that arise for me are:
1. Why do calculations on references to vector elements (i.e. (3)) take the same time as calling a function? Does this mean there is some sort of overhead when accessing references to vector elements, similar to the overhead of supplying values to a function? (Note that the function in this case does not take double& but rather double.)
2. If I change the function parameters to double&, why do (1) and (2) then take 2.7 seconds? I mean, I'm not calling the function in (1) and (2)! (Can someone else try this, because I found it weird.)
3. Are there special ways to optimize any of these, if any?
Code: compiled with g++ 4.7.2 using
g++ -std=c++11 -O3
on Windows (MinGW).
#include <iostream> // c++ input/output libraries #include <stdio.h> #include <vector> #include "timer.h" void do_some_calc(double aa, double bb, double cc, double dd, double ee) { double total{0}, add{0}; for(int tests=0; tests<5; ++tests) { timer time; time.start(); for(int i=0; i<100000; ++i) { for(int j=0; j<2000; ++j) { add = aa*bb/cc*dd/ee; total += add; aa=aa/2; bb=bb/2; cc=cc/2; dd=dd/2; ee=ee/2; aa=aa*2; bb=bb*2; cc=cc*2; dd=dd*2; ee=ee*2; } } cout << total << " " << add << endl; time.finish("func call"); } } int main() { // numbers 12, 13,14,13 , 12 tied vector std::vector<double> ch{12,13,14,13,12}; // numbers 12, 13,14,13 , 12 tied independent objects double = 12; double b = 13; double c = 14; double d = 13; double e = 12; // reference objects double& a_ref = a; double& b_ref = b; double& c_ref = c; double& d_ref = d; double& e_ref = e; // reference vector elements double& a_vref = ch[0]; double& b_vref = ch[1]; double& c_vref = ch[2]; double& d_vref = ch[3]; double& e_vref = ch[4]; cout << "1) normal without function (i.e. local):" << endl; double total{0}, add{0}; for(int tests=0; tests<5; ++tests) { timer time; time.start(); for(int i=0; i<100000; ++i) { for(int j=0; j<2000; ++j) { add = a*b/c*d/e; total += add; a=a/2; b=b/2; c=c/2; d=d/2; e=e/2; a=a*2; b=b*2; c=c*2; d=d*2; e=e*2; } } cout << total << " " << add << endl; time.finish("obj"); } cout << "\n\n2) reference double obj without function (i.e. local):" << endl; total=0, add=0; for(int tests=0; tests<5; ++tests) { timer time; time.start(); for(int i=0; i<100000; ++i) { for(int j=0; j<2000; ++j) { add = a_ref*b_ref/c_ref*d_ref/e_ref; total += add; a_ref=a_ref/2; b_ref=b_ref/2; c_ref=c_ref/2; d_ref=d_ref/2; e_ref=e_ref/2; a_ref=a_ref*2; b_ref=b_ref*2; c_ref=c_ref*2; d_ref=d_ref*2; e_ref=e_ref*2; } } cout << total << " " << add << endl; time.finish("ref obj"); } cout << "\n\n3) reference double obj vector without function (i.e. 
local):" << endl; total=0, add=0; for(int tests=0; tests<5; ++tests) { timer time; time.start(); for(int i=0; i<100000; ++i) { for(int j=0; j<2000; ++j) { add = a_vref*b_vref/c_vref*d_vref/e_vref; total += add; a_vref=a_vref/2; b_vref=b_vref/2; c_vref=c_vref/2; d_vref=d_vref/2; e_vref=e_vref/2; a_vref=a_vref*2; b_vref=b_vref*2; c_vref=c_vref*2; d_vref=d_vref*2; e_vref=e_vref*2; } } cout << total << " " << add << endl; time.finish("ref vec"); } //cout << "\n\nreference obj vector without function (i.e. local):" << endl; cout << "\n\n4) normal function:" << endl; do_some_calc(a,b,c,d,e); cout << "\n\n5) reference double obj function:" << endl; do_some_calc(a_ref,b_ref,c_ref,d_ref,e_ref); cout << "\n\n6) reference double obj vector function:" << endl; do_some_calc(a_vref,b_vref,c_vref,d_vref,e_vref); return 0; }
Here is the custom header, included as "timer.h",
that I created and used here to calculate the times:
/*
  timer class for C++11 and pre-C++11 (i.e. C++03, C++98 etc.) [version 0.1]
  static, and does not support multiple starts
  author:
  tested on gcc
*/
#ifndef timer_h
#define timer_h

#include <string>
#include <iostream>

#if (__cplusplus >= 201103L)
#include <chrono> // C++11 timing facilities
#include <ratio>
#else
#include <ctime>  // pre-C++11 timing facilities
#endif

/**
 * Minimal stopwatch: call start(), run the code under test, then call
 * finish(label) to print the elapsed time to std::cout.
 *
 * With C++11 or later the time is taken from
 * std::chrono::high_resolution_clock; otherwise from std::clock().
 */
class timer
{
private:
#if __cplusplus >= 201103L
    typedef std::chrono::high_resolution_clock::time_point hiresclock;
    typedef std::chrono::duration<long double, std::micro> micro_t;
    hiresclock store;   // time point recorded by start()
#else
    long double store;  // std::clock() reading, in seconds, recorded by start()
#endif

public:
    /// Records the current time as the start of the measured interval.
    void start(void);

    /// Prints "time taken: <value> <unit>" followed by " for: <disp>".
    void finish(const std::string& disp);

    /**
     * Writes `microseconds` to `out` as "<value> <unit>" (no newline),
     * choosing the unit from the magnitude:
     *   < 1e3   -> micro-seconds
     *   < 1e6   -> milli-seconds
     *   < 1e9   -> seconds
     *   < 6e10  -> minutes
     *   else    -> hours
     * Returns `out` so calls can be chained.
     */
    static std::ostream& write_elapsed(std::ostream& out, long double microseconds);
}; // end of class timer

inline void timer::start(void)
{
#if __cplusplus >= 201103L
    store = std::chrono::high_resolution_clock::now();
#else
    store = (long double)std::clock() / CLOCKS_PER_SEC;
#endif
}

inline std::ostream& timer::write_elapsed(std::ostream& out, long double microseconds)
{
    if (microseconds < 1000.0L)
        out << microseconds << " micro-seconds";
    else if (microseconds < 1000000.0L)
        out << microseconds / 1000.0L << " milli-seconds";
    else if (microseconds < 1000000000.0L)
        out << microseconds / 1000000.0L << " seconds";
    else if (microseconds < 60000000000.0L)
        out << microseconds / 60000000.0L << " minutes";
    else
        out << microseconds / 3600000000.0L << " hours";
    return out;
}

// `inline` is required: this is a header, and a non-inline definition would
// be emitted by every translation unit that includes it.
inline void timer::finish(const std::string& disp)
{
    std::cout << "time taken: ";
#if __cplusplus >= 201103L
    const timer::micro_t elapsed = std::chrono::duration_cast<timer::micro_t>(
        std::chrono::high_resolution_clock::now() - store);
    write_elapsed(std::cout, elapsed.count()) << std::endl;
#else
    std::cout << ((long double)std::clock() / CLOCKS_PER_SEC - store)
              << " seconds" << std::endl;
#endif
    std::cout << " for: " << disp << std::endl;
}

#endif // instantiate timer.h once
Although this is not technically an answer, I recommend that when doing performance measurements you do not use the clock, because at the moment you run the test the CPU might or might not be in SpeedStep mode (i.e., running at a lower frequency to save power).
Instead, try this x86-specific thing:
http://en.wikipedia.org/wiki/time_stamp_counter
You can use it like so:
#include <cstdint>
#if !(defined(__i386__) || defined(__x86_64__))
#include <chrono>
#endif

/**
 * Reads a monotonically advancing tick counter.
 *
 * On x86 / x86-64 (GCC-style inline assembly) this returns the CPU time stamp
 * counter:
 *   - if SUPPORTS_RDTSCP (or the lower-case spelling supports_rdtscp) is
 *     defined, the `rdtscp` instruction is used;
 *   - otherwise `cpuid` is executed first and then `rdtsc` is read, for chips
 *     that have no `rdtscp`.
 * On any other architecture it falls back to std::chrono::steady_clock ticks,
 * so the function still compiles; those ticks are NOT CPU cycles.
 *
 * @return the 64-bit counter value.
 */
std::uint64_t getticks() noexcept
{
#if defined(__i386__) || defined(__x86_64__)
    std::uint32_t lo, hi;
#if defined(SUPPORTS_RDTSCP) || defined(supports_rdtscp)
    __asm__ __volatile__("rdtscp"
                         : "=a"(lo), "=d"(hi)
                         :
                         : "ecx");          // rdtscp also writes ECX
#else
    __asm__ __volatile__("cpuid\n\t"
                         "rdtsc"
                         : "=a"(lo), "=d"(hi)  // works in 32- or 64-bit modes (don't use "=A")
                         : "0"(0u)             // give cpuid a defined leaf in EAX
                         : "ebx", "ecx");      // because of cpuid
#endif
    return static_cast<std::uint64_t>(hi) << 32 | lo;
#else
    return static_cast<std::uint64_t>(
        std::chrono::steady_clock::now().time_since_epoch().count());
#endif
}
As you can see, you need to define supports_rdtscp based on the type of chip you have.
No matter what speed the CPU is running at, the number of ticks should be the same when measuring how many ticks were spent on a given instruction sequence. Keep in mind that pipelining, out-of-order execution and similar effects will make the numbers vary, but it's a lot closer than the clock-based timing you are using.
Comments
Post a Comment