Swift - How to estimate the time consumption of my sample code to an order of magnitude


I wrote a piece of code that calculates pi using the Monte Carlo method, running on a 2013 MacBook Air with a 1.7 GHz Intel Core i7 (it seems to be the i7-4650U). When the loop count is 10^8 it takes 2–3 seconds, and when the loop count is 10^9 it takes 25 seconds.

import Foundation

/// Returns a pseudo-random value obtained by scaling `rand()` linearly
/// from `lowerbound` to `upperbound`.
///
/// - Parameters:
///   - lowerbound: Value produced when `rand()` returns 0.
///   - upperbound: Value produced when `rand()` returns `RAND_MAX`.
/// - Returns: `lowerbound + rand() / RAND_MAX * (upperbound - lowerbound)`.
func randomnumber(lowerbound: Double, upperbound: Double) -> Double {
    return lowerbound + Double(rand()) / Double(RAND_MAX) * (upperbound - lowerbound)
}

// Number of random points to sample in the square [-1, 1] x [-1, 1].
let pointnumber = 1000000000
// Number of sampled points that fall inside the unit circle.
var pointinsidecount = 0

// Closed range, as in the original post: the body runs pointnumber + 1 times.
// The first argument label is omitted at the call site, as in the original code.
for _ in 0...pointnumber {
    let x = randomnumber(-1.0, upperbound: 1.0)
    let y = randomnumber(-1.0, upperbound: 1.0)
    // The point (x, y) lies inside the unit circle when x^2 + y^2 <= 1.
    if x*x + y*y <= 1 {
        pointinsidecount += 1
    }
}

// Area of circle / area of square = pi / 4, so pi is about 4 * (inside / total).
let result = Double(pointinsidecount) / Double(pointnumber) * 4
let pistring = String(format: "%.50f", result)
print("pi \(pistring)")

I ran "di -n randomnumber" to get the assembly code of the randomnumber function:

swifttest`swifttest.randomnumber (swift.double, upperbound : swift.double) -> swift.double:     0x10023c160 <+0>:  pushq  %rbp     0x10023c161 <+1>:  movq   %rsp, %rbp     0x10023c164 <+4>:  subq   $0x20, %rsp     0x10023c168 <+8>:  movsd  %xmm0, -0x8(%rbp)     0x10023c16d <+13>: movsd  %xmm1, -0x10(%rbp)     0x10023c172 <+18>: movsd  %xmm0, -0x18(%rbp)     0x10023c177 <+23>: movsd  %xmm1, -0x20(%rbp)     0x10023c17c <+28>: callq  0x10027585e               ; symbol stub for: rand     0x10023c181 <+33>: movsd  0x3bc1f(%rip), %xmm0      ; witness table offset swift.valistbuilder.__allocating_init (swift.valistbuilder.type)() -> swift.valistbuilder + 352     0x10023c189 <+41>: cvtsi2sdl %eax, %xmm1     0x10023c18d <+45>: divsd  %xmm0, %xmm1     0x10023c191 <+49>: movsd  -0x20(%rbp), %xmm0     0x10023c196 <+54>: movsd  -0x18(%rbp), %xmm2     0x10023c19b <+59>: subsd  %xmm2, %xmm0     0x10023c19f <+63>: mulsd  %xmm0, %xmm1     0x10023c1a3 <+67>: addsd  %xmm1, %xmm2     0x10023c1a7 <+71>: movaps %xmm2, %xmm0     0x10023c1aa <+74>: addq   $0x20, %rsp     0x10023c1ae <+78>: popq   %rbp     0x10023c1af <+79>: retq    

and ran "di -f" to get the assembly code of the whole file:

swifttest`main:     0x10023bcd0 <+0>:    pushq  %rbp     0x10023bcd1 <+1>:    movq   %rsp, %rbp     0x10023bcd4 <+4>:    subq   $0x120, %rsp     0x10023bcdb <+11>:   leaq   0x9340e(%rip), %rax       ; globalinit_33_1bdf70ffc18749bab495a73b459ed2f0_token6     0x10023bce2 <+18>:   leaq   0x933ff(%rip), %rcx       ; static swift.process._argc : swift.int32     0x10023bce9 <+25>:   movl   %edi, (%rcx)     0x10023bceb <+27>:   cmpq   $-0x1, (%rax)     0x10023bcf2 <+34>:   movq   %rsi, -0x60(%rbp)     0x10023bcf6 <+38>:   je     0x10023bd0e               ; <+62> @ main.swift     0x10023bcf8 <+40>:   leaq   0x933f1(%rip), %rdi       ; globalinit_33_1bdf70ffc18749bab495a73b459ed2f0_token6     0x10023bcff <+47>:   leaq   -0x99d56(%rip), %rax      ; globalinit_33_1bdf70ffc18749bab495a73b459ed2f0_func6     0x10023bd06 <+54>:   movq   %rax, %rsi     0x10023bd09 <+57>:   callq  0x100266870               ; swift_once     0x10023bd0e <+62>:   leaq   0x933e3(%rip), %rax       ; static swift.process._unsafeargv : swift.unsafemutablepointer<swift.unsafemutablepointer<swift.int8>>     0x10023bd15 <+69>:   movq   -0x60(%rbp), %rcx     0x10023bd19 <+73>:   movq   %rcx, (%rax)     0x10023bd1c <+76>:   movq   $0x989680, 0x93499(%rip)  ; lazy cache variable type metadata swift.valistbuilder + 4     0x10023bd27 <+87>:   movq   $0x0, 0x93496(%rip)       ; swifttest.pointnumber : swift.int + 4     0x10023bd32 <+98>:   movq   0x93487(%rip), %rax       ; swifttest.pointnumber : swift.int     0x10023bd39 <+105>:  movq   %rax, -0x68(%rbp)     0x10023bd3d <+109>:  xorl   %eax, %eax     0x10023bd3f <+111>:  movl   %eax, %ecx     0x10023bd41 <+113>:  movq   -0x68(%rbp), %rdx     0x10023bd45 <+117>:  cmpq   %rdx, %rcx     0x10023bd48 <+120>:  setle  %sil     0x10023bd4c <+124>:  testb  $0x1, %sil     0x10023bd50 <+128>:  jne    0x10023bd54               ; <+132> @ main.swift:17     0x10023bd52 <+130>:  jmp    0x10023bdb3               ; <+227> @ main.swift:17     0x10023bd54 <+132>:  movq   
-0x68(%rbp), %rax     0x10023bd58 <+136>:  incq   %rax     0x10023bd5b <+139>:  seto   %cl     0x10023bd5e <+142>:  movq   -0x68(%rbp), %rdx     0x10023bd62 <+146>:  cmpq   %rdx, %rax     0x10023bd65 <+149>:  setg   %sil     0x10023bd69 <+153>:  testb  $0x1, %sil     0x10023bd6d <+157>:  movb   %cl, -0x69(%rbp)     0x10023bd70 <+160>:  jne    0x10023bd74               ; <+164> @ main.swift:17     0x10023bd72 <+162>:  jmp    0x10023bd87               ; <+183> @ main.swift:17     0x10023bd74 <+164>:  movq   -0x68(%rbp), %rax     0x10023bd78 <+168>:  incq   %rax     0x10023bd7b <+171>:  seto   %cl     0x10023bd7e <+174>:  movq   %rax, -0x78(%rbp)     0x10023bd82 <+178>:  movb   %cl, -0x79(%rbp)     0x10023bd85 <+181>:  jmp    0x10023bddf               ; <+271> @ main.swift:17     0x10023bd87 <+183>:  leaq   0x418a2(%rip), %rdi       ; "fatal error"     0x10023bd8e <+190>:  movl   $0xb, %eax     0x10023bd93 <+195>:  movl   %eax, %esi     0x10023bd95 <+197>:  movl   $0x2, %eax     0x10023bd9a <+202>:  leaq   0x487af(%rip), %rcx       ; "range end index has no valid successor"     0x10023bda1 <+209>:  movl   $0x26, %edx     0x10023bda6 <+214>:  movl   %edx, %r8d     0x10023bda9 <+217>:  movl   %eax, %edx     0x10023bdab <+219>:  movl   %eax, %r9d     0x10023bdae <+222>:  callq  0x1001a80f0               ; function signature specialization <arg[0] = exploded, arg[1] = exploded, arg[2] = dead, arg[3] = dead> of swift._fatalerrormessage (swift.staticstring, swift.staticstring, swift.staticstring, swift.uint) -> ()     0x10023bdb3 <+227>:  leaq   0x41876(%rip), %rdi       ; "fatal error"     0x10023bdba <+234>:  movl   $0xb, %eax     0x10023bdbf <+239>:  movl   %eax, %esi     0x10023bdc1 <+241>:  movl   $0x2, %eax     0x10023bdc6 <+246>:  leaq   0x48753(%rip), %rcx       ; "can't form range end < start"     0x10023bdcd <+253>:  movl   $0x21, %edx     0x10023bdd2 <+258>:  movl   %edx, %r8d     0x10023bdd5 <+261>:  movl   %eax, %edx     0x10023bdd7 <+263>:  movl   %eax, %r9d   
  0x10023bdda <+266>:  callq  0x1001a80f0               ; function signature specialization <arg[0] = exploded, arg[1] = exploded, arg[2] = dead, arg[3] = dead> of swift._fatalerrormessage (swift.staticstring, swift.staticstring, swift.staticstring, swift.uint) -> ()     0x10023bddf <+271>:  leaq   -0x30(%rbp), %rdi     0x10023bde3 <+275>:  leaq   -0x20(%rbp), %rsi     0x10023bde7 <+279>:  movq   $0x0, -0x20(%rbp)     0x10023bdef <+287>:  movq   -0x78(%rbp), %rax     0x10023bdf3 <+291>:  movq   %rax, -0x18(%rbp)     0x10023bdf7 <+295>:  callq  0x1000362e0               ; generic specialization <swift.int swift.int : swift.forwardindextype in swift, swift.int swift.int : swift._signedintegertype in swift, swift.int swift.int : swift._builtinintegerliteralconvertible in swift, swift.int> of swift.range.generate <a a: swift.forwardindextype> (swift.range<a>)() -> swift.rangegenerator<a>     0x10023bdfc <+300>:  movq   -0x30(%rbp), %rax     0x10023be00 <+304>:  movq   -0x28(%rbp), %rsi     0x10023be04 <+308>:  movq   %rax, -0x10(%rbp)     0x10023be08 <+312>:  movq   %rsi, -0x8(%rbp)     0x10023be0c <+316>:  leaq   -0x40(%rbp), %rdi     0x10023be10 <+320>:  leaq   -0x10(%rbp), %rsi     0x10023be14 <+324>:  callq  0x100036960               ; generic specialization <swift.int swift.int : swift.forwardindextype in swift, swift.int swift.int : swift._signedintegertype in swift, swift.int swift.int : swift._builtinintegerliteralconvertible in swift, swift.int> of swift.rangegenerator.next <a a: swift.forwardindextype> (inout swift.rangegenerator<a>)() -> swift.optional<a>     0x10023be19 <+329>:  movq   -0x40(%rbp), %rsi     0x10023be1d <+333>:  movb   -0x38(%rbp), %al     0x10023be20 <+336>:  xorb   $0x1, %al     0x10023be22 <+338>:  testb  $0x1, %al     0x10023be24 <+340>:  movq   %rsi, -0x88(%rbp)     0x10023be2b <+347>:  jne    0x10023be32               ; <+354> @ main.swift:17     0x10023be2d <+349>:  jmp    0x10023bed4               ; <+516> @ main.swift:23     
0x10023be32 <+354>:  movsd  0x3bf66(%rip), %xmm0      ; witness table offset swift.valistbuilder.__allocating_init (swift.valistbuilder.type)() -> swift.valistbuilder + 344     0x10023be3a <+362>:  movsd  0x3bf56(%rip), %xmm1      ; witness table offset swift.valistbuilder.__allocating_init (swift.valistbuilder.type)() -> swift.valistbuilder + 336     0x10023be42 <+370>:  movq   -0x88(%rbp), %rax     0x10023be49 <+377>:  movq   %rax, -0x48(%rbp)     0x10023be4d <+381>:  callq  0x10023c160               ; swifttest.randomnumber (swift.double, upperbound : swift.double) -> swift.double @ main.swift:11     0x10023be52 <+386>:  movsd  0x3bf46(%rip), %xmm1      ; witness table offset swift.valistbuilder.__allocating_init (swift.valistbuilder.type)() -> swift.valistbuilder + 344     0x10023be5a <+394>:  movsd  0x3bf36(%rip), %xmm2      ; witness table offset swift.valistbuilder.__allocating_init (swift.valistbuilder.type)() -> swift.valistbuilder + 336     0x10023be62 <+402>:  movsd  %xmm0, -0x50(%rbp)     0x10023be67 <+407>:  movsd  %xmm0, -0x90(%rbp)     0x10023be6f <+415>:  movaps %xmm1, %xmm0     0x10023be72 <+418>:  movaps %xmm2, %xmm1     0x10023be75 <+421>:  callq  0x10023c160               ; swifttest.randomnumber (swift.double, upperbound : swift.double) -> swift.double @ main.swift:11     0x10023be7a <+426>:  movsd  0x3bf16(%rip), %xmm1      ; witness table offset swift.valistbuilder.__allocating_init (swift.valistbuilder.type)() -> swift.valistbuilder + 336     0x10023be82 <+434>:  movsd  %xmm0, -0x58(%rbp)     0x10023be87 <+439>:  movsd  -0x90(%rbp), %xmm2     0x10023be8f <+447>:  mulsd  %xmm2, %xmm2     0x10023be93 <+451>:  mulsd  %xmm0, %xmm0     0x10023be97 <+455>:  addsd  %xmm0, %xmm2     0x10023be9b <+459>:  ucomisd %xmm2, %xmm1     0x10023be9f <+463>:  jb     0x10023becf               ; <+511> @ main.swift:23     0x10023bea1 <+465>:  movq   0x93320(%rip), %rax       ; swifttest.pointinsidecount : swift.int     0x10023bea8 <+472>:  incq   %rax     
0x10023beab <+475>:  seto   %cl     0x10023beae <+478>:  movq   %rax, -0x98(%rbp)     0x10023beb5 <+485>:  movb   %cl, -0x99(%rbp)     0x10023bebb <+491>:  jo     0x10023c155               ; <+1157> @ main.swift:21     0x10023bec1 <+497>:  movq   -0x98(%rbp), %rax     0x10023bec8 <+504>:  movq   %rax, 0x932f9(%rip)       ; swifttest.pointinsidecount : swift.int     0x10023becf <+511>:  jmp    0x10023be0c               ; <+316> @ main.swift:17     0x10023bed4 <+516>:  movsd  0x3beb4(%rip), %xmm0      ; witness table offset swift.valistbuilder.__allocating_init (swift.valistbuilder.type)() -> swift.valistbuilder + 328     0x10023bedc <+524>:  cvtsi2sdq 0x932e3(%rip), %xmm1      ; swifttest.pointinsidecount : swift.int     0x10023bee5 <+533>:  cvtsi2sdq 0x932d2(%rip), %xmm2      ; swifttest.pointnumber : swift.int     0x10023beee <+542>:  divsd  %xmm2, %xmm1     0x10023bef2 <+546>:  mulsd  %xmm0, %xmm1     0x10023bef6 <+550>:  movsd  %xmm1, 0x932d2(%rip)      ; swifttest.result : swift.double     0x10023befe <+558>:  callq  0x10023c1b0               ; type metadata accessor swift.cvarargtype     0x10023bf03 <+563>:  movl   $0x1, %ecx     0x10023bf08 <+568>:  movl   %ecx, %edi     0x10023bf0a <+570>:  movq   %rax, %rsi     0x10023bf0d <+573>:  callq  0x100045770               ; swift._allocateuninitializedarray <a> (builtin.word) -> (swift.array<a>, builtin.rawpointer)     0x10023bf12 <+578>:  leaq   0x4865e(%rip), %rdi       ; "%.50f"     0x10023bf19 <+585>:  movl   $0x5, %ecx     0x10023bf1e <+590>:  movl   %ecx, %esi     0x10023bf20 <+592>:  movl   $0x1, %ecx     0x10023bf25 <+597>:  movq   %rdx, -0xa8(%rbp)     0x10023bf2c <+604>:  movl   %ecx, %edx     0x10023bf2e <+606>:  movq   %rax, -0xb0(%rbp)     0x10023bf35 <+613>:  callq  0x100001aa0               ; swift.string.init (swift.string.type)(_builtinstringliteral : builtin.rawpointer, bytesize : builtin.word, isascii : builtin.int1) -> swift.string     0x10023bf3a <+618>:  leaq   0x667b7(%rip), %rsi       ; 
protocol witness table swift.double : swift.cvarargtype in swift     0x10023bf41 <+625>:  leaq   0x6a258(%rip), %rdi       ; direct type metadata swift.double     0x10023bf48 <+632>:  addq   $0x8, %rdi     0x10023bf4f <+639>:  movq   -0xa8(%rbp), %r8     0x10023bf56 <+646>:  movq   %rdi, 0x18(%r8)     0x10023bf5a <+650>:  movq   %rsi, 0x20(%r8)     0x10023bf5e <+654>:  movsd  0x9326a(%rip), %xmm0      ; swifttest.result : swift.double     0x10023bf66 <+662>:  movsd  %xmm0, (%r8)     0x10023bf6b <+667>:  movq   %rax, %rdi     0x10023bf6e <+670>:  movq   %rdx, %rsi     0x10023bf71 <+673>:  movq   %rcx, %rdx     0x10023bf74 <+676>:  movq   -0xb0(%rbp), %rcx     0x10023bf7b <+683>:  callq  0x10002dfa0               ; ext.foundation.swift.string.init (swift.string.type)(format : swift.string, swift.array<swift.cvarargtype>...) -> swift.string     0x10023bf80 <+688>:  movq   %rax, 0x93251(%rip)       ; swifttest.pistring : swift.string     0x10023bf87 <+695>:  movq   %rdx, 0x93252(%rip)       ; swifttest.pistring : swift.string + 8     0x10023bf8e <+702>:  movq   %rcx, 0x93253(%rip)       ; swifttest.pistring : swift.string + 16 ->  0x10023bf95 <+709>:  callq  0x10023c200               ; type metadata accessor protocol<>     0x10023bf9a <+714>:  movl   $0x1, %r9d     0x10023bfa0 <+720>:  movl   %r9d, %edi     0x10023bfa3 <+723>:  movq   %rax, %rsi     0x10023bfa6 <+726>:  callq  0x100045770               ; swift._allocateuninitializedarray <a> (builtin.word) -> (swift.array<a>, builtin.rawpointer)     0x10023bfab <+731>:  movl   $0x3, %r9d     0x10023bfb1 <+737>:  movl   %r9d, %edi     0x10023bfb4 <+740>:  leaq   0x6fe25(%rip), %rcx       ; direct type metadata swift.string     0x10023bfbb <+747>:  addq   $0x8, %rcx     0x10023bfc2 <+754>:  movq   %rcx, 0x18(%rdx)     0x10023bfc6 <+758>:  movq   %rcx, %rsi     0x10023bfc9 <+761>:  movq   %rax, -0xb8(%rbp)     0x10023bfd0 <+768>:  movq   %rdx, -0xc0(%rbp)     0x10023bfd7 <+775>:  callq  0x100045770               ; 
swift._allocateuninitializedarray <a> (builtin.word) -> (swift.array<a>, builtin.rawpointer)     0x10023bfdc <+780>:  leaq   0x4859a(%rip), %rdi       ; "pi "     0x10023bfe3 <+787>:  movl   $0x6, %r9d     0x10023bfe9 <+793>:  movl   %r9d, %esi     0x10023bfec <+796>:  movl   $0x1, %r9d     0x10023bff2 <+802>:  movq   %rdx, -0xc8(%rbp)     0x10023bff9 <+809>:  movl   %r9d, %edx     0x10023bffc <+812>:  movq   %rax, -0xd0(%rbp)     0x10023c003 <+819>:  callq  0x100001aa0               ; swift.string.init (swift.string.type)(_builtinstringliteral : builtin.rawpointer, bytesize : builtin.word, isascii : builtin.int1) -> swift.string     0x10023c008 <+824>:  movq   %rax, %rdi     0x10023c00b <+827>:  movq   %rdx, %rsi     0x10023c00e <+830>:  movq   %rcx, %rdx     0x10023c011 <+833>:  callq  0x1000470d0               ; swift.string.init (swift.string.type)(stringinterpolationsegment : swift.string) -> swift.string     0x10023c016 <+838>:  movq   -0xc8(%rbp), %rsi     0x10023c01d <+845>:  movq   %rax, (%rsi)     0x10023c020 <+848>:  movq   %rdx, 0x8(%rsi)     0x10023c024 <+852>:  movq   %rcx, 0x10(%rsi)     0x10023c028 <+856>:  movq   0x931a9(%rip), %rdi       ; swifttest.pistring : swift.string     0x10023c02f <+863>:  movq   0x931aa(%rip), %rsi       ; swifttest.pistring : swift.string + 8     0x10023c036 <+870>:  movq   0x931ab(%rip), %rax       ; swifttest.pistring : swift.string + 16     0x10023c03d <+877>:  movq   %rdi, -0xd8(%rbp)     0x10023c044 <+884>:  movq   %rax, %rdi     0x10023c047 <+887>:  movq   %rsi, -0xe0(%rbp)     0x10023c04e <+894>:  movq   %rax, -0xe8(%rbp)     0x10023c055 <+901>:  callq  0x100268160               ; swift_unknownretain     0x10023c05a <+906>:  movq   -0xd8(%rbp), %rdi     0x10023c061 <+913>:  movq   -0xe0(%rbp), %rsi     0x10023c068 <+920>:  movq   -0xe8(%rbp), %rdx     0x10023c06f <+927>:  callq  0x1000470d0               ; swift.string.init (swift.string.type)(stringinterpolationsegment : swift.string) -> swift.string     
0x10023c074 <+932>:  leaq   0x40d15(%rip), %rdi       ; ""     0x10023c07b <+939>:  xorl   %r9d, %r9d     0x10023c07e <+942>:  movl   %r9d, %esi     0x10023c081 <+945>:  movl   $0x1, %r9d     0x10023c087 <+951>:  movq   -0xc8(%rbp), %r8     0x10023c08e <+958>:  movq   %rax, 0x18(%r8)     0x10023c092 <+962>:  movq   %rdx, 0x20(%r8)     0x10023c096 <+966>:  movq   %rcx, 0x28(%r8)     0x10023c09a <+970>:  movl   %r9d, %edx     0x10023c09d <+973>:  callq  0x100001aa0               ; swift.string.init (swift.string.type)(_builtinstringliteral : builtin.rawpointer, bytesize : builtin.word, isascii : builtin.int1) -> swift.string     0x10023c0a2 <+978>:  movq   %rax, %rdi     0x10023c0a5 <+981>:  movq   %rdx, %rsi     0x10023c0a8 <+984>:  movq   %rcx, %rdx     0x10023c0ab <+987>:  callq  0x1000470d0               ; swift.string.init (swift.string.type)(stringinterpolationsegment : swift.string) -> swift.string     0x10023c0b0 <+992>:  movq   -0xc8(%rbp), %rsi     0x10023c0b7 <+999>:  movq   %rax, 0x30(%rsi)     0x10023c0bb <+1003>: movq   %rdx, 0x38(%rsi)     0x10023c0bf <+1007>: movq   %rcx, 0x40(%rsi)     0x10023c0c3 <+1011>: movq   -0xd0(%rbp), %rdi     0x10023c0ca <+1018>: callq  0x1000470c0               ; swift.string.init (swift.string.type)(stringinterpolation : swift.array<swift.string>...) 
-> swift.string     0x10023c0cf <+1023>: movq   -0xc0(%rbp), %rsi     0x10023c0d6 <+1030>: movq   %rax, (%rsi)     0x10023c0d9 <+1033>: movq   %rdx, 0x8(%rsi)     0x10023c0dd <+1037>: movq   %rcx, 0x10(%rsi)     0x10023c0e1 <+1041>: callq  0x10012aa70               ; swift.(print (swift.array<protocol<>>, separator : swift.string, terminator : swift.string) -> ()).(default argument 1)     0x10023c0e6 <+1046>: movq   %rax, -0xf0(%rbp)     0x10023c0ed <+1053>: movq   %rdx, -0xf8(%rbp)     0x10023c0f4 <+1060>: movq   %rcx, -0x100(%rbp)     0x10023c0fb <+1067>: callq  0x10012aa90               ; swift.(print (swift.array<protocol<>>, separator : swift.string, terminator : swift.string) -> ()).(default argument 2)     0x10023c100 <+1072>: movq   -0xb8(%rbp), %rdi     0x10023c107 <+1079>: movq   -0xf0(%rbp), %rsi     0x10023c10e <+1086>: movq   -0xf8(%rbp), %r8     0x10023c115 <+1093>: movq   %rdx, -0x108(%rbp)     0x10023c11c <+1100>: movq   %r8, %rdx     0x10023c11f <+1103>: movq   -0x100(%rbp), %r10     0x10023c126 <+1110>: movq   %rcx, -0x110(%rbp)     0x10023c12d <+1117>: movq   %r10, %rcx     0x10023c130 <+1120>: movq   %rax, %r8     0x10023c133 <+1123>: movq   -0x108(%rbp), %r9     0x10023c13a <+1130>: movq   -0x110(%rbp), %rax     0x10023c141 <+1137>: movq   %rax, (%rsp)     0x10023c145 <+1141>: callq  0x10012aab0               ; swift.print (swift.array<protocol<>>, separator : swift.string, terminator : swift.string) -> ()     0x10023c14a <+1146>: xorl   %eax, %eax     0x10023c14c <+1148>: addq   $0x120, %rsp     0x10023c153 <+1155>: popq   %rbp     0x10023c154 <+1156>: retq        0x10023c155 <+1157>: ud2         0x10023c157 <+1159>: nopw   (%rax,%rax) 

Can I estimate the time consumption as below?

The randomnumber function consists of 20 instructions, hence the calculations of x and y consist of 40 instructions. Incrementing pointinsidecount executes several more instructions, so in the loop there are 4–5 dozen instructions (assume 50). The time consumption outside of the loop can be ignored.

If I assume the 4650U runs 2 instructions per cycle in this program on average, then when the loop count is 10^8, the whole time consumption is 50 * 10^8 / (1.7 * 10^9 * 2) ≈ 1.5 seconds.

You can't assume the same IPC for all loops. Sure, one loop runs at 2 IPC, but that doesn't tell you anything about other loops. You have to analyse the code to find the bottlenecks and the amount of parallelism.


If you can safely assume no cache misses or branch mispredicts, you can get reasonable cycle-count estimates for small loops on specific Intel microarchitectures using IACA, Intel's static code analyser. It's far from a full cycle-accurate simulation of real hardware, but it does have its own model for distributing uops to ports. It usually gets sensible numbers.

You can do the same sort of analysis by hand (including for CPUs that IACA doesn't know about) using Agner Fog's instruction tables and microarchitecture guides.

Things work out quite accurately when the loop is bottlenecked on the latency of a loop-carried dependency chain, or on saturating one execution port.

At high throughputs, there are many subtle effects that can bottleneck code that you'd hope would run at 4 fused-domain uops per clock. The frontend can only sustain that for quite small loops (~28 or 56 uops), because the uop cache has limited throughput because of uop-cache-line boundaries and uops not being in groups of 4.

"Significant FMA performance anomaly experienced in the Intel Broadwell processor" is an example of how things can be hard to understand. You'd expect the code to saturate the 3 vector execution ports, and it does on Haswell and on Skylake, but it doesn't come close on Broadwell. And that's not a front-end bottleneck, since the loop is small enough to fit in the loop buffer.

Again, all of this is without considering branch mispredicts or cache misses.

If this sounds hard and complicated, that's because it is. This is why benchmarks are often more useful than static analysis. However, microbenchmarks are really easy to get wrong. You should look at the asm to make sure you didn't screw up and let the compiler optimize away the thing you wanted to test. You need to understand a lot about how CPUs work to avoid pitfalls, like putting something else slow in your microbenchmark and having it dominate the run-time instead of the thing you wanted to test.


Comments