Slow std::string concatenation on Windows

  • A+
Category:Languages

I have a program that needs to concatenate lots of strings together (more precisely, integers converted to strings). On my Ubuntu machine (running g++ 7.3.0) the code runs in 1.5 seconds. But the code needs to run on Windows as well (using g++ 6.3.0 from MinGW), where it takes 15 seconds to complete. Furthermore, the Ubuntu setup runs on a much slower laptop with an i7-4712MQ CPU @ 2.30GHz, whereas the Windows machine has an i7-7700K CPU @ 4.20GHz.

The code to reproduce the timings is shown below. I compile the code with g++ tester.cpp -O2 -o tester (or -o tester.exe on Windows).

#include <iostream>
#include <chrono>
#include <string>  // std::string / std::to_string: do not rely on <iostream> pulling this in

// Benchmark: appends n decimal integers, each followed by a single space, to
// one pre-reserved std::string and prints the elapsed time in seconds.
// The command-line arguments are accepted but not used.
int main(int argc, char const *argv[]) {
    (void)argc;
    (void)argv;

    auto started = std::chrono::high_resolution_clock::now();
    std::string str = "";
    const int n = 10000000;
    // Every iteration appends "1 " (two characters), so 2 * n covers the
    // whole result without reallocating the destination string.
    str.reserve(2 * n);
    int a = 1;

    // This expression is intentionally left as written: the temporary created
    // by to_string(a) + " " is part of the workload being measured.
    for (int i = 0; i < n; ++i) {
        str += std::to_string(a) + " ";
    }

    auto done = std::chrono::high_resolution_clock::now();
    // Elapsed time truncated to whole milliseconds, then expressed in seconds.
    const double secs = static_cast<double>(
        std::chrono::duration_cast<std::chrono::milliseconds>(done - started).count()) / 1000;
    // "\n" (newline escape), not the literal two characters "/n".
    std::cout << "Done in " << secs << "\n";
    return 0;
}

Any idea where the large performance gap might come from?

The disassemblies look like this:

Ubuntu:

.file   "tester.cpp"   .text     .align 2     .p2align 4,,15     .type   _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19, @function _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19: .LFB2389:     .cfi_startproc     pushq   %r12     .cfi_def_cfa_offset 16     .cfi_offset 12, -16     pushq   %rbp     .cfi_def_cfa_offset 24     .cfi_offset 6, -24     movq    %rsi, %r12     pushq   %rbx     .cfi_def_cfa_offset 32     .cfi_offset 3, -32     movq    %rdx, %rbx     movq    %rdi, %rbp     subq    %rsi, %rbx     subq    $16, %rsp     .cfi_def_cfa_offset 48     movq    %fs:40, %rax     movq    %rax, 8(%rsp)     xorl    %eax, %eax     cmpq    $15, %rbx     movq    %rbx, (%rsp)     ja  .L12     movq    (%rdi), %rdx     cmpq    $1, %rbx     movq    %rdx, %rax     jne .L4     movzbl  (%rsi), %eax     movb    %al, (%rdx)     movq    (%rdi), %rdx .L5:     movq    (%rsp), %rax     movq    %rax, 8(%rbp)     movb    $0, (%rdx,%rax)     movq    8(%rsp), %rax     xorq    %fs:40, %rax     jne .L13     addq    $16, %rsp     .cfi_remember_state     .cfi_def_cfa_offset 32     popq    %rbx     .cfi_def_cfa_offset 24     popq    %rbp     .cfi_def_cfa_offset 16     popq    %r12     .cfi_def_cfa_offset 8     ret .L12:     .cfi_restore_state     xorl    %edx, %edx     movq    %rsp, %rsi     call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm@PLT     movq    (%rsp), %rdx     movq    %rax, 0(%rbp)     movq    %rdx, 16(%rbp) .L3:     movq    %rbx, %rdx     movq    %r12, %rsi     movq    %rax, %rdi     call    memcpy@PLT     movq    0(%rbp), %rdx     jmp .L5 .L4:     testq   %rbx, %rbx     je  .L5     jmp .L3 .L13:     call    __stack_chk_fail@PLT     .cfi_endproc .LFE2389:     .size   _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19, 
.-_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19     .set    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.23,_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19     .section    .text._ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z,"axG",@progbits,_ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z,comdat     .p2align 4,,15     .weak   _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z     .type   _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z, @function _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z: .LFB1953:     .cfi_startproc     pushq   %rbp     .cfi_def_cfa_offset 16     .cfi_offset 6, -16     movq    %rsi, %r10     movq    %rdx, %rsi     movq    %rcx, %rdx     movq    %rsp, %rbp     .cfi_def_cfa_register 6     pushq   %r12     pushq   %rbx     .cfi_offset 12, -24     .cfi_offset 3, -32     movq    %rdi, %r12     subq    $208, %rsp     testb   %al, %al     movq    %r8, -160(%rbp)     movq    %r9, -152(%rbp)     je  .L15     movaps  %xmm0, -144(%rbp)     movaps  %xmm1, -128(%rbp)     movaps  %xmm2, -112(%rbp)     movaps  %xmm3, -96(%rbp)     movaps  %xmm4, -80(%rbp)     movaps  %xmm5, -64(%rbp)     movaps  %xmm6, -48(%rbp)     movaps  %xmm7, -32(%rbp) .L15:     movq    %fs:40, %rax     movq    %rax, -200(%rbp)     xorl    %eax, %eax     leaq    30(%rsi), %rax     leaq    -224(%rbp), %rcx     andq    $-16, %rax     movl    $32, -224(%rbp)     movl    $48, -220(%rbp)     subq    %rax, %rsp     leaq    16(%rbp), %rax     leaq  
  15(%rsp), %rbx     movq    %rax, -216(%rbp)     leaq    -192(%rbp), %rax     andq    $-16, %rbx     movq    %rbx, %rdi     movq    %rax, -208(%rbp)     call    *%r10     leaq    16(%r12), %rdx     movq    %r12, %rdi     movq    %rbx, %rsi     movq    %rdx, (%r12)     movslq  %eax, %rdx     addq    %rbx, %rdx     call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.23     movq    -200(%rbp), %rdi     xorq    %fs:40, %rdi     movq    %r12, %rax     jne .L18     leaq    -16(%rbp), %rsp     popq    %rbx     popq    %r12     popq    %rbp     .cfi_remember_state     .cfi_def_cfa 7, 8     ret .L18:     .cfi_restore_state     call    __stack_chk_fail@PLT     .cfi_endproc .LFE1953:     .size   _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z, .-_ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z     .section    .rodata.str1.1,"aMS",@progbits,1 .LC0:     .string "" .LC1:     .string "%d" .LC2:     .string "basic_string::append" .LC3:     .string " " .LC5:     .string "Done in " .LC6:     .string "/n"     .section    .text.startup,"ax",@progbits     .p2align 4,,15     .globl  main     .type   main, @function main: .LFB1871:     .cfi_startproc     .cfi_personality 0x9b,DW.ref.__gxx_personality_v0     .cfi_lsda 0x1b,.LLSDA1871     pushq   %r15     .cfi_def_cfa_offset 16     .cfi_offset 15, -16     pushq   %r14     .cfi_def_cfa_offset 24     .cfi_offset 14, -24     pushq   %r13     .cfi_def_cfa_offset 32     .cfi_offset 13, -32     pushq   %r12     .cfi_def_cfa_offset 40     .cfi_offset 12, -40     pushq   %rbp     .cfi_def_cfa_offset 48     .cfi_offset 6, -48     pushq   %rbx     .cfi_def_cfa_offset 56     .cfi_offset 3, -56     subq    $136, %rsp     .cfi_def_cfa_offset 192     leaq    16(%rsp), %r13     movq    %fs:40, %rax     movq    %rax, 120(%rsp)     xorl    %eax, %eax 
    call    _ZNSt6chrono3_V212system_clock3nowEv@PLT     leaq    .LC0(%rip), %rdx     movq    %rax, (%rsp)     leaq    16(%r13), %rax     movq    %r13, %rdi     movq    %rdx, %rsi     movq    %rax, 16(%rsp) .LEHB0:     call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.19 .LEHE0:     movl    $20000000, %esi     movq    %r13, %rdi .LEHB1:     call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEm@PLT .LEHE1:     leaq    48(%rsp), %rbp     leaq    80(%rsp), %rax     movl    $10000000, %ebx     movabsq $9223372036854775807, %r14     leaq    96(%rsp), %r12     movq    %rax, 8(%rsp)     leaq    16(%rbp), %r15     jmp .L25     .p2align 4,,10     .p2align 3 .L21:     movq    %rcx, 80(%rsp)     movq    16(%rax), %rcx     movq    %rcx, 96(%rsp) .L22:     movq    8(%rax), %rcx     movb    $0, 16(%rax)     movq    %r13, %rdi     movq    %rcx, 88(%rsp)     movq    %rdx, (%rax)     movq    $0, 8(%rax)     movq    80(%rsp), %rsi     movq    88(%rsp), %rdx .LEHB2:     call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm@PLT .LEHE2:     movq    80(%rsp), %rdi     cmpq    %r12, %rdi     je  .L23     call    _ZdlPv@PLT .L23:     movq    48(%rsp), %rdi     cmpq    %r15, %rdi     je  .L24     call    _ZdlPv@PLT .L24:     subl    $1, %ebx     je  .L40 .L25:     movq    vsnprintf@GOTPCREL(%rip), %rsi     leaq    .LC1(%rip), %rcx     movl    $1, %r8d     movl    $16, %edx     movq    %rbp, %rdi     xorl    %eax, %eax .LEHB3:     call    _ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_mPKS8_P13__va_list_tagEmSB_z .LEHE3:     cmpq    %r14, 56(%rsp)     je  .L41     leaq    .LC3(%rip), %rsi     movl    $1, %edx     movq    %rbp, %rdi .LEHB4:     call    _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm@PLT .LEHE4:     movq    %r12, 80(%rsp)     movq    (%rax), %rcx     leaq    16(%rax), %rdx     cmpq    %rdx, %rcx     jne 
.L21     movdqu  16(%rax), %xmm0     movaps  %xmm0, 96(%rsp)     jmp .L22     .p2align 4,,10     .p2align 3 .L40:     call    _ZNSt6chrono3_V212system_clock3nowEv@PLT     subq    (%rsp), %rax     movabsq $4835703278458516699, %rdx     leaq    .LC5(%rip), %rsi     pxor    %xmm0, %xmm0     leaq    _ZSt4cout(%rip), %rdi     movq    %rax, %rcx     imulq   %rdx     sarq    $63, %rcx     sarq    $18, %rdx     subq    %rcx, %rdx     cvtsi2sdq   %rdx, %xmm0     movl    $8, %edx     divsd   .LC4(%rip), %xmm0     movsd   %xmm0, (%rsp) .LEHB5:     call    _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT     movsd   (%rsp), %xmm0     leaq    _ZSt4cout(%rip), %rdi     call    _ZNSo9_M_insertIdEERSoT_@PLT     leaq    .LC6(%rip), %rsi     movq    %rax, %rdi     call    _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc@PLT .LEHE5:     movq    16(%rsp), %rdi     addq    $16, %r13     cmpq    %r13, %rdi     je  .L26     call    _ZdlPv@PLT .L26:     xorl    %eax, %eax     movq    120(%rsp), %rbx     xorq    %fs:40, %rbx     jne .L42     addq    $136, %rsp     .cfi_remember_state     .cfi_def_cfa_offset 56     popq    %rbx     .cfi_def_cfa_offset 48     popq    %rbp     .cfi_def_cfa_offset 40     popq    %r12     .cfi_def_cfa_offset 32     popq    %r13     .cfi_def_cfa_offset 24     popq    %r14     .cfi_def_cfa_offset 16     popq    %r15     .cfi_def_cfa_offset 8     ret .L41:     .cfi_restore_state     leaq    .LC2(%rip), %rdi .LEHB6:     call    _ZSt20__throw_length_errorPKc@PLT .LEHE6: .L35:     movq    %rax, %rbx .L29:     movq    48(%rsp), %rdi     addq    $16, %rbp     cmpq    %rbp, %rdi     je  .L31     call    _ZdlPv@PLT .L31:     movq    16(%rsp), %rdi     addq    $16, %r13     cmpq    %r13, %rdi     je  .L32     call    _ZdlPv@PLT .L32:     movq    %rbx, %rdi .LEHB7:     call    _Unwind_Resume@PLT .LEHE7: .L34:     movq    %rax, %rbx     jmp .L31 .L36:     movq    8(%rsp), %rdx     movq    80(%rsp), %rdi     movq    %rax, %rbx     addq 
   $16, %rdx     cmpq    %rdx, %rdi     je  .L29     call    _ZdlPv@PLT     jmp .L29 .L42:     call    __stack_chk_fail@PLT     .cfi_endproc .LFE1871:     .globl  __gxx_personality_v0     .section    .gcc_except_table,"a",@progbits .LLSDA1871:     .byte   0xff     .byte   0xff     .byte   0x1     .uleb128 .LLSDACSE1871-.LLSDACSB1871 .LLSDACSB1871:     .uleb128 .LEHB0-.LFB1871     .uleb128 .LEHE0-.LEHB0     .uleb128 0     .uleb128 0     .uleb128 .LEHB1-.LFB1871     .uleb128 .LEHE1-.LEHB1     .uleb128 .L34-.LFB1871     .uleb128 0     .uleb128 .LEHB2-.LFB1871     .uleb128 .LEHE2-.LEHB2     .uleb128 .L36-.LFB1871     .uleb128 0     .uleb128 .LEHB3-.LFB1871     .uleb128 .LEHE3-.LEHB3     .uleb128 .L34-.LFB1871     .uleb128 0     .uleb128 .LEHB4-.LFB1871     .uleb128 .LEHE4-.LEHB4     .uleb128 .L35-.LFB1871     .uleb128 0     .uleb128 .LEHB5-.LFB1871     .uleb128 .LEHE5-.LEHB5     .uleb128 .L34-.LFB1871     .uleb128 0     .uleb128 .LEHB6-.LFB1871     .uleb128 .LEHE6-.LEHB6     .uleb128 .L35-.LFB1871     .uleb128 0     .uleb128 .LEHB7-.LFB1871     .uleb128 .LEHE7-.LEHB7     .uleb128 0     .uleb128 0 .LLSDACSE1871:     .section    .text.startup     .size   main, .-main     .p2align 4,,15     .type   _GLOBAL__sub_I_main, @function _GLOBAL__sub_I_main: .LFB2369:     .cfi_startproc     leaq    _ZStL8__ioinit(%rip), %rdi     subq    $8, %rsp     .cfi_def_cfa_offset 16     call    _ZNSt8ios_base4InitC1Ev@PLT     movq    _ZNSt8ios_base4InitD1Ev@GOTPCREL(%rip), %rdi     leaq    __dso_handle(%rip), %rdx     leaq    _ZStL8__ioinit(%rip), %rsi     addq    $8, %rsp     .cfi_def_cfa_offset 8     jmp __cxa_atexit@PLT     .cfi_endproc .LFE2369:     .size   _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main     .section    .init_array,"aw"     .align 8     .quad   _GLOBAL__sub_I_main     .local  _ZStL8__ioinit     .comm   _ZStL8__ioinit,1,1     .section    .rodata.cst8,"aM",@progbits,8     .align 8 .LC4:     .long   0     .long   1083129856     .hidden DW.ref.__gxx_personality_v0     .weak   
DW.ref.__gxx_personality_v0     .section    .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat     .align 8     .type   DW.ref.__gxx_personality_v0, @object     .size   DW.ref.__gxx_personality_v0, 8 DW.ref.__gxx_personality_v0:     .quad   __gxx_personality_v0     .hidden __dso_handle     .ident  "GCC: (Ubuntu 7.3.0-16ubuntu3) 7.3.0"     .section    .note.GNU-stack,"",@progbits 

Windows:

.file   "tester.cpp"     .text     .p2align 4,,15     .def    ___tcf_0;   .scl    3;  .type   32; .endef ___tcf_0: LFB2556:     .cfi_startproc     movl    $__ZStL8__ioinit, %ecx     jmp __ZNSt8ios_base4InitD1Ev     .cfi_endproc LFE2556:     .section .rdata,"dr"     .align 4 LC0:     .ascii "basic_string::_M_construct null not valid/0"     .text     .align 2     .p2align 4,,15     .def    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29;    .scl    3;  .type   32; .endef __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29: LFB2587:     .cfi_startproc     pushl   %edi     .cfi_def_cfa_offset 8     .cfi_offset 7, -8     pushl   %esi     .cfi_def_cfa_offset 12     .cfi_offset 6, -12     movl    %ecx, %esi     pushl   %ebx     .cfi_def_cfa_offset 16     .cfi_offset 3, -16     subl    $32, %esp     .cfi_def_cfa_offset 48     movl    48(%esp), %edi     movl    52(%esp), %ebx     testl   %edi, %edi     jne L5     testl   %ebx, %ebx     je  L5     movl    $LC0, (%esp)     call    __ZSt19__throw_logic_errorPKc     .p2align 4,,10 L5:     subl    %edi, %ebx     cmpl    $15, %ebx     movl    %ebx, 28(%esp)     ja  L22     movl    (%esi), %edx     cmpl    $1, %ebx     movl    %edx, %eax     je  L23     testl   %ebx, %ebx     jne L6 L8:     movl    28(%esp), %eax     movl    %eax, 4(%esi)     movb    $0, (%edx,%eax)     addl    $32, %esp     .cfi_remember_state     .cfi_def_cfa_offset 16     popl    %ebx     .cfi_restore 3     .cfi_def_cfa_offset 12     popl    %esi     .cfi_restore 6     .cfi_def_cfa_offset 8     popl    %edi     .cfi_restore 7     .cfi_def_cfa_offset 4     ret $8     .p2align 4,,10 L22:     .cfi_restore_state     leal    28(%esp), %eax     movl    $0, 4(%esp)     movl    %esi, %ecx     movl    %eax, (%esp)     call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERjj     .cfi_def_cfa_offset 40     subl    $8, %esp     
.cfi_def_cfa_offset 48     movl    %eax, (%esi)     movl    28(%esp), %edx     movl    %edx, 8(%esi) L6:     movl    %ebx, 8(%esp)     movl    %edi, 4(%esp)     movl    %eax, (%esp)     call    _memcpy     movl    (%esi), %edx     jmp L8     .p2align 4,,10 L23:     movzbl  (%edi), %eax     movb    %al, (%edx)     movl    (%esi), %edx     jmp L8     .cfi_endproc LFE2587:     .def    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.21;   .scl    3;  .type   32; .endef     .set    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.21,__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29     .section    .text$_ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z,"x"     .linkonce discard     .p2align 4,,15     .globl  __ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z     .def    __ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z;    .scl    2;  .type   32; .endef __ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z: LFB2177:     .cfi_startproc     pushl   %ebp     .cfi_def_cfa_offset 8     .cfi_offset 5, -8     movl    %esp, %ebp     .cfi_def_cfa_register 5     pushl   %esi     pushl   %ebx     subl    $16, %esp     .cfi_offset 6, -12     .cfi_offset 3, -16     movl    16(%ebp), %edx     movl    8(%ebp), %esi     leal    30(%edx), %eax     andl    $-16, %eax     call    ___chkstk_ms     subl    %eax, %esp     leal    24(%ebp), %eax     leal    31(%esp), %ebx     movl    %edx, 4(%esp)     movl    %eax, 12(%esp)     movl    20(%ebp), %eax     andl    $-16, %ebx     movl    %ebx, (%esp)     movl    %eax, 8(%esp)     call    *12(%ebp)     leal    8(%esi), %edx     addl    %ebx, 
%eax     movl    %esi, %ecx     movl    %edx, (%esi)     movl    %eax, 4(%esp)     movl    %ebx, (%esp)     call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPcEEvT_S7_St20forward_iterator_tag.isra.29     subl    $8, %esp     leal    -8(%ebp), %esp     movl    %esi, %eax     popl    %ebx     .cfi_restore 3     popl    %esi     .cfi_restore 6     popl    %ebp     .cfi_restore 5     .cfi_def_cfa 4, 4     ret     .cfi_endproc LFE2177:     .def    ___main;    .scl    2;  .type   32; .endef     .section .rdata,"dr" LC1:     .ascii "/0" LC2:     .ascii "%d/0" LC3:     .ascii "basic_string::append/0" LC4:     .ascii " /0"     .def    ___divdi3;  .scl    2;  .type   32; .endef LC6:     .ascii "Done in /0" LC7:     .ascii "/12/0"     .section    .text.startup,"x"     .p2align 4,,15     .globl  _main     .def    _main;  .scl    2;  .type   32; .endef _main: LFB2111:     .cfi_startproc     .cfi_personality 0,___gxx_personality_v0     .cfi_lsda 0,LLSDA2111     leal    4(%esp), %ecx     .cfi_def_cfa 1, 0     andl    $-16, %esp     pushl   -4(%ecx)     pushl   %ebp     .cfi_escape 0x10,0x5,0x2,0x75,0     movl    %esp, %ebp     pushl   %edi     pushl   %esi     pushl   %ebx     pushl   %ecx     .cfi_escape 0xf,0x3,0x75,0x70,0x6     .cfi_escape 0x10,0x7,0x2,0x75,0x7c     .cfi_escape 0x10,0x6,0x2,0x75,0x78     .cfi_escape 0x10,0x3,0x2,0x75,0x74     subl    $152, %esp     call    ___main     call    __ZNSt6chrono3_V212system_clock3nowEv     leal    -96(%ebp), %ecx     movl    %eax, -136(%ebp)     leal    -88(%ebp), %eax     movl    $LC1, 4(%esp)     movl    $LC1, (%esp)     movl    %edx, -132(%ebp)     movl    %eax, -96(%ebp) LEHB0:     call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.isra.21 LEHE0:     leal    -96(%ebp), %ecx     subl    $8, %esp     movl    $20000000, (%esp) LEHB1:     call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEj LEHE1:     subl    $4, %esp     
movl    $10000000, %edi     leal    -72(%ebp), %esi     leal    -40(%ebp), %ebx     jmp L32     .p2align 4,,10 L28:     movl    %ecx, -48(%ebp)     movl    8(%eax), %ecx     movl    %ecx, -40(%ebp) L29:     movl    4(%eax), %ecx     movb    $0, 8(%eax)     movl    %ecx, -44(%ebp)     movl    %edx, (%eax)     leal    -96(%ebp), %ecx     movl    $0, 4(%eax)     movl    -44(%ebp), %eax     movl    %eax, 4(%esp)     movl    -48(%ebp), %eax     movl    %eax, (%esp) LEHB2:     call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcj LEHE2:     movl    -48(%ebp), %eax     subl    $8, %esp     cmpl    %ebx, %eax     je  L30     movl    %eax, (%esp)     call    __ZdlPv L30:     movl    -72(%ebp), %eax     leal    -64(%ebp), %edx     cmpl    %edx, %eax     je  L31     movl    %eax, (%esp)     call    __ZdlPv L31:     subl    $1, %edi     je  L46 L32:     movl    $1, 16(%esp)     movl    $LC2, 12(%esp)     movl    $16, 8(%esp)     movl    $_vsnprintf, 4(%esp)     movl    %esi, (%esp) LEHB3:     call    __ZN9__gnu_cxx12__to_xstringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEET_PFiPT0_jPKS8_PcEjSB_z LEHE3:     cmpl    $2147483647, -68(%ebp)     je  L47     movl    $1, 4(%esp)     movl    $LC4, (%esp)     movl    %esi, %ecx LEHB4:     call    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcj LEHE4:     movl    %ebx, -48(%ebp)     movl    (%eax), %ecx     leal    8(%eax), %edx     subl    $8, %esp     cmpl    %edx, %ecx     jne L28     movl    12(%eax), %ecx     movl    %ecx, -120(%ebp)     movl    16(%eax), %ecx     movl    %ecx, -124(%ebp)     movl    20(%eax), %ecx     movl    %ecx, -128(%ebp)     movl    8(%eax), %ecx     movl    %ecx, -40(%ebp)     movl    -120(%ebp), %ecx     movl    %ecx, -36(%ebp)     movl    -124(%ebp), %ecx     movl    %ecx, -32(%ebp)     movl    -128(%ebp), %ecx     movl    %ecx, -28(%ebp)     jmp L29     .p2align 4,,10 L46:     call    __ZNSt6chrono3_V212system_clock3nowEv     subl    -136(%ebp), 
%eax     movl    $1000000, 8(%esp)     sbbl    -132(%ebp), %edx     movl    $0, 12(%esp)     movl    %eax, (%esp)     movl    %edx, 4(%esp)     call    ___divdi3     movl    %eax, -120(%ebp)     movl    %edx, -116(%ebp)     fildq   -120(%ebp)     movl    $8, 8(%esp)     movl    $LC6, 4(%esp)     movl    $__ZSt4cout, (%esp)     fdivs   LC5     fstpl   -120(%ebp) LEHB5:     call    __ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i     fldl    -120(%ebp)     movl    $__ZSt4cout, %ecx     fstpl   (%esp)     call    __ZNSo9_M_insertIdEERSoT_     subl    $8, %esp     movl    $LC7, 4(%esp)     movl    %eax, (%esp)     call    __ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc LEHE5:     movl    -96(%ebp), %eax     leal    -88(%ebp), %edi     cmpl    %edi, %eax     je  L43     movl    %eax, (%esp)     call    __ZdlPv L43:     leal    -16(%ebp), %esp     xorl    %eax, %eax     popl    %ecx     .cfi_remember_state     .cfi_restore 1     .cfi_def_cfa 1, 0     popl    %ebx     .cfi_restore 3     popl    %esi     .cfi_restore 6     popl    %edi     .cfi_restore 7     popl    %ebp     .cfi_restore 5     leal    -4(%ecx), %esp     .cfi_def_cfa 4, 4     ret L47:     .cfi_restore_state     movl    $LC3, (%esp) LEHB6:     call    __ZSt20__throw_length_errorPKc LEHE6: L41:     movl    %eax, %ebx L36:     movl    -72(%ebp), %eax     leal    -64(%ebp), %edx     cmpl    %edx, %eax     je  L38     movl    %eax, (%esp)     call    __ZdlPv L38:     movl    -96(%ebp), %eax     leal    -88(%ebp), %edi     cmpl    %edi, %eax     je  L39     movl    %eax, (%esp)     call    __ZdlPv L39:     movl    %ebx, (%esp) LEHB7:     call    __Unwind_Resume LEHE7: L42:     movl    %eax, %esi     movl    -48(%ebp), %eax     cmpl    %ebx, %eax     je  L35     movl    %eax, (%esp)     call    __ZdlPv L35:     movl    %esi, %ebx     jmp L36 L40:     movl    %eax, %ebx     jmp L38     .cfi_endproc LFE2111:     .def    ___gxx_personality_v0;  .scl    2;  .type   32; .endef     
.section    .gcc_except_table,"w" LLSDA2111:     .byte   0xff     .byte   0xff     .byte   0x1     .uleb128 LLSDACSE2111-LLSDACSB2111 LLSDACSB2111:     .uleb128 LEHB0-LFB2111     .uleb128 LEHE0-LEHB0     .uleb128 0     .uleb128 0     .uleb128 LEHB1-LFB2111     .uleb128 LEHE1-LEHB1     .uleb128 L40-LFB2111     .uleb128 0     .uleb128 LEHB2-LFB2111     .uleb128 LEHE2-LEHB2     .uleb128 L42-LFB2111     .uleb128 0     .uleb128 LEHB3-LFB2111     .uleb128 LEHE3-LEHB3     .uleb128 L40-LFB2111     .uleb128 0     .uleb128 LEHB4-LFB2111     .uleb128 LEHE4-LEHB4     .uleb128 L41-LFB2111     .uleb128 0     .uleb128 LEHB5-LFB2111     .uleb128 LEHE5-LEHB5     .uleb128 L40-LFB2111     .uleb128 0     .uleb128 LEHB6-LFB2111     .uleb128 LEHE6-LEHB6     .uleb128 L41-LFB2111     .uleb128 0     .uleb128 LEHB7-LFB2111     .uleb128 LEHE7-LEHB7     .uleb128 0     .uleb128 0 LLSDACSE2111:     .section    .text.startup,"x"     .p2align 4,,15     .def    __GLOBAL__sub_I_main;   .scl    3;  .type   32; .endef __GLOBAL__sub_I_main: LFB2557:     .cfi_startproc     subl    $28, %esp     .cfi_def_cfa_offset 32     movl    $__ZStL8__ioinit, %ecx     call    __ZNSt8ios_base4InitC1Ev     movl    $___tcf_0, (%esp)     call    _atexit     addl    $28, %esp     .cfi_def_cfa_offset 4     ret     .cfi_endproc LFE2557:     .section    .ctors,"w"     .align 4     .long   __GLOBAL__sub_I_main .lcomm __ZStL8__ioinit,1,1     .section .rdata,"dr"     .align 4 LC5:     .long   1148846080     .ident  "GCC: (MinGW.org GCC-6.3.0-1) 6.3.0"     .def    __ZNSt8ios_base4InitD1Ev;   .scl    2;  .type   32; .endef     .def    __ZSt19__throw_logic_errorPKc;  .scl    2;  .type   32; .endef     .def    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERjj;   .scl    2;  .type   32; .endef     .def    _memcpy;    .scl    2;  .type   32; .endef     .def    __ZNSt6chrono3_V212system_clock3nowEv;  .scl    2;  .type   32; .endef     .def    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEj;   .scl 
   2;  .type   32; .endef     .def    __ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcj;  .scl    2;  .type   32; .endef     .def    __ZdlPv;    .scl    2;  .type   32; .endef     .def    _vsnprintf; .scl    2;  .type   32; .endef     .def    __ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_i; .scl    2;  .type   32; .endef     .def    __ZNSo9_M_insertIdEERSoT_;  .scl    2;  .type   32; .endef     .def    __ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc;   .scl    2;  .type   32; .endef     .def    __ZSt20__throw_length_errorPKc; .scl    2;  .type   32; .endef     .def    __Unwind_Resume;    .scl    2;  .type   32; .endef     .def    __ZNSt8ios_base4InitC1Ev;   .scl    2;  .type   32; .endef     .def    _atexit;    .scl    2;  .type   32; .endef 

 


A quick look at the disassembly shows that the Windows version uses movl (i.e. long word, a 32-bit move), while the Linux version uses movq (quad word, 64-bit) and the SSE xmm registers.

My bet is that on Linux you compile for x86-64, while on Windows you target 32-bit x86.

SSE2 is part of the x86-64 baseline, but it is not part of the 32-bit x86 baseline, so a 32-bit MinGW build defaults to no-SSE mode.

If that's the case, building with a 64-bit toolchain on Windows should result in comparable performance. Alternatively, you might enable SSE for 32-bit builds (the -msse2 compiler flag, if I remember correctly).

Comment

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen: