Aqui vai uma comparação de CFLAGS; faça um teste e estude bem cada uma delas:
código-fonte = teste.c
#include <stdio.h>
/*
 * Program entry point: writes "Hello world" followed by a newline to
 * standard output and returns 0 as the exit status.
 *
 * The return type is spelled out because implicit int is not valid since
 * C99, and (void) declares a proper prototype with no parameters.
 */
int main(void)
{
    printf("Hello world\n");
    return 0;
}
-------------------------------------------------------------------------------------------------------------------------------------
gcc -fverbose-asm -S teste.c -o a.s
cat a.s:
.file "teste.c"
# GNU C (Ubuntu 4.4.3-4ubuntu5) version 4.4.3 (x86_64-linux-gnu)
# compiled by GNU C version 4.4.3, GMP version 4.3.2, MPFR version 2.4.2-p1.
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: teste.c -D_FORTIFY_SOURCE=2 -mtune=generic
# -auxbase-strip a.s -fverbose-asm -fstack-protector
# options enabled: -falign-loops -fargument-alias
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg -fcommon
# -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types
# -ffunction-cse -fgcse-lm -fident -finline-functions-called-once
# -fira-share-save-slots -fira-share-spill-slots -fivopts
# -fkeep-static-consts -fleading-underscore -fmath-errno
# -fmerge-debug-strings -fmove-loop-invariants -fpeephole
# -freg-struct-return -fsched-interblock -fsched-spec
# -fsched-stalled-insns-dep -fsigned-zeros -fsplit-ivs-in-unroller
# -fstack-protector -ftrapping-math -ftree-cselim -ftree-loop-im
# -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
# -ftree-reassoc -ftree-scev-cprop -ftree-switch-conversion
# -ftree-vect-loop-version -funit-at-a-time -funwind-tables
# -fvect-cost-model -fverbose-asm -fzero-initialized-in-bss
# -m128bit-long-double -m64 -m80387 -maccumulate-outgoing-args
# -malign-stringops -mfancy-math-387 -mfp-ret-in-387 -mfused-madd -mglibc
# -mieee-fp -mmmx -mno-sse4 -mpush-args -mred-zone -msse -msse2
# -mtls-direct-seg-refs
# Compiler executable checksum: 2129e1a56226bd6e8f7af5e0a3ff467d
# NOTE(review): the explanatory comments in this listing are review
# annotations added to the pasted output; GCC did not emit them.
#
# Read-only data: the string whose address is passed to puts below. It has no
# trailing "\n", unlike the printf format string in teste.c.
.section .rodata
.LC0:
.string "Hello world"
.text
# main: loads the address of .LC0 into %edi, calls puts, then returns 0 in
# %eax. The .cfi_* lines are call-frame (unwind) directives, not instructions.
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
pushq %rbp # save the caller's %rbp
.cfi_def_cfa_offset 16
movq %rsp, %rbp # %rbp = %rsp: establish the frame pointer
.cfi_offset 6, -16
.cfi_def_cfa_register 6
movl $.LC0, %edi # first argument register = address of .LC0
call puts # the printf call in teste.c was emitted as a call to puts
movl $0, %eax # return value 0; "D.2045" was the operand name printed by -fverbose-asm
leave # undo the frame setup: mov %rbp,%rsp followed by pop %rbp
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Ubuntu 4.4.3-4ubuntu5) 4.4.3"
.section .note.GNU-stack,"",@progbits
---------------------------------------------------------------------------------------------------------------------------------------------------------------------
gcc -O2 -march=k8 -fverbose-asm -S teste.c -o march_k8.s
cat march_k8.s:
.file "teste.c"
# GNU C (Ubuntu 4.4.3-4ubuntu5) version 4.4.3 (x86_64-linux-gnu)
# compiled by GNU C version 4.4.3, GMP version 4.3.2, MPFR version 2.4.2-p1.
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: teste.c -D_FORTIFY_SOURCE=2 -march=k8 -auxbase-strip
# march_k8.s -O2 -fverbose-asm -fstack-protector
# options enabled: -falign-loops -fargument-alias
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg
# -fcaller-saves -fcommon -fcprop-registers -fcrossjumping
# -fcse-follow-jumps -fdefer-pop -fdelete-null-pointer-checks
# -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types
# -fexpensive-optimizations -fforward-propagate -ffunction-cse -fgcse
# -fgcse-lm -fguess-branch-probability -fident -fif-conversion
# -fif-conversion2 -findirect-inlining -finline
# -finline-functions-called-once -finline-small-functions -fipa-cp
# -fipa-pure-const -fipa-reference -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -fmath-errno -fmerge-constants -fmerge-debug-strings
# -fmove-loop-invariants -fomit-frame-pointer -foptimize-register-move
# -foptimize-sibling-calls -fpeephole -fpeephole2 -freg-struct-return
# -fregmove -freorder-blocks -freorder-functions -frerun-cse-after-loop
# -fsched-interblock -fsched-spec -fsched-stalled-insns-dep
# -fschedule-insns2 -fsigned-zeros -fsplit-ivs-in-unroller
# -fsplit-wide-types -fstack-protector -fstrict-aliasing -fstrict-overflow
# -fthread-jumps -ftoplevel-reorder -ftrapping-math -ftree-builtin-call-dce
# -ftree-ccp -ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-cselim
# -ftree-dce -ftree-dominator-opts -ftree-dse -ftree-fre -ftree-loop-im
# -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
# -ftree-pre -ftree-reassoc -ftree-scev-cprop -ftree-sink -ftree-sra
# -ftree-switch-conversion -ftree-ter -ftree-vect-loop-version -ftree-vrp
# -funit-at-a-time -funwind-tables -fvect-cost-model -fverbose-asm
# -fzero-initialized-in-bss -m128bit-long-double -m3dnow -m64 -m80387
# -maccumulate-outgoing-args -malign-stringops -mfancy-math-387
# -mfp-ret-in-387 -mfused-madd -mglibc -mieee-fp -mmmx -mno-sse4
# -mpush-args -mred-zone -msse -msse2 -mtls-direct-seg-refs
# Compiler executable checksum: 2129e1a56226bd6e8f7af5e0a3ff467d
# NOTE(review): the explanatory comments in this listing are review
# annotations added to the pasted output; GCC did not emit them.
#
# String constant used as the format argument of the __printf_chk call below;
# in this build the trailing "\n" is kept in the string.
# NOTE(review): "aMS",@progbits,1 looks like a mergeable-strings section with
# 1-byte entries -- confirm against the GNU as .section documentation.
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "Hello world\n"
.text
# Align the start of main to a 2^4 = 16-byte boundary, skipping at most 15 bytes.
.p2align 4,,15
# main: calls __printf_chk(1, .LC0) and returns 0 in %eax. No %rbp frame is
# set up here (-fomit-frame-pointer is in the enabled-options list above).
.globl main
.type main, @function
main:
.LFB22:
.cfi_startproc
subq $8, %rsp # reserve 8 bytes so %rsp is 16-byte aligned at the call
.cfi_def_cfa_offset 16
movl $.LC0, %esi # second argument register = address of the format string
movl $1, %edi # first argument register = 1 (NOTE(review): presumably the __printf_chk flag argument -- confirm)
xorl %eax, %eax # %eax = 0 before the variadic call (%al = number of vector registers used)
call __printf_chk # NOTE(review): presumably the -D_FORTIFY_SOURCE=2 replacement for printf -- confirm
xorl %eax, %eax # return value 0
addq $8, %rsp # release the 8 bytes reserved on entry
ret
.cfi_endproc
.LFE22:
.size main, .-main
.ident "GCC: (Ubuntu 4.4.3-4ubuntu5) 4.4.3"
.section .note.GNU-stack,"",@progbits
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
gcc -O2 -march=native -fverbose-asm -S teste.c -o march_native.s
cat march_native.s:
.file "teste.c"
# GNU C (Ubuntu 4.4.3-4ubuntu5) version 4.4.3 (x86_64-linux-gnu)
# compiled by GNU C version 4.4.3, GMP version 4.3.2, MPFR version 2.4.2-p1.
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: teste.c -D_FORTIFY_SOURCE=2 -march=amdfam10 -mcx16
# -msahf -mpopcnt --param l1-cache-size=64 --param l1-cache-line-size=64
# --param l2-cache-size=512 -mtune=amdfam10 -auxbase-strip march_native.s
# -O2 -fverbose-asm -fstack-protector
# options enabled: -falign-loops -fargument-alias
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg
# -fcaller-saves -fcommon -fcprop-registers -fcrossjumping
# -fcse-follow-jumps -fdefer-pop -fdelete-null-pointer-checks
# -fdwarf2-cfi-asm -fearly-inlining -feliminate-unused-debug-types
# -fexpensive-optimizations -fforward-propagate -ffunction-cse -fgcse
# -fgcse-lm -fguess-branch-probability -fident -fif-conversion
# -fif-conversion2 -findirect-inlining -finline
# -finline-functions-called-once -finline-small-functions -fipa-cp
# -fipa-pure-const -fipa-reference -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -fmath-errno -fmerge-constants -fmerge-debug-strings
# -fmove-loop-invariants -fomit-frame-pointer -foptimize-register-move
# -foptimize-sibling-calls -fpeephole -fpeephole2 -freg-struct-return
# -fregmove -freorder-blocks -freorder-functions -frerun-cse-after-loop
# -fsched-interblock -fsched-spec -fsched-stalled-insns-dep
# -fschedule-insns2 -fsigned-zeros -fsplit-ivs-in-unroller
# -fsplit-wide-types -fstack-protector -fstrict-aliasing -fstrict-overflow
# -fthread-jumps -ftoplevel-reorder -ftrapping-math -ftree-builtin-call-dce
# -ftree-ccp -ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-cselim
# -ftree-dce -ftree-dominator-opts -ftree-dse -ftree-fre -ftree-loop-im
# -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
# -ftree-pre -ftree-reassoc -ftree-scev-cprop -ftree-sink -ftree-sra
# -ftree-switch-conversion -ftree-ter -ftree-vect-loop-version -ftree-vrp
# -funit-at-a-time -funwind-tables -fvect-cost-model -fverbose-asm
# -fzero-initialized-in-bss -m128bit-long-double -m3dnow -m64 -m80387 -mabm
# -maccumulate-outgoing-args -malign-stringops -mcx16 -mfancy-math-387
# -mfp-ret-in-387 -mfused-madd -mglibc -mieee-fp -mmmx -mno-sse4 -mpopcnt
# -mpush-args -mred-zone -msahf -msse -msse2 -msse3 -msse4a
# -mtls-direct-seg-refs
# Compiler executable checksum: 2129e1a56226bd6e8f7af5e0a3ff467d
# NOTE(review): the explanatory comments in this listing are review
# annotations added to the pasted output; GCC did not emit them.
#
# String constant used as the format argument of the __printf_chk call below;
# in this build the trailing "\n" is kept in the string.
# NOTE(review): "aMS",@progbits,1 looks like a mergeable-strings section with
# 1-byte entries -- confirm against the GNU as .section documentation.
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "Hello world\n"
.text
# Align the start of main to a 2^5 = 32-byte boundary, skipping at most 31
# bytes. From the .section line to the end of the listing, this is the only
# line that differs from the -march=k8 output, which uses .p2align 4,,15.
.p2align 5,,31
# main: calls __printf_chk(1, .LC0) and returns 0 in %eax. No %rbp frame is
# set up here (-fomit-frame-pointer is in the enabled-options list above).
.globl main
.type main, @function
main:
.LFB22:
.cfi_startproc
subq $8, %rsp # reserve 8 bytes so %rsp is 16-byte aligned at the call
.cfi_def_cfa_offset 16
movl $.LC0, %esi # second argument register = address of the format string
movl $1, %edi # first argument register = 1 (NOTE(review): presumably the __printf_chk flag argument -- confirm)
xorl %eax, %eax # %eax = 0 before the variadic call (%al = number of vector registers used)
call __printf_chk # NOTE(review): presumably the -D_FORTIFY_SOURCE=2 replacement for printf -- confirm
xorl %eax, %eax # return value 0
addq $8, %rsp # release the 8 bytes reserved on entry
ret
.cfi_endproc
.LFE22:
.size main, .-main
.ident "GCC: (Ubuntu 4.4.3-4ubuntu5) 4.4.3"
.section .note.GNU-stack,"",@progbits
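Dá para ver as diferenças nas opções que cada CFLAG habilita; pelo que vi, -march=native é a mais completa: aqui ela foi expandida para -march=amdfam10 e, em relação a -march=k8, habilitou também -mabm, -mcx16, -mpopcnt, -msahf, -msse3 e -msse4a. Para este programa, porém, o código gerado para main é o mesmo com -march=k8 e com -march=native; só muda o alinhamento (.p2align).
Tenho aqui um AMD Athlon II X2 M300.
O GCC é a versão 4.4.3 do Ubuntu 10.04 de 64 bits.
Vale a pena testar.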