# Version	: 0.0.4
# Author	: Cedric.Bilat@he-arc.ch
#
# Purpose
#
#	Public (user-editable) compiler and linker options for the CUDA / GCC / Linux build.
#	Every variable is extended with += so that other makefile fragments can contribute too.
#
# Caution
#
#	(A1) In the variable definitions below, beware of trailing spaces!
#	     GNU Make keeps any whitespace found between a value and a following '#',
#	     which is why every value here is terminated by a '#' glued to it.
#	(A2) Keep a space after the appending += operator, e.g.: xxx+= yyyy
#

# Include guard: the content is evaluated only once, even if this file is included several times.
ifndef __CUDA_GCC_PUBLIC_OPTION_MK__
__CUDA_GCC_PUBLIC_OPTION_MK__=true

###############################################
#               Cuda GCC Linux                #
###############################################

##################################
#            public              #
##################################

########################
#         gcc          #
########################

# Compilation options
# Minimal version : empty
# Overridden automatically (for example depending on the type of the target)

#################
#    Warning    #
#################

#CXXFLAGS+= -w# Disable
#CXXFLAGS+= -W# Print extra warning messages
#CXXFLAGS+= -Wall# Enable all
#CXXFLAGS+= -Wunused-variable# show unused-variable
#CXXFLAGS+= -Wunused-function# show unused-function
#CXXFLAGS+= -Wunused-parameter# show unused-parameter

#################
# Optimisation  #
#################

# Common to x86 and ARM
CXX_OPTIMISATION+= -O3#

#######
# x86 #
#######

X86_CXX_OPTIMISATION+= -frerun-loop-opt#
#X86_CXX_OPTIMISATION+= -fgcse#
X86_CXX_OPTIMISATION+= -fomit-frame-pointer#

# CPU specific: keep exactly one -march line enabled
#X86_CXXFLAGS+= -march=native# automatic detection of the architecture
#X86_CXX_OPTIMISATION+= -march=core2#
X86_CXX_OPTIMISATION+= -march=corei7# cuda1
#X86_CXX_OPTIMISATION+= -march=corei7-avx# cuda2, better?
#X86_CXX_OPTIMISATION+= -march=sandybridge# i7 work
#X86_CXX_OPTIMISATION+= -march=ivybridge# i7 home
#X86_CXX_OPTIMISATION+= -march=haswell#

#######
# ARM #
#######

ARM_CXXFLAGS+=#

#################
#    linkage    #
#################

# Minimal version : empty (will be overridden)
# Overridden automatically (for example depending on the type of the target)
CXXLDFLAGS+=#

########################
#        nvcc          #
########################

#################
#    Target     #
#################

# NVCCFLAGS
# https://developer.nvidia.com/cuda-gpus
#
#	Quadro Fx4600		sm_10
#	Quadro nvs140M		sm_11
#	GTX 295			sm_13
#	GTX 580			sm_20
#	Tesla m2090		sm_20
#	Quadro 6000		sm_20
#	Quadroplex 7000		sm_20
#	GTX 680			sm_30
#	Quadro k5000		sm_30
#	Tesla k20c		sm_35
#	GeForce Titan		sm_35
#	Quadro k6000		sm_35
#	Tesla k40		sm_35
#
# NVCCFLAGS possibilities:
#
#	-arch=				Generate PTX for capability x.y
#	-code=				Generate binary for capability x.y, by default same as -arch
#	-gencode arch=...,code=...	Same as -arch and -code, but may be repeated
#					(executable is bigger, and compilation time longer)
#
# Target : light syntax (shortcut)
#	 : Advantage    : short, easy, quick
#	 : Disadvantage : only one target can be specified!
#
#		NVCCFLAGS+= -arch=sm_20
#
# Target : full syntax
#	 : Advantage : many targets can be specified!
#	 :           : Allows jit compilation at runtime, for deploying on a gpu with a higher arch
#	 :             than the arch used for development.
#
#	 : Option : -arch : Virtual architecture
#	 :          generates PTX (ptx code is the same as byte code in java)
#	 :          Like java, a jit compilation is performed at runtime.
#	 :          Jit compilation is performed only once, fortunately a cache is used to persist
#	 :          the final binary image.
#
#	 : Option : -code : Real architecture
#	 :          nvcc embeds a compiled code image in the executable for each specified architecture -arch,
#	 :          Code image is a true binary load image for each real architecture (present at compilation time?),
#	 :          and ptx code for each virtual architecture (not present at compilation time?)
#

# Enable or disable any of the following lines
#NVCCFLAGS+= -gencode arch=compute_20,code=sm_20# same as NVCCFLAGS+= -arch=sm_20
#NVCCFLAGS+= -gencode arch=compute_30,code=sm_30# same as NVCCFLAGS+= -arch=sm_30
#NVCCFLAGS+= -gencode arch=compute_35,code=sm_35# same as NVCCFLAGS+= -arch=sm_35
#NVCCFLAGS+= -gencode arch=compute_50,code=sm_50# same as NVCCFLAGS+= -arch=sm_50

NVCCFLAGS+= -m64#

#################
# Optimisation  #
#################

NVCCFLAGS+= -use_fast_math# same as -ftz=true -prec-div=false -prec-sqrt=false
NVCCFLAGS+= --fmad=true#

#################
#    linkage    #
#################

# Minimal version : empty (will be overridden)
# Overridden automatically (for example depending on the type of the target)
NVCCLDFLAGS+=#

#################
#     debug     #
#################

# Uncomment the DEBUG line to
#	(1) prevent optimisation
#	(2) enable the debug flags
#DEBUG=true

#NVCCFLAGS+= -dryrun#
#NVCCFLAGS+= -v#

#################
#   extension   #
#################

# Injection of variables into the code
#	same as #define XXX YYY
#	same as -DXXX
# Caution: write neither -D nor #define, only XXX
CODE_DEFINE_VARIABLES+=#

# Automatic injection of standard windows libraries
#	true (or anything) to enable!
#	nothing to disable
# Minimal version : empty (with # to make it visible)
# NOTE(review): the variable this comment describes is not visible in this file;
# it looks inherited from a Windows variant of this fragment - confirm.
EXCLUDE_LIBRARY_FILES+=#
ADD_LIBRARY_FILES+=#

###############################################
#                    End                      #
###############################################

endif#__CUDA_GCC_PUBLIC_OPTION_MK__