# WCudaMSE/BUILDER/makefile/public/cuda/cudaGCC.mk

# Version       : 0.0.4
# Author        : Cedric.Bilat@he-arc.ch
#
# Attention
#
#       (A1)    In the variable definitions below, beware of trailing spaces!
#       (A2)    Leave a space after the += append operator, e.g.: xxx+= yyyy
#

  10 ifndef __CUDA_GCC_PUBLIC_OPTION_MK__
  11 __CUDA_GCC_PUBLIC_OPTION_MK__=true
  12
  13 ###############################################
  14 #                       Cuda GCC Linux                                    #
  15 ###############################################
  16
  17 ##################################
  18 #               public                       #
  19 ##################################
  20
  21 ########################
  22 #               gcc                        #
  23 ########################
  24
  25 #Option de compilation
  26 #       Version minimale : vide
  27 #       Surcharger automatiquement (par exemple en fonction du type de la target)
  28
  29 #################
  30 #       Warning     #
  31 #################
  32
  33 #CXXFLAGS+= -w                                  # Disable
  34 #CXXFLAGS+= -W                                  # Print extra warning messages
  35 #CXXFLAGS+= -Wall                               # Enable all
  36 #CXXFLAGS+= -Wunused-variable   # show unused-variable
  37 #CXXFLAGS+= -Wunused-function   # show unused-function
  38 #CXXFLAGS+= -Wunused-parameter  # show unused-parameter
  39
  40 #################
  41 # Optimisation  #
  42 #################
  43
  44 # Common X86 et ARM
  45 CXX_OPTIMISATION+= -O3 #
  46
  47 #######
  48 # x86 #
  49 #######
  50
  51 X86_CXX_OPTIMISATION+= -frerun-loop-opt#
  52 #X86_CXX_OPTIMISATION+= --fgcse#
  53 X86_CXX_OPTIMISATION+= -fomit-frame-pointer#
  54
  55 # CPU specific
  56 #X86_CXXFLAGS+= -march=native                                           # Detection automatique arch
  57 #X86_CXX_OPTIMISATION+= -march=core2
  58 X86_CXX_OPTIMISATION+= -march=corei7                            # cuda1
  59 #X86_CXX_OPTIMISATION+= -march=corei7-avx                       # cuda2, mieux?
  60 #X86_CXX_OPTIMISATION+= -march=sandybridge                      # i7 work
  61 #X86_CXX_OPTIMISATION+= -march=ivybridge                        # i7 home
  62 #X86_CXX_OPTIMISATION+= -march=haswell
  63
  64 #######
  65 # ARM #
  66 #######
  67
  68 ARM_CXXFLAGS+=#
  69
  70 #################
  71 # linkage       #
  72 #################
  73
  74 #       Version minimale : vide (sera surcharg�e)
  75 #       Surcharger automatiquement (par exemple en fonction du type de la target)
  76
  77 CXXLDFLAGS+=#
  78
  79 ########################
  80 #               nvcc               #
  81 ########################
  82
  83 #################
  84 # Target       #
  85 #################
  86
  87 # NVCCFLAGS
  88 # https://developer.nvidia.com/cuda-gpus
  89 # Quadro Fx4600 :                               sm_10
  90 # Quadro nvs140M :                              sm_11
  91 # GTX_295 :                                     sm_13
  92 # GTX 580                                               sm_20
  93 # Tesla m2090                                   sm_20
  94 # Quadro6000                                    sm_20
  95 # Quadroplex 7000                               sm_20
  96 # GTX 680                                               sm_30
  97 # Quadro k5000                                  sm_30
  98 # Tesla k20c                                    sm_35
  99 # Gforce titan                                  sm_35
 100 # Quadro k6000                                  sm_35
 101 # Tesla k40                                             sm_35
 102
 103 # NVCCFLAGS possibilities:
 104 #
 105 #       -arch=<compute_xy>                              Generate PTX for capability x.y
 106 #       -code=<sm_xy>                                   Generate binary for capability x.y, by default same as -arch
 107 #       -gencode arch=...,code=...              Same as -arch and -code, but may be repeated (executable is bigger, and compilation time longer)
 108 #
 109 # Target : Syntaxe light (shorcut)
 110 #                : Avantage     : short,easy, quick
 111 #                : Incovenient  : only one target can be specify!
 112 #
 113 NVCCFLAGS+= -arch=sm_20
 114 #
 115 # Target : Syntaxe full
 116 #                : Avantage     : many target can be specify!
 117 #                :                              : Allow jit compilation at runtime, for deploying in a gpu with higher arch than arch use for developpement.
 118 #
 119 #                : Option : -arch : Virtual architecture
 120 #                                                 : generate PTX (ptx code is the same as byte code in java)
 121 #                                                 : Like java,  a jit compilation at runtime is performed.
 122 #                                                 : Jit compilatioonce is performed only once, fortunately a cache is used to persist a final binary image.
 123 #
 124 #                : Option : -code : Real architecture
 125 #                                                 : nvcc embeded a compiled code image in the executable for each specified architecture -arch,
 126 #                                                 : Code image is a true binary load image for each real architecture (present at compilation time?), and ptx code for earch virtual architecture (not present at compilation time?)
 127 #
 128 #
 129 #
 130 # Enable or disable all following lignes
 131 #NVCCFLAGS+= -gencode arch=compute_20,code=sm_20 #idem NVCCFLAGS+= -arch=sm_20
 132 #NVCCFLAGS+= -gencode arch=compute_30,code=sm_30 #idem NVCCFLAGS+= -arch=sm_30
 133 #NVCCFLAGS+= -gencode arch=compute_35,code=sm_35 #idem NVCCFLAGS+= -arch=sm_35
 134 #NVCCFLAGS+= -gencode arch=compute_50,code=sm_50 #idem NVCCFLAGS+= -arch=sm_50
 135
 136 NVCCFLAGS+= -m64 #
 137
 138 #################
 139 # Optimisation  #
 140 #################
 141
 142 NVCCFLAGS+= -use_fast_math      #idem -ftz=true -prec_div=false -prec_sqrt=false
 143 NVCCFLAGS+= --fmad=true #
 144
 145 #################
 146 # linkage       #
 147 #################
 148
 149 #       Version minimale : vide (sera surcharg�e)
 150 #       Surcharger automatiquement (par exemple en fonction du type de la target)
 151
 152 NVCCLDFLAGS+=#
 153
 154 #################
 155 # debug         #
 156 #################
 157
 158 #decommneter la ligne pour
 159 #       (1) empecher l'optimisation
 160 #       (2) activer les flag de debug
 161
 162 #DEBUG=true
 163
 164 #NVCCFLAGS+= -dryrun
 165 #NVCCFLAGS+= -v
 166
 167 #################
 168 # extension     #
 169 #################
 170
 171 #Injection de variable dans le code
 172 #       same as #define XXX YYY
 173 #       same as -DXXX
 174 #Attention, pas mettre -D, ni #define, que XXX
 175
 176 CODE_DEFINE_VARIABLES+=#
 177
 178 # Injection automatique de lib windows standard
 179 #               true (ou n'importe quoi) pour activer!
 180 #               rien pour desactiver
 181 #               Version minimale : vide (avec # pour le montrer)
 182
 183 EXCLUDE_LIBRARY_FILES+=#
 184 ADD_LIBRARY_FILES+=#
 185
 186 ###############################################
 187 #                                       End                                               #
 188 ###############################################
 189
 190 endif#__CUDA_GCC_PUBLIC_OPTION_MK__