Ajout de l'ensemble du workspace.
[GPU.git] / WCudaMSE / BilatTools_Cuda / src / core / cudatools / header / device / reduction / reductionADDLock.h
1 #ifndef REDUCTION_ADD_LOCK_H_
2 #define REDUCTION_ADD_LOCK_H_
3
4 #include "Indice1D.h"
5 #include "Lock.h"
6
7 /*----------------------------------------------------------------------*\
8 |* Declaration *|
9 \*---------------------------------------------------------------------*/
10
11 /*--------------------------------------*\
12 |* Imported *|
13 \*-------------------------------------*/
14
15
16
17 /*--------------------------------------*\
18 |* Public *|
19 \*-------------------------------------*/
20
21 /**
22 * Hyp
23 * (H1) atomicAdd n'existe pas pour T , sinon utiliser la version in reductionADD.h
24 * (H2) tabBlock size is a power of 2
25 * (H3) tabBlock is already fill with data
26 *
27 * Usage LockGPU
28 *
29 * // variable global .cu
30 * __device__ int mutex=0; // Attention à l'initialisation
31 *
32 * // Variable local inside kernel (same .cu as variable mutex)
33 * LockGPU lock=LockGPU(&mutex);
34 */
35 template <typename T>
36 __device__ void reductionADD(T* tabBlock, T* ptrDevResultat,Lock* ptrlock);
37
38 /*--------------------------------------*\
39 |* Private *|
40 \*-------------------------------------*/
41
42 template <typename T>
43 static __device__ void reductionIntraBlock(T* tabBlock);
44
45 template <typename T>
46 static __device__ void reductionInterblock(T* tabBlock, T* ptrDevResultat,Lock* ptrlock);
47
48 /*----------------------------------------------------------------------*\
49 |* Implementation *|
50 \*---------------------------------------------------------------------*/
51
52 /*--------------------------------------*\
53 |* Public *|
54 \*-------------------------------------*/
55
56 template <typename T>
57 __device__ void reductionADD(T* tabBlock, T* ptrDevResultat,Lock* ptrlock)
58 {
59 reductionIntraBlock(tabBlock);
60
61 __syncthreads();
62
63 reductionInterblock(tabBlock, ptrDevResultat,ptrlock);
64 }
65
66 /*--------------------------------------*\
67 |* Private *|
68 \*-------------------------------------*/
69
70 /**
71 * Hyp :
72 * (H1) length(tabBlock) est une puissance de 2 ! ie blockDim.x est une puissance de 2
73 * (H2) gride et Block de dim 1
74 * (H3) T est un type simple suppoter par atomicAdd
75 *
76 */
77 template <typename T>
78 __device__ void reductionIntraBlock(T* tabBlock)
79 {
80
81 //v1
82 // int midle = blockDim.x / 2;
83 // int tidLocal = threadIdx.x;
84 //
85 // // int tidLocal = Indice1D::tidLocal()
86 //
87 // while (midle >= 1)
88 // {
89 //
90 // if (tidLocal < midle)
91 // {
92 // tabBlock[tidLocal] += tabBlock[tidLocal + midle];
93 // }
94 //
95 // __syncthreads();
96 //
97 // //midle /= 2;
98 // midle>>=1;
99 // }
100
101 //v2
102 int midle = blockDim.x / 2;
103 int tidLocal = threadIdx.x;
104
105 //a 64 on ne divise plus et on a besoin de 32 thread pour finir de reduire le 64 premières cases
106 while (midle >= 64)
107 {
108
109 if (tidLocal < midle)
110 {
111 tabBlock[tidLocal] += tabBlock[tidLocal + midle];
112 }
113
114 __syncthreads();
115
116 //midle /= 2;
117 midle>>=1;
118 }
119
120 // Utilisation des 32 thread d'un warp pour finir la reduction
121 if(tidLocal<32)
122 {
123 // no __syncthreads() necessary after exah of the following lines as long as we acces the data via a pointzer decalred as volatile
124 // because teh 32 therad in each warp execute in a locked-step with each other
125 volatile T* ptrData=tabBlock;
126
127 ptrData[tidLocal]+=ptrData[tidLocal+32];
128 ptrData[tidLocal]+=ptrData[tidLocal+16];
129 ptrData[tidLocal]+=ptrData[tidLocal+8];
130 ptrData[tidLocal]+=ptrData[tidLocal+4];
131 ptrData[tidLocal]+=ptrData[tidLocal+2];
132 ptrData[tidLocal]+=ptrData[tidLocal+1];
133 }
134
135 }
136
137 /**
138 * Hyp : ptrDevResultat iniotaiuliasé avec 0 !!
139 */
140 template <typename T>
141 __device__ void reductionInterblock(T* tabBlock, T* ptrDevResultat,Lock* ptrlock)
142 {
143 if (threadIdx.x == 0)
144 {
145 ptrlock->lock();
146 *ptrDevResultat+= tabBlock[0];
147 ptrlock->unlock();
148 }
149 }
150
151 #endif
152
153 /*----------------------------------------------------------------------*\
154 |* End *|
155 \*---------------------------------------------------------------------*/