WCudaMSE/BilatTools_Cuda/src/core/cudatools/header/mixte/LockMixte.h

   1 #ifndef LOCK_MIXTE_H_
   2 #define LOCK_MIXTE_H_
   3
   4 #include "cudaTools.h"
   5
   6 /*----------------------------------------------------------------------*\
   7  |*                     Declaration                                     *|
   8  \*---------------------------------------------------------------------*/
   9
  10 /*--------------------------------------*\
  11  |*             Public                  *|
  12  \*-------------------------------------*/
  13
  14
  15
  16 /**
  17  * General :
  18  *
  19  *      A Mutex will act like something of a traffic signal that governs access to some resources.
  20  *      When a thread reads a 0 from the mutex,it interprets this valu as a "green lignt" indicating that no other thread is using the memory.
  21  *      Therefore, the thread is free to lock the memory and make whatever changes it desires,free of interference from other threads.
  22  *      To lock the memory location in nquestion, the thread writes a 1 to the mutex.
  23  *      This will act as a "red light" for potentially competing threads.
  24  *      The competing threads must then wait until the owner has written a 0 to the mutex beforfe they can attempt to modify the locked memory.
  25  *
  26  * Interet
  27  *      On utilise un lock lorsque l'opération qui doit être synchroniser ne possède pas d'opérateur atomic (comme atomicADD, ...),
  28  *      ou lorsqui'il s'agit de plusieurs opérations à synchroniser (joue alors le role de section critique)
  29  *
  30  * Note :
  31  *
  32  *      Lock ne laisse aucune trace coté host, il s'instancie only coté device: Code moins invasif
  33  *      LockMixte laisse une trace coté host. Code plus invasif
  34  *
  35  * Use (host) :
  36  *
  37  *      LockMixte lock();
  38  *      kernel<<<dg,db>>>(...,lock,...); // pas besoin de memory managment MM
  39  *
  40  * Use (device):
  41  *
  42  *      lock.lock();
  43  *      doSomething();
  44  *      lock.unlock();
  45  *
  46  */
  47 class LockMixte
  48     {
  49
  50         /*--------------------------------------*\
  51          |*             Constructor             *|
  52          \*-------------------------------------*/
  53
  54     public :
  55
  56         LockMixte(void)
  57             {
  58             int state = 0;
  59             HANDLE_ERROR(cudaMalloc((void**) &ptrDev_mutex, sizeof(int)));
  60             HANDLE_ERROR(cudaMemcpy(ptrDev_mutex, &state, sizeof(int), cudaMemcpyHostToDevice));
  61             }
  62
  63         /**
  64          * Observation:
  65          *
  66          *      Comme Lock est passer par valeur au kernel,ie par copie,deux objets au total sont vivants, un sur le cpu et sun sur le gpu.
  67          *      Le destructeur sera donc appeler 2x,la premiere fois, lors de la fin du kernel sur le gpu, la seconde fois sur le cpu.
  68          *      Comme cudaFree peut etre appeler sur le cpu ou gpu, le code ci-dessous est valide, mais sans le HANDLE_ERROR qui n'a aucun sens
  69          *      (affichage d'un message d'error) sur le GPU et qui est donc enlever ici.
  70          *      Heureusement cudaFree peut être appeler plusieurs fois de suite sans probleme,mais un seul appel suffirait!
  71          *
  72          * Attention:
  73          *
  74          *      Sur le GPU, il ne faut jamais passer Lock par valeur, car sinon, la premiere instance detruite, detruit ptrDev_mutex !!
  75          *      et le mutex devient alors inutilisable!
  76          */
  77         ~LockMixte(void)
  78             {
  79            // HANDLE_ERROR(cudaFree(ptrDev_mutex)); // HANDLE_ERROR : pas possible car executer aussi sur le GPU
  80             cudaFree(ptrDev_mutex);
  81             }
  82
  83         /*--------------------------------------*\
  84          |*             Methodes                *|
  85          \*-------------------------------------*/
  86
  87     public :
  88
  89         __device__ void lock(void)
  90             {
  91             // Goal :
  92             //          if (*ptrDev_mutex==0) {*ptrDev_mutex==1}
  93             //          But must be thread safe!
  94             //
  95             // Solution:
  96             //          atomicCAS  = atomic Compare And Swap
  97             //          Prototype :     c atomicCAS(ptr,a,b)
  98             //          Action :        compare ptr avec a, si egale affecte b à ptr, renvoie ptr
  99
 100             // Tant que ptrDev_mutex!=0 le thread cuda boucle sur while
 101             // Des qu'il vaut 0, il met le mutex à 1 et lock se termine
 102             while (atomicCAS(ptrDev_mutex, 0, 1) != 0);
 103             }
 104
 105         __device__ void unlock(void)
 106             {
 107             // Goal :
 108             //          Put 1 in the mutex
 109             //
 110             // Solution 1:
 111             //
 112             //          *ptrDev_mutex=0;
 113             //          Aucun thread en competition ici. L'affectation n'a pas besoin d'être atomique.
 114             //          Solution satisfaisante.
 115             //
 116             // Solution 2 (prefered for symetric approach)
 117             //
 118             //          Une solution atomique
 119
 120             // Echange et renvoie la valeur originale
 121             atomicExch(ptrDev_mutex, 0);
 122             }
 123
 124         /*--------------------------------------*\
 125          |*             Attributs               *|
 126          \*-------------------------------------*/
 127
 128     private :
 129
 130         int* ptrDev_mutex; // Espace adressage GPU, en global memory GM
 131     };
 132
 133 #endif
 134
 135 /*----------------------------------------------------------------------*\
 136  |*                     End                                             *|
 137  \*---------------------------------------------------------------------*/