Ajout de l'ensemble du workspace.
[GPU.git] / WCudaMSE / Student_Cuda / src / cpp / core / 01_Hello / 02_hello_add.cu
diff --git a/WCudaMSE/Student_Cuda/src/cpp/core/01_Hello/02_hello_add.cu b/WCudaMSE/Student_Cuda/src/cpp/core/01_Hello/02_hello_add.cu
new file mode 100755 (executable)
index 0000000..70f3908
--- /dev/null
@@ -0,0 +1,104 @@
+// Attention : Extension .cu\r
+\r
+#include <iostream>\r
+#include <stdio.h>\r
+#include "cudaTools.h"\r
+#include "Device.h"\r
+\r
+using std::cout;\r
+using std::endl;\r
+\r
+/*----------------------------------------------------------------------*\\r
+ |*                    Declaration                                     *|\r
+ \*---------------------------------------------------------------------*/\r
+\r
+/*--------------------------------------*\\r
+ |*            Imported                *|\r
+ \*-------------------------------------*/\r
+\r
+/*--------------------------------------*\\r
+ |*            Public                  *|\r
+ \*-------------------------------------*/\r
+\r
// Demo entry point: runs the GPU scalar addition and returns true when it matches the host-side sum.
__host__  bool isAddScalarGPU_Ok(void);
+\r
+/*--------------------------------------*\\r
+ |*            Private                 *|\r
+ \*-------------------------------------*/\r
+\r
// Host wrapper: allocates device memory, launches addScalar with one thread, returns a + b.
__host__  static int addScalarGPU(int a, int b);
// Kernel: computes a + b on the device and stores the result in *ptrC.
__global__ static void addScalar(int a, int b, int* ptrC);
+\r
+/*----------------------------------------------------------------------*\\r
+ |*                    Implementation                                  *|\r
+ \*---------------------------------------------------------------------*/\r
+\r
+/*--------------------------------------*\\r
+ |*            Public                  *|\r
+ \*-------------------------------------*/\r
+\r
/**
 * "Hello Cuda 2" demo: computes a + b on the GPU via addScalarGPU and
 * validates the result against the host-side reference sum.
 *
 * @return true when the GPU result equals the CPU reference.
 */
__host__  bool isAddScalarGPU_Ok(void)
    {
    cout << endl << "[Hello Cuda 2]" << endl;

    int a = 2;
    int b = 7;

    int sumGPU = addScalarGPU(a, b);
    int sumTheorique = a + b; // host-side reference value

    // BUG FIX: the value printed here is the GPU result, but the label
    // said "[CPU]" — relabelled to "[GPU]" so the trace is not misleading.
    cout << "\n[GPU] " << a << " + " << b << " = " << sumGPU << endl;

    return sumGPU == sumTheorique;
    }
+\r
+/*--------------------------------------*\\r
+ |*            Private                 *|\r
+ \*-------------------------------------*/\r
+\r
/*
 * Launches the addScalar kernel with a single thread overall
 * (grid 1x1x1, block 1x1x1), copies the scalar result back to the
 * host and releases the device memory.
 *
 * Returns a + b as computed on the device.
 */
__host__ int addScalarGPU(int a, int b)
    {
    int result;               // host-side destination
    int* ptrDevResult;        // device-side output slot

    // Launch configuration: exactly one thread in total.
    const dim3 grid(1, 1, 1);
    const dim3 block(1, 1, 1);

    // Debug helper (kept available):
    //Device::print(grid, block);
    Device::checkDimError(grid, block);

    const size_t size = sizeof(int);
    HANDLE_ERROR(cudaMalloc((void**) &ptrDevResult, size)); // device allocation (*)

    addScalar<<<grid,block>>>(a, b, ptrDevResult); // asynchronous launch!
    Device::checkKernelError("addScalar"); // optional launch-error check

    // Synchronize so the kernel's device-side printf reaches the console.
    Device::synchronize();

    // cudaMemcpy acts as a synchronization barrier before reading the result.
    HANDLE_ERROR(cudaMemcpy(&result, ptrDevResult, size, cudaMemcpyDeviceToHost)); // Device -> Host
    HANDLE_ERROR(cudaFree(ptrDevResult)); // release the memory from (*)

    return result;
    }
+\r
+\r
/**
 * Single-thread kernel: writes a + b into *ptrC.
 * Expected launch configuration: one block of one thread — the kernel does
 * no thread indexing, so every launched thread would write the same value.
 *
 * @param ptrC valid device pointer receiving the sum.
 */
__global__ void addScalar(int a, int b, int* ptrC)
    {
    *ptrC = a + b;

    // debug trace (device printf — debugging only)
    // BUG FIX: added the trailing '\n' so the line is terminated and does
    // not run into subsequent host console output.
    printf("[GPU] %d + %d = %d\n", a, b, *ptrC);
    }
+\r
+/*----------------------------------------------------------------------*\\r
+ |*                    End                                             *|\r
+ \*---------------------------------------------------------------------*/\r
+\r