1 // Note: this file must keep the .cu extension (it contains CUDA kernel code compiled by nvcc).
\r
5 #include "cudaTools.h"
\r
11 /*----------------------------------------------------------------------*\
\r
13 \*---------------------------------------------------------------------*/
\r
15 /*--------------------------------------*\
\r
17 \*-------------------------------------*/
\r
19 /*--------------------------------------*\
\r
21 \*-------------------------------------*/
\r
// Public entry point: launches the GPU scalar addition and verifies the
// result against the CPU-computed sum. Returns true on a match.
23 __host__ bool isAddScalarGPU_Ok(void);
\r
25 /*--------------------------------------*\
\r
27 \*-------------------------------------*/
\r
// File-local (static) helpers:
//  - addScalarGPU: host-side wrapper (allocates device memory, launches the
//    kernel, copies the result back).
//  - addScalar: the kernel itself, run here with a single thread (see the
//    dim3(1,1,1) launch configuration in the implementation below).
29 __host__ static int addScalarGPU(int a, int b);
\r
30 __global__ static void addScalar(int a, int b, int* ptrC);
\r
32 /*----------------------------------------------------------------------*\
\r
33 |* Implementation *|
\r
34 \*---------------------------------------------------------------------*/
\r
36 /*--------------------------------------*\
\r
38 \*-------------------------------------*/
\r
// Self-test: computes a + b on the GPU via addScalarGPU and compares it with
// the CPU-side sum. Returns true when both agree.
// NOTE(review): this chunk appears truncated by extraction — the function
// braces and the declarations/initial values of `a` and `b` are not visible
// here; confirm against the full file.
40 __host__ bool isAddScalarGPU_Ok(void)
\r
42 cout << endl << "[Hello Cuda 2]" << endl;
\r
// GPU result (blocking: addScalarGPU synchronizes internally before returning).
47 int sumGPU = addScalarGPU(a, b);
\r
// CPU reference value used as the expected result.
48 int sumTheorique = a + b;
\r
50 cout <<"\n[CPU] "<< a << " + " << b << " = " << sumGPU << endl;
\r
// Test passes iff the GPU result matches the theoretical CPU sum.
52 return sumGPU == sumTheorique;
\r
55 /*--------------------------------------*\
\r
57 \*-------------------------------------*/
\r
// Host-side wrapper: allocates one int on the device, launches the addScalar
// kernel with a single thread, synchronizes, copies the result back to the
// host, and frees the device memory.
// NOTE(review): this chunk appears truncated — the declaration of the local
// `c` referenced below and the final `return c;` are not visible here;
// confirm against the full file.
59 __host__ int addScalarGPU(int a, int b)
\r
// Host-side destination for the result (presumably `int c;` is declared just
// above — not visible in this chunk).
62 int* ptrC=&c; // on host (CPU)
\r
63 int* ptrDev_c; // on device (GPU)
\r
65 // Specify the thread count: here exactly 1 thread in total!
\r
// Grid of 1 block ...
66 dim3 dg = dim3(1,1,1);
\r
// ... of 1 thread each: a deliberately minimal launch for this demo.
67 dim3 db = dim3(1, 1, 1);
\r
70 //Device::print(dg, db);
\r
// Validate the launch configuration against device limits (cudaTools helper).
71 Device::checkDimError(dg,db);
\r
73 size_t size=sizeof(int);
\r
74 HANDLE_ERROR(cudaMalloc((void**) &ptrDev_c, size)); // Device memory allocation (*)
\r
// Kernel launch is asynchronous with respect to the host!
76 addScalar<<<dg,db>>>(a,b,ptrDev_c); // asynchronous !!
\r
// Surface any launch error (bad config, etc.) — optional but recommended.
77 Device::checkKernelError("addScalar"); // optional
\r
// Block until the kernel finishes, so its GPU-side printf output is flushed.
80 Device::synchronize();// flush GPU printf / wait for kernel completion
\r
83 // cudaDeviceSynchronize(); // alternative: raw CUDA call for GPU printf flush
\r
85 // memory management note: the blocking cudaMemcpy below also acts as a synchronization barrier
\r
86 HANDLE_ERROR(cudaMemcpy(ptrC, ptrDev_c, size, cudaMemcpyDeviceToHost));// Device -> Host
\r
87 HANDLE_ERROR(cudaFree(ptrDev_c)); // dispose device memory allocated in (*)
\r
// Kernel: adds the two scalars and stores the sum through ptrC (a device
// pointer), then prints the result from the GPU. Intended to run as a single
// thread (launched <<<1,1>>> by addScalarGPU).
// NOTE(review): the store `*ptrC = a + b;` expected before this printf is not
// visible in this chunk — confirm against the full file; as shown, *ptrC
// would be read uninitialized.
93 __global__ void addScalar(int a, int b, int* ptrC)
\r
// Device-side printf: serialized and slow — debugging/demo use only.
98 printf("[GPU] %d + %d = %d",a,b, *ptrC);
\r
101 /*----------------------------------------------------------------------*\
\r
103 \*---------------------------------------------------------------------*/
\r