// Recovered from a gitweb blobdiff (new file) of
//   WCudaMSE/Student_Cuda/src/cpp/core/01b_AddVector/AddVector.cu
//   blob 1a5b6ebbcb7a98a8935335ef8bc6610d120813c6,
//   revision 19015d26dfb874d075516772ef531ee5e42fa213,
//   http://git.euphorik.ch/?p=GPU.git

#include <iostream>

#include "Indice2D.h"
#include "cudaTools.h"
#include "Device.h"

using std::cout;
using std::endl;

static __global__ void add(float* ptrDevV1, float* ptrDevV2, int n, float* ptrDevResult);
static __device__ float work(float v1, float v2);

/**
 * Element-wise vector addition kernel: ptrDevResult[i] = ptrDevV1[i] + ptrDevV2[i].
 *
 * Each thread starts at the index returned by Indice2D::tid() and advances by
 * Indice2D::nbThread() until it reaches n, so any launch configuration covers
 * all n elements.
 * NOTE(review): Indice2D is a project helper; presumably tid() is the flattened
 * global thread id for a 2D grid of 2D blocks and nbThread() the total number
 * of launched threads - confirm against Indice2D.h.
 *
 * @param ptrDevV1     first input vector (device pointer, at least n floats)
 * @param ptrDevV2     second input vector (device pointer, at least n floats)
 * @param n            number of elements to process
 * @param ptrDevResult output vector (device pointer, at least n floats)
 */
__global__ void add(float* ptrDevV1, float* ptrDevV2, int n, float* ptrDevResult)
    {
    const int NB_THREAD = Indice2D::nbThread();
    const int TID = Indice2D::tid();

    // The bound check in the loop condition also protects threads whose
    // starting index is already past the end of the vectors.
    for (int s = TID; s < n; s += NB_THREAD)
        {
        ptrDevResult[s] = work(ptrDevV1[s], ptrDevV2[s]);
        }
    }

/**
 * Per-element operation applied by the kernel.
 *
 * @return v1 + v2
 */
__device__ float work(float v1, float v2)
    {
    return v1 + v2;
    }

/**
 * Adds two small hard-coded vectors on the GPU and checks the result.
 *
 * Allocates device buffers, uploads the inputs, launches add on a
 * 2x2 grid of 2x2 blocks, downloads the result and releases the device buffers.
 * CUDA API failures are reported through the project's HANDLE_ERROR macro.
 *
 * @return true when the result equals { 11, 22, 33 }
 */
bool addVectors()
    {
    // Inputs (would be function parameters in a general implementation).
    float v1[] = { 1, 2, 3 };
    float v2[] = { 10, 20, 30 };
    const int N = sizeof(v1) / sizeof(v1[0]);

    // Outputs (would be returned by the function in a general implementation).
    float vRes[N];

    // Device-side allocation.
    float* ptrDevV1 = 0;
    float* ptrDevV2 = 0;
    float* ptrDevVResult = 0;
    const size_t vecSize = N * sizeof(float);
    HANDLE_ERROR(cudaMalloc(&ptrDevV1, vecSize));
    HANDLE_ERROR(cudaMalloc(&ptrDevV2, vecSize));
    HANDLE_ERROR(cudaMalloc(&ptrDevVResult, vecSize));

    // Only the output buffer is zeroed: both input buffers are fully
    // overwritten by the host-to-device copies below.
    HANDLE_ERROR(cudaMemset(ptrDevVResult, 0, vecSize));

    HANDLE_ERROR(cudaMemcpy(ptrDevV1, v1, vecSize, cudaMemcpyHostToDevice));
    HANDLE_ERROR(cudaMemcpy(ptrDevV2, v2, vecSize, cudaMemcpyHostToDevice));

    const dim3 dg(2, 2, 1);
    const dim3 db(2, 2, 1);
    Device::assertDim(dg, db);

    add<<<dg, db>>>(ptrDevV1, ptrDevV2, N, ptrDevVResult);

    // A kernel launch returns no status itself; fetch launch-configuration
    // errors explicitly.
    HANDLE_ERROR(cudaGetLastError());

    // Implicit synchronization barrier: the blocking cudaMemcpy runs after the kernel.
    HANDLE_ERROR(cudaMemcpy(vRes, ptrDevVResult, vecSize, cudaMemcpyDeviceToHost));

    // Release the device buffers now that the result is on the host.
    HANDLE_ERROR(cudaFree(ptrDevV1));
    HANDLE_ERROR(cudaFree(ptrDevV2));
    HANDLE_ERROR(cudaFree(ptrDevVResult));

    // Exact comparison is safe here: all operands and sums are small integers
    // that float represents exactly.
    return vRes[0] == 11 && vRes[1] == 22 && vRes[2] == 33;
    }