X-Git-Url: http://git.euphorik.ch/index.cgi?a=blobdiff_plain;f=WCudaMSE%2FBilatTools_Cuda%2Fsrc%2Fcore%2Fcudatools%2Fcpp%2FDevice.cpp;fp=WCudaMSE%2FBilatTools_Cuda%2Fsrc%2Fcore%2Fcudatools%2Fcpp%2FDevice.cpp;h=cd0d79885a158d612466bfe57e8f15f8afa8c074;hb=8d08c12b29c2a14684f35c023ee39e694bb80d25;hp=0000000000000000000000000000000000000000;hpb=226de81f7e1f1fbf4ac79d0d089e8a05ec7159a0;p=GPU.git diff --git a/WCudaMSE/BilatTools_Cuda/src/core/cudatools/cpp/Device.cpp b/WCudaMSE/BilatTools_Cuda/src/core/cudatools/cpp/Device.cpp new file mode 100755 index 0000000..cd0d798 --- /dev/null +++ b/WCudaMSE/BilatTools_Cuda/src/core/cudatools/cpp/Device.cpp @@ -0,0 +1,712 @@ +#include +#include +#include +#include + +#include "Device.h" +#include "cudaTools.h" +#include "Chronos.h" +#include "omp.h" +#include "StringTools.h" + +using std::string; +using std::cout; +using std::cerr; +using std::endl; +using std::flush; + +/*----------------------------------------------------------------------*\ + |* Declaration *| + \*---------------------------------------------------------------------*/ + +/*--------------------------------------*\ + |* Public *| + \*-------------------------------------*/ + +/*--------------------------------------*\ + |* Private *| + \*-------------------------------------*/ + +static void cout01(int isTrue); +static void coutBool(bool isFlag); +static int dim(const dim3& dim); + +/*----------------------------------------------------------------------*\ + |* Implementation *| + \*---------------------------------------------------------------------*/ + +/*--------------------------------------*\ + |* Public *| + \*-------------------------------------*/ + +/*--------------*\ +|* wrapper *| + \*-------------*/ + +void Device::synchronize(void) + { + cudaDeviceSynchronize(); + } + +/*--------------*\ +|* Tools *| + \*-------------*/ + +void Device::checkKernelError(const char *message) + { + cudaError_t error = cudaGetLastError(); + if (error != cudaSuccess) + { + if (message != NULL) + { + fprintf(stderr, "\n[CUDA ERROR] : Kernel Execution Failed : %s: %s\n\n", message, cudaGetErrorString(error)); + } + else + { + fprintf(stderr, "\n[CUDA ERROR] : Kernel Execution Failed : %s\n\n", cudaGetErrorString(error)); + } + exit (EXIT_FAILURE); + } + } + +void Device::checkDimError(const dim3& dg, const dim3& db) + { + assertDim(dg, db); + checkDimOptimiser(dg, db); + } + +void Device::checkDimOptimiser(const dim3& dg, const dim3& db) + { + int mpCount = Device::getMPCount(); + int warpSize = Device::getWarpSize(); + + int nbBlock = dim(dg); + int nbThreadBlock = dim(db); + + // grid + if (nbBlock < mpCount || nbBlock % mpCount != 0) + { + string messageGrid = "nbBlock = " + StringTools::toString(nbBlock) + " : Heuristic : For best performance, for nbBlock, try a multiple of #MP=" + + StringTools::toString(mpCount); + + cout << "dg" << "(" << dg.x << "," << dg.y << "," << dg.z << ") " << messageGrid << endl; + } + + // block + if (nbThreadBlock < warpSize || nbThreadBlock % warpSize != 0) // TODO kepler + { + string messageBlock = "nbThreadBlock = " + StringTools::toString(nbThreadBlock) + + " : Heuristic : For best performance, for nbThreadBlock, try a multiple of |warp|=" + StringTools::toString(warpSize); + + cout << "db" << "(" << db.x << "," << db.y << "," << db.z << ") " << messageBlock << endl; + } + } + +void Device::assertDim(const dim3& dg, const dim3& db) + { + // grid + { + dim3 dimGridMax = Device::getMaxGridDim(); + + assert(dg.x <= dimGridMax.x); + assert(dg.y <= dimGridMax.y); + assert(dg.z <= dimGridMax.z); + } + + // block + { + dim3 dimBlockMax = Device::getMaxBlockDim(); + + assert(db.x <= dimBlockMax.x); + assert(db.y <= dimBlockMax.y); + assert(db.z <= dimBlockMax.z); + } + + // Thread per block + assert(dim(db) <= getMaxThreadPerBlock()); + } + +void Device::print(const dim3& dg, const dim3& db) + { + cout << "dg" << "(" << dg.x << "," << dg.y << "," << dg.z << ") " << endl; + cout << "db" << "(" << db.x << "," << db.y << "," << db.z << ") " << endl; + } + +int Device::dim(const dim3& dim) + { + return dim.x * dim.y * dim.z; + } + +int Device::nbThread(const dim3& dg, const dim3& db) + { + return dim(dg) * dim(db); + } + +/*--------------*\ +|* get *| + \*-------------*/ + +int Device::getDeviceId(void) + { + int deviceId; + HANDLE_ERROR(cudaGetDevice(&deviceId)); + + return deviceId; + } + +int Device::getDeviceCount(void) + { + int nbDevice; + HANDLE_ERROR(cudaGetDeviceCount(&nbDevice)); + + return nbDevice; + } + +cudaDeviceProp Device::getDeviceProp(int idDevice) + { + cudaDeviceProp prop; + HANDLE_ERROR(cudaGetDeviceProperties(&prop, idDevice)); + return prop; + } + +cudaDeviceProp Device::getDeviceProp(void) + { + return getDeviceProp(getDeviceId()); + } + +/*--------------------------------------*\ + |* Secondaire *| + \*-------------------------------------*/ + +/*--------------*\ +|* get *| + \*-------------*/ + +string Device::getNameSimple(int idDevice) + { + return getDeviceProp(idDevice).name; + } + +string Device::getNameSimple() + { + return getNameSimple(getDeviceId()); + } + +string Device::getName(int idDevice) + { + string id = StringTools::toString(idDevice); + string a = StringTools::toString(getCapacityMajor(idDevice)); + string b = StringTools::toString(getCapacityMinor(idDevice)); + + return "[" + getNameSimple(idDevice) + "] id = " + id + " sm=" + a + "" + b; + } + +string Device::getName() + { + return getName(getDeviceId()); + } + +dim3 Device::getMaxGridDim(int idDevice) + { + cudaDeviceProp prop = getDeviceProp(idDevice); + + return dim3(prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]); + } + +dim3 Device::getMaxGridDim() + { + return getMaxGridDim(getDeviceId()); + } + +dim3 Device::getMaxBlockDim(int idDevice) + { + cudaDeviceProp prop = getDeviceProp(idDevice); + + return dim3(prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]); + } + +dim3 Device::getMaxBlockDim() + { + return getMaxBlockDim(getDeviceId()); + } + +int Device::getMaxThreadPerBlock(int idDevice) + { + return getDeviceProp(idDevice).maxThreadsPerBlock; + } + +int Device::getMaxThreadPerBlock() + { + return getMaxThreadPerBlock(getDeviceId()); + } + +int Device::getWarpSize(int idDevice) + { + return getDeviceProp(idDevice).warpSize; + } + +int Device::getWarpSize(void) + { + return getWarpSize(getDeviceId()); + } + +int Device::getMPCount(int idDevice) + { + return getDeviceProp(idDevice).multiProcessorCount; + } + +int Device::getMPCount(void) + { + return getMPCount(getDeviceId()); + } + +int Device::getCapacityMajor(int idDevice) + { + return getDeviceProp(idDevice).major; + } + +int Device::getCapacityMajor() + { + return getCapacityMajor(getDeviceId()); + } + +int Device::getCapacityMinor(int idDevice) + { + return getDeviceProp(idDevice).minor; + } + +int Device::getCapacityMinor() + { + return getCapacityMinor(getDeviceId()); + } + +int Device::getRuntimeVersion() + { + int version = -1; + cudaRuntimeGetVersion(&version); + return version; + + // return CUDART_VERSION; + } + +int Device::getDriverVersion(void) + { + int version = -1; + cudaDriverGetVersion(&version); + return version; + } + +int Device::getAsyncEngineCount(int idDevice) + { + return getDeviceProp(idDevice).asyncEngineCount; + } + +int Device::getAsyncEngineCount() + { + return getAsyncEngineCount(getDeviceId()); + } + +/*--------------*\ +|* is *| + \*-------------*/ + +bool Device::isCuda(void) + { + return getDeviceCount() >= 1; + } + +bool Device::isFermi(int idDevice) + { + int c = getCapacityMajor(idDevice); + + return c >= 2 && c < 3; + } + +bool Device::isFermi() + { + return isFermi(getDeviceId()); + } + +bool Device::isKepler(int idDevice) + { + int c = getCapacityMajor(idDevice); + + return c >= 3 && c < 4; + } + +bool Device::isKepler() + { + return isKepler(getDeviceId()); + } + +/** + * 64 bits only + */ +bool Device::isUVAEnable(int idDevice) + { + return getCapacityMajor() >= 2.0 && getRuntimeVersion() >= 4000; + } + +/** + * 64 bits only + */ +bool Device::isUVAEnable() + { + return isUVAEnable(getDeviceId()); + } + +bool Device::isAtomicShareMemoryEnable(int idDevice) + { + return (getCapacityMajor(idDevice) == 1 && Device::getCapacityMinor(idDevice) >= 2) || getCapacityMajor(idDevice) >= 2; + } + +bool Device::isAtomicShareMemoryEnable() + { + return isAtomicShareMemoryEnable(getDeviceId()); + } + +bool Device::isHostMapMemoryEnable(int idDevice) + { + return getDeviceProp(idDevice).canMapHostMemory; + } + +bool Device::isHostMapMemoryEnable() + { + return isHostMapMemoryEnable(getDeviceId()); + } + +bool Device::isECCEnable(int idDevice) + { + return getDeviceProp(idDevice).ECCEnabled; + } + +bool Device::isECCEnable(void) + { + return isECCEnable(getDeviceId()); + } + +bool Device::isAsyncEngine(int idDevice) + { + return getDeviceProp(idDevice).deviceOverlap; + } + +bool Device::isAsyncEngine(void) + { + return isAsyncEngine(getDeviceId()); + } + +/*--------------------------------------*\ + |* Print *| + \*-------------------------------------*/ + +void Device::print(int idDevice) + { + cudaDeviceProp prop = getDeviceProp(idDevice); + + cout << endl; + cout << "===========================================" << endl; + cout << " " << prop.name << " : id=" << idDevice << " : sm=" << prop.major << "" << prop.minor << " :" << endl; + cout << "===========================================" << endl; + + cout << endl; + cout << "Device id : " << idDevice << endl; + cout << "Name : " << prop.name << endl; + cout << "GPU capability : " << prop.major << "." << prop.minor << "" << endl; + cout << "is Fermi : "; + coutBool(isFermi(idDevice)); + cout << "is Kepler : "; + coutBool(isKepler(idDevice)); + cout << "Clock rate : " << prop.clockRate / 1000 << " MHZ" << endl; + cout << "GPU integrated on MB : "; + cout01(prop.integrated); + // cout << "ComputeMode : " << prop.computeMode << endl; + + cout << endl; + cout << "Kernel : " << endl; + cout << "Limit execution (timeout): "; + cout01(prop.kernelExecTimeoutEnabled); + + cout << endl; + cout << "Memory : " << endl; + cout << "Global Memory : " << prop.totalGlobalMem / 1024 / 1024 << " MB" << endl; + cout << "Constant Memory : " << prop.totalConstMem / 1024 << " KB" << endl; + cout << "Texture1D max size : (" << prop.maxTexture1D << ")" << endl; + cout << "Texture2D max size : (" << prop.maxTexture2D[0] << "," << prop.maxTexture2D[1] << ")" << endl; + cout << "Texture3D max size : (" << prop.maxTexture3D[0] << "," << prop.maxTexture3D[1] << "," << prop.maxTexture3D[2] << ")" << endl; + //cout << "Texture2D Array max Size : (" << ptrProp.maxTexture2DArray[0] << "," << ptrProp.maxTexture2DArray[1] << "," << ptrProp.maxTexture2DArray[2] << ")"<< endl; + cout << "Texture Alignment : " << prop.textureAlignment << " B" << endl; + cout << "Max mem pitch : " << prop.memPitch << endl; + + cout << endl; + cout << "Multiprocesseur(MP) : " << endl; + cout << "MP count : " << prop.multiProcessorCount << endl; + cout << "Shared memory per block : " << prop.sharedMemPerBlock / 1024 << " KB " << endl; + cout << "Register memory per block: " << prop.regsPerBlock / 1024 << " KB " << endl; + cout << "Max threads per block : " << prop.maxThreadsPerBlock << endl; + cout << "Max block dim : (" << prop.maxThreadsDim[0] << "," << prop.maxThreadsDim[1] << "," << prop.maxThreadsDim[2] << ")" << endl; + cout << "Max grid dim : (" << prop.maxGridSize[0] << "," << prop.maxGridSize[1] << "," << prop.maxGridSize[2] << ")" << endl; + cout << "Threads in warp : " << prop.warpSize << endl; + + cout << endl; + cout << "GPU Capacity : " << endl; + cout << "MapHostMemory : "; + cout01(isHostMapMemoryEnable()); + cout << "AtomicOperation sharedMemory : "; + cout01(isAtomicShareMemoryEnable()); + + cout << "UVA (Unified Virtual Addressing) : "; + cout01(isUVAEnable()); + + cout << "ECCEnabled : "; + cout01(prop.ECCEnabled); + + cout << "Concurrent bidirectional copy (DMA only!) : "; + cout01(prop.deviceOverlap); + if (prop.deviceOverlap) + { + cout << "Concurrent bidirectional copy : host->device // device->host : " << getAsyncEngineCount(idDevice) << endl; + } + + cout << "Concurrent Kernels (fermi 16x) : "; + cout01(prop.concurrentKernels); + + cout << endl; + if (Device::getDeviceCount() >= 2) + { + printP2PmatrixCompatibility(); + } + + cout << "Cuda Runtime version : " << getRuntimeVersion() << endl; + cout << "Cuda Driver version : " << getDriverVersion() << endl; + cout << endl; + // cout << "=================== end ========================" << endl; + +// cout << endl; +// cout << "===========================================" << endl; +// cout << "Cuda Runtime version : " << getRuntimeVersion() << endl; +// cout << "===========================================" << endl; +// cout << endl; + } + +void Device::print() + { + print(getDeviceId()); + } + +void Device::printAll() + { + cout << "\nList of all GPU available :" << endl; + printAllSimple(); + + cout << endl << "Details :" << endl; + int deviceCount = getDeviceCount(); + + for (int id = 0; id < deviceCount; id++) + { + print(id); + } + + printCurrent(); + } + +void Device::printAllSimple() + { + cout << endl; + cout << "==============================================================" << endl; + cout << "[CUDA] : List GPU Available : cuda version = " << getRuntimeVersion() <= 2) + { + cout << "P2P enable : symetric matrix (" << n << "x" << n << "):" << endl; + } + + for (int i = 0; i < n; i++) + { + for (int j = 0; j < n; j++) + { + if (i != j) + { + cout << *ptrMatrixP2PCompatibility << " "; + } + else + { + cout << " "; + } + + if (*ptrMatrixP2PCompatibility) + { + int flaginutile = 0; + HANDLE_ERROR(cudaSetDevice(i)); + HANDLE_ERROR(cudaDeviceEnablePeerAccess(j, flaginutile)); + } + ptrMatrixP2PCompatibility++; + } + } + + delete[] matrixP2PCompatibility; + } + +int* Device::p2pMatrixCompatibility() + { + int n = Device::getDeviceCount(); + + int* matrixP2PCompatibility = new int[n * n]; + int* ptrMatrixP2PCompatibility = matrixP2PCompatibility; + + for (int i = 0; i < n; i++) + { + for (int j = 0; j < n; j++) + { + int isP2PAutorized01; + HANDLE_ERROR(cudaDeviceCanAccessPeer(&isP2PAutorized01, i, j)); + + *ptrMatrixP2PCompatibility++ = isP2PAutorized01; + } + } + + return matrixP2PCompatibility; + } + +/*--------------------------------------*\ + |* Private *| + \*-------------------------------------*/ + +void cout01(int isTrue) + { + if (isTrue) + cout << "True" << endl; + else + cout << "False" << endl; + } + +void coutBool(bool isFlag) + { + if (isFlag) + cout << "True" << endl; + else + cout << "False" << endl; + } + +/*----------------------------------------------------------------------*\ + |* End *| + \*---------------------------------------------------------------------*/ +