From fd0031be0a39a5d902750affaff6322fcd5229b1 Mon Sep 17 00:00:00 2001
From: gburri <gregory.burri@master.hes-so.ch>
Date: Sat, 10 Jan 2015 18:20:27 +0100
Subject: [PATCH] Ajout du support du multi-GPU pour Mandelbrot.

---
 .../moo/device/FractalDevice.cu               | 12 +--
 .../moo/device/FractalDevice.h                |  2 +-
 .../02_Mandelbrot_Julia/moo/host/Fractal.cu   | 76 +++++++++++++++----
 .../02_Mandelbrot_Julia/moo/host/Fractal.h    | 23 ++++--
 .../provider/FractalProvider.cpp              |  8 +-
 .../provider/FractalProvider.h                |  4 +-
 .../src/cpp/core/mainGL.cpp                   | 18 +++--
 7 files changed, 103 insertions(+), 40 deletions(-)

diff --git a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/device/FractalDevice.cu b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/device/FractalDevice.cu
index a98ed36..1b326db 100755
--- a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/device/FractalDevice.cu
+++ b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/device/FractalDevice.cu
@@ -12,11 +12,11 @@
 using std::cout;
 using std::endl;
 
-__device__ void fractal(uchar4* ptrDevPixels, int w, int h, const DomaineMath& domaineMath, int n, const FractalMath& fractalMath)
+__device__ void fractal(uchar4* ptrDevPixels, int w, int hFrom, int hTo, const DomaineMath& domaineMath, int n, const FractalMath& fractalMath)
     {
     const int TID = Indice2D::tid();
     const int NB_THREAD = Indice2D::nbThread();
-    const int WH = w * h;
+    const int WH = w * (hTo - hFrom);
 
     uchar4 color;
     color.z = 255; // Par défaut, l'image est opaque.
@@ -31,7 +31,7 @@ __device__ void fractal(uchar4* ptrDevPixels, int w, int h, const DomaineMath& d
 
         // (i,j) domaine écran
         // (x,y) domaine math
-        domaineMath.toXY(pixelI, pixelJ, &x, &y); //  (i,j) -> (x,y)
+        domaineMath.toXY(pixelI + hFrom, pixelJ, &x, &y); //  (i,j) -> (x,y)
 
         fractalMath.colorXY(&color, x, y);
 
@@ -41,15 +41,15 @@ __device__ void fractal(uchar4* ptrDevPixels, int w, int h, const DomaineMath& d
         }
     }
 
-__global__ void fractalMandelbrot(uchar4* ptrDevPixels, int w, int h, DomaineMath domaineMath, int n)
+__global__ void fractalMandelbrot(uchar4* ptrDevPixels, int w, int hFrom, int hTo, DomaineMath domaineMath, int n)
     {
     FractalMandelbrotMath fractalMath(n);
-    fractal(ptrDevPixels, w, h, domaineMath, n, fractalMath);
+    fractal(ptrDevPixels, w, hFrom, hTo, domaineMath, n, fractalMath);
     }
 
 __global__ void fractalJulia(uchar4* ptrDevPixels, int w, int h, DomaineMath domaineMath, int n, float c_r, float c_i)
     {
     FractalJuliaMath fractalMath(n, c_r, c_i);
-    fractal(ptrDevPixels, w, h, domaineMath, n, fractalMath);
+    fractal(ptrDevPixels, w, 0, h, domaineMath, n, fractalMath);
     }
 
diff --git a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/device/FractalDevice.h b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/device/FractalDevice.h
index 137aab2..44bf422 100644
--- a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/device/FractalDevice.h
+++ b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/device/FractalDevice.h
@@ -3,7 +3,7 @@
 
 #include "DomaineMath.h"
 
-__global__ void fractalMandelbrot(uchar4* ptrDevPixels, int w, int h, DomaineMath domaineMath, int n);
+__global__ void fractalMandelbrot(uchar4* ptrDevPixels, int w, int hFrom, int hTo, DomaineMath domaineMath, int n);
 __global__ void fractalJulia(uchar4* ptrDevPixels, int w, int h, DomaineMath domaineMath, int n, float c_r, float c_i);
 
 #endif
diff --git a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/host/Fractal.cu b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/host/Fractal.cu
index bfaff4b..67c8b74 100755
--- a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/host/Fractal.cu
+++ b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/host/Fractal.cu
@@ -1,17 +1,19 @@
 #include "Fractal.h"
 
 #include <iostream>
+#include <sstream>
 #include <assert.h>
 using namespace std;
 
+#include <omp.h>
+
 #include "FractalDevice.h"
 #include "Device.h"
 
 Fractal::Fractal(int w, int h) :
         w(w), h(h),
         dg(8, 8, 1),
-        db(16, 16, 1),
-        title("Fractal Cuda")
+        db(16, 16, 1)
     {
     //print(dg, db);
     Device::assertDim(dg, db);
@@ -43,22 +45,38 @@ DomaineMath* Fractal::getDomaineMathInit()
     return this->ptrDomaineMathInit;
     }
 
-/**
- * Override
- */
-string Fractal::getTitle()
-    {
-    return this->title;
-    }
-
 /////
 
-FractalMandelbrot::FractalMandelbrot(int w, int h, int dn) :
+FractalMandelbrot::FractalMandelbrot(int w, int h, int dn, bool multiGPU) :
         Fractal(w, h),
         variateurAnimationN(IntervalI(10, 100), dn),
-        n(0)
+        n(0),
+        multiGPU(multiGPU)
     {
+    // Constuit le titre dynamiquement.
+    ostringstream titleStream;
+    titleStream << "Fractal Mandelbrot (multi-GPU " << (this->multiGPU ? "activated" : "not activated") << ")";
+    this->title = titleStream.str();
+
     this->ptrDomaineMathInit = new DomaineMath(-2, -1.3, 0.8, 1.3);
+
+    if (this->multiGPU)
+        {
+        const int nbDevice = Device::getDeviceCount();
+
+        this->hDevices = h / nbDevice;
+        this->hFirstDevice = h - ((nbDevice - 1) * this->hDevices);
+
+        // Allocation de la mémoire sur chaque GPU (sauf le premier pour lequel 'ptrDevPixels' est automatiquement alloué à l'appel de 'runGPU(..)').
+        this->ptrDevPixelsMultGPU = new uchar4*[nbDevice - 1];
+        for (int i = 0; i < nbDevice - 1; ++i)
+            {
+            HANDLE_ERROR(cudaSetDevice(i + 1));
+            HANDLE_ERROR(cudaMalloc(&this->ptrDevPixelsMultGPU[i], sizeof(uchar4) * w * this->hDevices));
+            }
+
+        HANDLE_ERROR(cudaSetDevice(0));
+        }
     }
 
 void FractalMandelbrot::animationStep()
@@ -78,9 +96,36 @@ void FractalMandelbrot::getValues(float* values)
     values[0] = float(this->n);
     }
 
+string FractalMandelbrot::getTitle()
+    {
+    return this->title;
+    }
+
 void FractalMandelbrot::runGPU(uchar4* ptrDevPixels, const DomaineMath& domaineMath)
     {
-    fractalMandelbrot<<<dg,db>>>(ptrDevPixels, this->w, this->h, domaineMath, static_cast<int>(this->n));
+
+    if (this->multiGPU)
+        {
+        HANDLE_ERROR(cudaSetDevice(0));
+        fractalMandelbrot<<<dg,db>>>(ptrDevPixels, this->w, 0, this->hFirstDevice, domaineMath, this->n);
+
+        const int nbDevice = Device::getDeviceCount();
+
+        // Rend chaque tranche par un GPU différent puis copie chaque tranche dans la mémoire du premier GPU.
+        #pragma omp parallel for
+        for (int i = 0; i < nbDevice - 1; ++i)
+            {
+            HANDLE_ERROR(cudaSetDevice(i + 1));
+            fractalMandelbrot<<<dg,db>>>(this->ptrDevPixelsMultGPU[i], this->w, i * this->hDevices + this->hFirstDevice, (i + 1) * this->hDevices + this->hFirstDevice, domaineMath, this->n);
+            HANDLE_ERROR(cudaMemcpy(ptrDevPixels + this->w * this->hFirstDevice + i * this->w * this->hDevices, this->ptrDevPixelsMultGPU[i], sizeof(uchar4) * this->w * this->hDevices, cudaMemcpyDeviceToDevice));
+            }
+
+        HANDLE_ERROR(cudaSetDevice(0));
+        }
+    else
+        {
+        fractalMandelbrot<<<dg,db>>>(ptrDevPixels, this->w, 0, this->h, domaineMath, this->n);
+        }
     }
 
 /////
@@ -116,6 +161,11 @@ void FractalJulia::getValues(float* values)
     values[1] = this->z_i;
     }
 
+string FractalJulia::getTitle()
+    {
+    return "Fractal Julia";
+    }
+
 void FractalJulia::runGPU(uchar4* ptrDevPixels, const DomaineMath& domaineMath)
     {
     fractalJulia<<<dg,db>>>(ptrDevPixels, this->w, this->h, domaineMath, this->n, this->z_r, this->z_i);
diff --git a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/host/Fractal.h b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/host/Fractal.h
index 5ad7b83..41813e2 100755
--- a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/host/Fractal.h
+++ b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/moo/host/Fractal.h
@@ -21,8 +21,6 @@ class Fractal : public AnimableFonctionel_I
         int getH() /*override*/;
         DomaineMath* getDomaineMathInit() /*override*/;
 
-        string getTitle(void) /*override*/;
-
     protected:
         // Inputs
         const int w;
@@ -33,25 +31,32 @@ class Fractal : public AnimableFonctionel_I
         const dim3 db;
 
         DomaineMath* ptrDomaineMathInit;
-
-        // Outputs
-        const string title;
     };
 
 class FractalMandelbrot : public Fractal
     {
     public:
-        FractalMandelbrot(int w, int h, int dn);
+        FractalMandelbrot(int w, int h, int dn, bool multiGPU = false);
         void animationStep();
 
         std::vector<std::string> getNames();
         void getValues(float* values);
 
+        std::string getTitle();
+
     private:
-        void runGPU(uchar4* ptrDevPixels, const DomaineMath& domaineMath) /*override*/;
+        void runGPU(uchar4* ptrDevPixels, const DomaineMath& domaineMath);
 
         VariateurI variateurAnimationN;
         int n;
+
+        string title;
+
+        // Utilisé uniquement dans le cadre du multi-GPU.
+        bool multiGPU;
+        uchar4** ptrDevPixelsMultGPU; // La mémoire alloué pour les GPU autres que le premier
+        int hFirstDevice; // Hauteur de l'image à traiter par le premier GPU.
+        int hDevices; // Hauteur de l'image à traiter par les autres GPU.
     };
 
 class FractalJulia : public Fractal
@@ -63,8 +68,10 @@ class FractalJulia : public Fractal
         std::vector<std::string> getNames();
         void getValues(float* values);
 
+        std::string getTitle();
+
     private:
-        void runGPU(uchar4* ptrDevPixels, const DomaineMath& domaineMath) /*override*/;
+        void runGPU(uchar4* ptrDevPixels, const DomaineMath& domaineMath);
 
         const int n;
 
diff --git a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/provider/FractalProvider.cpp b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/provider/FractalProvider.cpp
index ba94d73..fc82c54 100755
--- a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/provider/FractalProvider.cpp
+++ b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/provider/FractalProvider.cpp
@@ -1,11 +1,11 @@
 #include "FractalProvider.h"
 
-Fractal* MandelbrotProvider::create()
+Fractal* MandelbrotProvider::create(bool multiGPU)
     {
     int dw = 16 * 50;
     int dh = 16 * 30;
 
-    return new FractalMandelbrot(dw, dh, 1);
+    return new FractalMandelbrot(dw, dh, 1, multiGPU);
     }
 
 Fractal* JuliaProvider::create()
@@ -16,10 +16,10 @@ Fractal* JuliaProvider::create()
     return new FractalJulia(dw, dh, 300, -0.745, -0.32, -0.09, 0.1);
     }
 
-ImageFonctionel* MandelbrotProvider::createGL()
+ImageFonctionel* MandelbrotProvider::createGL(bool multiGPU)
     {
     ColorRGB_01* ptrColorTitre = new ColorRGB_01(0, 0, 100);
-    return new ImageFonctionel(create(), ptrColorTitre); // both ptr destroy by destructor of ImageFonctionel
+    return new ImageFonctionel(create(multiGPU), ptrColorTitre); // both ptr destroy by destructor of ImageFonctionel
     }
 
 ImageFonctionel* JuliaProvider::createGL()
diff --git a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/provider/FractalProvider.h b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/provider/FractalProvider.h
index eca7b3c..f29e4cb 100755
--- a/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/provider/FractalProvider.h
+++ b/WCudaMSE/Student_Cuda_Image/src/cpp/core/02_Mandelbrot_Julia/provider/FractalProvider.h
@@ -7,8 +7,8 @@
 class MandelbrotProvider
     {
     public:
-	static Fractal* create();
-	static ImageFonctionel* createGL();
+	static Fractal* create(bool multiGPU);
+	static ImageFonctionel* createGL(bool multiGPU);
     };
 
 
diff --git a/WCudaMSE/Student_Cuda_Image/src/cpp/core/mainGL.cpp b/WCudaMSE/Student_Cuda_Image/src/cpp/core/mainGL.cpp
index fe29a86..b15ca7b 100755
--- a/WCudaMSE/Student_Cuda_Image/src/cpp/core/mainGL.cpp
+++ b/WCudaMSE/Student_Cuda_Image/src/cpp/core/mainGL.cpp
@@ -19,19 +19,25 @@ template <class TOutput, class TProvider>
 class Viewer
     {
     private:
-        TOutput* ptrProvider;
+        TOutput* ptrOutput;
         GLUTImageViewers viewer;
 
     public:
         Viewer(bool isAnimation, bool isSelection, int pxFrame, int pyFrame):
-            ptrProvider(TProvider::createGL()),
-            viewer(ptrProvider, isAnimation, isSelection, pxFrame, pyFrame)
+        ptrOutput(TProvider::createGL()),
+            viewer(ptrOutput, isAnimation, isSelection, pxFrame, pyFrame)
+            {
+            }
+
+        Viewer(TOutput* output, bool isAnimation, bool isSelection, int pxFrame, int pyFrame):
+            ptrOutput(output),
+            viewer(ptrOutput, isAnimation, isSelection, pxFrame, pyFrame)
             {
             }
 
         ~Viewer()
             {
-            delete this->ptrProvider;
+            delete this->ptrOutput;
             }
     };
 
@@ -39,13 +45,13 @@ int mainGL(void)
     {
     // Viewer<Rippling0Image, Rippling0Provider> rippling0(true, true, 10, 10);
     // Viewer<Image, RipplingProvider> rippling0(true, true, 10, 10);
-    Viewer<ImageFonctionel, MandelbrotProvider> fractalMandelbrot(true, true, 20, 20);
+    Viewer<ImageFonctionel, MandelbrotProvider> fractalMandelbrot(MandelbrotProvider::createGL(true), true, true, 20, 20);
     // Viewer<ImageFonctionel, JuliaProvider> fractalJulia(true, true, 30, 30);
     // Viewer<ImageFonctionel, NewtonProvider> newtown(true, true, 20, 20);
     // Viewer<Image, HeatTransfertProvider> heatTransfert(true, false, 20, 20);
     // Viewer<ImageFonctionel, RayTracingProvider> rayTracing(true, true, 20, 20);
 
-    GLUTImageViewers::runALL(); // Bloquant, Tant qu'une fenetre est ouverte
+    GLUTImageViewers::runALL(); // Bloquant, Tant qu'une fenetre est ouverte.
 
     return EXIT_SUCCESS;
     }
-- 
2.49.0