Documentation/ma_autocount/plasmodium-autocount-0.1.py

   1 # -*- coding: utf-8 -*-
   2 #!/usr/bin/env python
   3
   4 #    Copyright (C) 2010 Monash University
   5 #    This program is free software; you can redistribute it and/or modify
   6 #    it under the terms of the GNU General Public License as published by
   7 #    the Free Software Foundation; either version 2 of the License, or
   8 #    (at your option) any later version.
   9 #
  10 #    This program is distributed in the hope that it will be useful,
  11 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 #    GNU General Public License for more details.
  14 #
  15 #    You should have received a copy of the GNU General Public License
  16 #    along with this program; if not, write to the Free Software
  17 #    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19
  20 """
  21
  22 Identify red blood cells and red blood cells infected with plasmodium in a set of images.
  23
  24 """
  25
  26 from PIL import Image
  27 import sys, numpy, os, random
  28 from scipy import ndimage
  29
  30 Help = """
  31
  32 Usage:
  33
  34     python %s [parameter adjustments] <image_directory>
  35
  36 Images should be in TIFF, PNG, or BMP format.
  37
  38 Results will be placed in a new directory named <image_directory>_output.
  39
  40 If the images were annotated using Cell Counting Aid, the hand annotations
  41 will be shown as circles in the output.
  42
  43 Parameter adjustments take the form
  44
  45   variable_name=value
  46
  47 See the top of the source code for parameters and their default values.
  48 These have been tuned for our microscope setup, optimal parameters for
  49 your setup may differ.
  50
  51 """
  52
  53 # How many subprocesses to run at once
  54 N_processors = 1
  55
  56 # ===========
  57 # Diagnostics
  58 #
  59 # What to show in diagnostic output images
  60
  61 Show_edges = True
  62 Show_stain = False
  63 Show_infection = False
  64
  65 Show_hand_marks = False
  66 Show_computer_marks = True
  67 Show_cell_shapes = True
  68
  69
  70 # =================
  71 # Image scaling
  72
  73 Resize = False
  74
  75 # Tailles pour les images dans '08.10.2015'
  76 Resize_width = 1296 #680
  77 Resize_height = 972 #512
  78
  79 # Tailles pour les images dans '22.05.2015' (50%)
  80 #Resize_width = 1296
  81 #Resize_height = 972
  82
  83
  84 # [GB] Hemato
  85 # Resize_width = 2864
  86 # Resize_height = 2885
  87
  88
  89 # ==========================================
  90 # Image enhancement and fg/bg discrimination
  91 #
  92 # Blur slightly to reduce noise, and unsharp-mask
  93 #
  94 # gaussian_blur(image, a) - b * gaussian_blur(image, c)
  95
  96 Enhance_a = 1.0
  97 Enhance_b = 0.75
  98 Enhance_c = 20.0
  99
 100 # Larger = enlarge background area, smaller = enlarge foreground area
 101 Background_weight = 0.915 #&median
 102
 103
 104 # =============================
 105 # Stain and infection detection
 106
 107 Dark_stain_level = 0.839
 108
 109 Stain_level = 0.9
 110 Stain_bg_blur = 10.0
 111 Stain_spread_required = 3.0
 112
 113 Infection_level = 0.762
 114 Infection_bg_blur = 2.0
 115 Infection_pixels_required = 1
 116
 117
 118
 119 # ====================
 120 # Cell size parameters
 121
 122 # Size of cells, for Hough transform
 123 Hough_min_radius = 5
 124 Hough_max_radius = 32 # 20
 125
 126 # Portion of cell used for classification as infected/weird
 127 # (may be abutting an infected cell, so be cautious)
 128 # (as a proportion of the true radius)
 129 Cell_inside_radius = 0.9
 130
 131 # Do not allow another cell center to be detected within this radius
 132 Cell_suppression_radius = 1.05 # 1.25
 133
 134
 135 # =======================
 136 # Peculiar cell filtering
 137
 138 # White blood cells appear to be composed entirely of dark staining, filter them out
 139 Max_dark_stain_proportion = 0.728
 140
 141 # Minimum/maximum radius (as determined by Hough transform)
 142 Min_cell_radius = 11 # 9
 143 Max_cell_radius = 32 # 20
 144
 145 # Proportion of radius that center of mass can be away from Hough determined center
 146 Max_offcenter = 0.5
 147
 148
 149 _offset_cache = { }
 150 def offsets(inner_radius, outer_radius):
 151     """
 152     Generate a list of offsets for pixels within a circular region
 153     """
 154
 155     key = (inner_radius,outer_radius)
 156     if key not in _offset_cache:
 157         result = [ ]
 158         int_radius = int(outer_radius)+1
 159         for y in xrange(-int_radius,int_radius+1):
 160             for x in xrange(-int_radius,int_radius+1):
 161                 if inner_radius*inner_radius <= y*y+x*x <= outer_radius*outer_radius:
 162                     result.append((y,x))
 163         _offset_cache[key] = numpy.array(result)
 164     return _offset_cache[key]
 165
 166
 167 def clipped_coordinates(inner_radius,outer_radius, y,x, height,width):
 168     """
 169     Coordinates contained within a ring or circle, clipped to rectangular bounds.
 170     """
 171
 172     coords = offsets(inner_radius,outer_radius) + [[y,x]]
 173     valid = (
 174         (coords[:,0] >= 0) &
 175         (coords[:,0] < height) &
 176         (coords[:,1] >= 0) &
 177         (coords[:,1] < width)
 178     )
 179     return coords[valid]
 180
 181
 182 def set(array, y, x, value):
 183     """
 184     Set an element of an array, if it is in bounds, otherwise silently do nothing
 185     """
 186
 187     if y >= 0 and x >= 0 and y < array.shape[0] and x < array.shape[1]:
 188         array[y,x] = value
 189
 190
 191 def mark_hand(array, filename_in):
 192     """
 193     If there is a file of hand marked cells from Cell Counting Aid, mark them.
 194     """
 195
 196     hand_name = os.path.splitext(filename_in)[0] + '.txt'
 197     if not os.path.exists(hand_name): return
 198
 199     f = open(hand_name,'rU')
 200
 201     #Skip header line
 202     f.readline()
 203
 204     for line in f:
 205         parts = line.rstrip('\n').split(',')
 206         x,y,infected = parts[:3]
 207
 208         x = int( int(x)/1005.0*array.shape[1] +0.5)
 209         y = int( int(y)/592.0*array.shape[0] +0.5)
 210         if infected == 'True':
 211             if len(parts) == 3:
 212                 color = (196,0,0)
 213             else:
 214                 infection_type = parts[3]
 215                 if infection_type == '1':
 216                     color = (196, 196, 0)
 217                 elif infection_type == '3':
 218                     color = (196, 0, 0)
 219                 else:
 220                     raise Exception('Unknown infection type')
 221         else:
 222             color = (0,0,196)
 223         for i in (-2,-1,0,1,2):
 224             set(array,y-3,x+i,color)
 225             set(array,y+3,x+i,color)
 226             set(array,y+i,x-3,color)
 227             set(array,y+i,x+3,color)
 228
 229
 230 def gaussian_blur(array, amount):
 231     """
 232     Gaussian blur each channel of a color image.
 233     """
 234     output = numpy.empty(array.shape, array.dtype)
 235     for i in xrange(array.shape[2]):
 236         ndimage.gaussian_filter(array[:,:,i],amount,output=output[:,:,i])
 237     return output
 238
 239
 240 def status(word):
 241     pass
 242     #sys.stdout.write(word+' ')
 243     #sys.stdout.flush()
 244
 245
 246 def process(filename_in, filename_out, filename_coords, name):
 247     # print 'File: ', filename_in
 248
 249     # Load image
 250     i = Image.open(filename_in)
 251
 252     print 'Processing', filename_in, '->', filename_out
 253
 254     # Resize (mostly done for speed increase)
 255     i = i.resize((Resize_width,Resize_height), Image.ANTIALIAS) if Resize else i
 256
 257     # Convert to numpy array
 258     a = numpy.asarray(i).astype('float64') # [GB] Shape: (512, 680, 3)
 259     height = a.shape[0]
 260     width = a.shape[1]
 261
 262     a_gray = numpy.sum(a, 2) # [GB] Somme de chaque composante RGB (non-normalisé)
 263
 264     # Make a copy of the array to doodle various diagnostic markings on
 265     a_output = a.copy()
 266
 267     # If there are hand-marked cells, show them in the output image
 268     if Show_hand_marks:
 269         mark_hand(a_output, filename_in)
 270
 271
 272     # Denoise/unsharp mask
 273     # [GB] (Concerne les trois channels RGB).
 274     a_enhanced = gaussian_blur(a, Enhance_a) - Enhance_b * gaussian_blur(a, Enhance_c)
 275
 276     # Split into foreground and background using k-medians
 277     status('fg/bg')
 278
 279     # [GB] Chaque canal de couleur est linéarisé dans un tableau.
 280     a_raveled = numpy.reshape(a_enhanced,(width*height,a_enhanced.shape[2]))
 281
 282     # [GB] Moyenne de chaque canal.
 283     average = numpy.average(a_raveled, axis=0)
 284
 285     # Initial guess of the medians
 286     color_bg = average * 1.5 # [GB] Plus clair
 287     color_fg = average * 0.5 # [GB] Plus sombre
 288
 289     # Fifty times: (0 to 4).
 290     for i in xrange(5):
 291         d_bg = a_raveled - color_bg[None,:]
 292         d_bg *= d_bg
 293         d_bg = numpy.sum(d_bg, 1)  # [GB] Somme des carrés des couleurs
 294         d_fg = a_raveled - color_fg[None,:]
 295         d_fg *= d_fg
 296         d_fg = numpy.sum(d_fg, 1)
 297         fg = d_fg * Background_weight < d_bg
 298         color_fg = numpy.median(a_raveled[fg,:],axis=0) # [GB] axis = 0 -> par colonne (ordonnées). Le résultat est alors 3 valeurs, une pour chaque couleur
 299         color_bg = numpy.median(a_raveled[~fg,:],axis=0)
 300
 301     fg = numpy.reshape(fg, (height,width)) # [GB] Image binaire du premier plan.
 302
 303     edge = ndimage.maximum_filter(fg,size=3) != ndimage.minimum_filter(fg,size=3) # [GB] image binaire des contours du premier plan.
 304
 305     # [GB] délinéarisation des tableaux résultant du k-median (niveau de gris).
 306     d_fg = numpy.sqrt(numpy.reshape(d_fg, (height,width)))
 307     d_bg = numpy.sqrt(numpy.reshape(d_bg, (height,width)))
 308
 309     # Staining is taken as foreground pixels that differ markedly from the mean foreground color
 310     # (also, they must be darker in the red and green channels)
 311     # The background intensity is simply a useful basis for scaling
 312     # (should be invariant under changes in brightness)
 313     mag_bg = numpy.average(color_bg)
 314
 315     # [GB] Les "dark stain" :
 316     # * sont d'une intensité plus élevée que la moyenne du bg multiplié par un facteur (Dark_stain_level)
 317     # * les valeurs rouges sont plus petites que la mediane du premier-plan pour les rouges
 318     # * les valeurs vertes sont plus petites que la médiane du premier-plan pour les verts
 319     # * appartiennent au foreground
 320     dark_stain = (
 321         (d_fg > mag_bg*Dark_stain_level) &
 322         (a_enhanced[:,:,0] < color_fg[0]) & # [GB] Red channel
 323         (a_enhanced[:,:,1] < color_fg[1]) & # [GB] Green channel
 324         fg
 325     )
 326
 327
 328     fg_float = fg.astype('float64')
 329     green = a[:,:,1] * fg_float
 330     green_local_bg = ndimage.gaussian_filter(green, Stain_bg_blur)
 331     green_local_bg_divisor = ndimage.gaussian_filter(fg_float, Stain_bg_blur)
 332     stain = (
 333         (green * green_local_bg_divisor < green_local_bg * Stain_level) &
 334         fg
 335     )
 336
 337     # Detect infection as lumpiness in the green channel (green is most affected by staining)
 338     # Must also be within a stained region
 339     green_local_bg = ndimage.gaussian_filter(green, Infection_bg_blur)
 340     green_local_bg_divisor = ndimage.gaussian_filter(fg_float, Infection_bg_blur)
 341     infection = (
 342         (green * green_local_bg_divisor < green_local_bg * Infection_level) &
 343         stain
 344     )
 345
 346     # Show edges and infection on output image
 347     if Show_edges:
 348         a_output[edge,:] += 64
 349     if Show_stain:
 350         a_output[stain,2] += 128
 351     if Show_infection:
 352         a_output[infection,0] += 128
 353
 354     #a_output[stain, 1] += 128
 355
 356
 357     # Hough transform
 358     status('hough')
 359
 360     x_edge = ndimage.convolve(a_gray,
 361         [[-1,0,1],
 362          [-2,0,2],
 363          [-1,0,1]])
 364     y_edge = ndimage.convolve(a_gray,
 365         [[-1,-2,-1],
 366          [0,0,0],
 367          [1,2,1]])
 368     mag = numpy.sqrt(x_edge*x_edge+y_edge*y_edge)
 369     x_dir = x_edge / mag
 370     y_dir = y_edge / mag
 371
 372     # Vectorized for enhanced performance and incomprehensibility:
 373     ys = []
 374     xs = []
 375     for y in xrange(height):
 376         for x in xrange(width):
 377             if edge[y,x]:
 378                ys.append(y)
 379                xs.append(x)
 380     ys = numpy.array(ys)
 381     xs = numpy.array(xs)
 382     indexes = numpy.arange(len(ys))
 383
 384     def do_hough(radius):
 385         hough = numpy.zeros((height,width), 'float64')
 386
 387         oys = (ys + y_dir[ys,xs]*radius + 0.5).astype('int')
 388         oxs = (xs + x_dir[ys,xs]*radius + 0.5).astype('int')
 389         valid = (
 390             (oys >= 0) &
 391             (oys < height) &
 392             (oxs >= 0) &
 393             (oxs < width)
 394         )
 395         for i in indexes[valid]:
 396             hough[oys[i],oxs[i]] += 1.
 397
 398         #for y in xrange(height):
 399         #    for x in xrange(width):
 400         #        if edge[y,x]:
 401         #            oy = y + y_dir[y,x]*radius
 402         #            ox = x + x_dir[y,x]*radius
 403         #            ioy = int(oy+0.5)
 404         #            iox = int(ox+0.5)
 405         #            if ioy>0 and iox>0 and ioy<height and iox<width:
 406         #                hough[ioy,iox] += 1.
 407
 408         # Blur the transformed image a little
 409         # (more blurring allows greater deviation from circularity)
 410         # (if you alter this you will also need to alter thresh, below)
 411
 412         hough = ndimage.gaussian_filter(hough, 1.0)
 413
 414         return hough
 415
 416     hough = numpy.zeros((height,width),'float64')
 417     hough_radius = numpy.zeros((height,width),'int')
 418     for i in xrange(Hough_min_radius,Hough_max_radius+1):
 419         #print 'Hough radius', i
 420         this_hough = do_hough(i)
 421         better = this_hough > hough
 422         hough_radius[better] = i
 423         hough[better] = this_hough[better]
 424
 425     # Show result of Hough transform in output image
 426     #a_output[:,:,:] = hough[:,:,None]*20
 427
 428     # Find and classify peaks in Hough transform image
 429     thresh = 1.5 #Hmm
 430     candidates = [ ]
 431     for y in xrange(height):
 432         for x in xrange(width):
 433             if hough[y, x] > thresh:
 434                 candidates.append((hough[y,x], y, x))
 435     candidates.sort(reverse = True)
 436     hough
 437
 438
 439     status('classify')
 440
 441     file_coords = open(filename_coords,'wt')
 442     file_coords.write('Autocount on %dx%d image\n' % (width,height))
 443
 444     suppress = numpy.zeros((height,width), 'bool')
 445     claim_strength = numpy.zeros((height,width), 'float64') + 1e30
 446     cells = [] # [(y,x)]
 447     n = 0
 448     n_infected = 0
 449     for _,y,x in candidates:
 450         radius = hough_radius[y,x]
 451
 452         # Make sure candidate is not near a previously detected cell,
 453         # and does not abut the edge of the image
 454         if not suppress[y,x] and \
 455            y >= radius and x >= radius and y < height-radius and x < width-radius:
 456             coords = clipped_coordinates(0, radius * Cell_inside_radius, y, x, height, width)
 457
 458             #Only include foreground
 459             this_fg = fg[coords[:,0], coords[:,1]] # [GB] Booleans
 460             coords = coords[this_fg]
 461
 462             this_strength = ((coords[:,0]-y)**2 + (coords[:,1]-x)**2)
 463             claim_strength[coords[:,0],coords[:,1]] = numpy.minimum(
 464                 claim_strength[coords[:,0],coords[:,1]],
 465                 this_strength
 466             )
 467
 468             cells.append((y, x, coords, this_strength))
 469
 470
 471         # Suppress detection of cells too close to here
 472         # (note: we do this irrespective of whether we treated this location as a hit)
 473         coords = clipped_coordinates(0,radius * Cell_suppression_radius, y,x,height,width)
 474         suppress[coords[:,0],coords[:,1]] = True
 475
 476         #for oy, ox in offsets(0, radius*Cell_suppression_radius): #suppression_offsets:
 477         #    ny = y+oy
 478         #    nx = x+ox
 479         #    if ny >= 0 and nx >= 0 and ny < height and nx < width:
 480         #        suppress[ny,nx] = True
 481
 482     for y,x, coords, this_strength in cells:
 483         radius = hough_radius[y,x]
 484
 485         if not len(coords): continue # No fg pixels within radius
 486
 487         winners = claim_strength[coords[:,0],coords[:,1]] >= this_strength
 488         coords = coords[winners]
 489
 490         if not len(coords): continue # Did not win any foreground pixels
 491
 492         infected_pixels = 0
 493         stain_pixels = 0
 494         dark_stain_pixels = 0
 495         fg_pixels = 0
 496
 497         stain_x = 0.0
 498         stain_x2 = 0.0
 499         stain_y = 0.0
 500         stain_y2 = 0.0
 501
 502         for ny, nx in coords:
 503             if infection[ny,nx]:
 504                 infected_pixels += 1
 505             if stain[ny,nx]:
 506                 stain_pixels += 1
 507
 508                 stain_x += nx
 509                 stain_x2 += nx*nx
 510                 stain_y += ny
 511                 stain_y2 += ny*ny
 512             if dark_stain[ny,nx]:
 513                 dark_stain_pixels += 1
 514             fg_pixels += 1
 515
 516         mass_y = numpy.average(coords[:,0])
 517         mass_x = numpy.average(coords[:,1])
 518         mass_offset = numpy.sqrt( (mass_y-y)**2 + (mass_x-x)**2 )
 519
 520         is_infected = (infected_pixels >= Infection_pixels_required and stain_pixels >= 1)
 521
 522         if is_infected:
 523             stain_spread = (
 524                 stain_x2/stain_pixels - (stain_x/stain_pixels)**2 +
 525                 stain_y2/stain_pixels - (stain_y/stain_pixels)**2
 526             )
 527             is_infected = stain_spread >= Stain_spread_required
 528
 529         is_peculiar = (
 530             (radius > Max_cell_radius) or
 531             (radius < Min_cell_radius) or
 532             (mass_offset > Max_offcenter * radius) or
 533             (dark_stain_pixels >= fg_pixels*Max_dark_stain_proportion)
 534         )
 535
 536         if is_peculiar:
 537             color = (0,0,0)
 538             file_coords.write('%d,%d,Rejected\n' % (x,y))
 539         elif is_infected:
 540             n += 1
 541             n_infected += 1
 542             color = (255,0,0)
 543             file_coords.write('%d,%d,Infected\n' % (x,y))
 544         else:
 545             n += 1
 546             color = (0,0,255)
 547             file_coords.write('%d,%d,Uninfected\n' % (x,y))
 548
 549         if Show_cell_shapes:
 550             a_output[coords[:,0],coords[:,1]] += [[ random.random()*64+32 for i in (0,1,2) ]]
 551
 552         # Mark detected cell on output image
 553         if Show_computer_marks:
 554             set(a_output, y, x, color)
 555             for i in xrange(1,3):
 556                 set(a_output,y-i,x,color)
 557                 set(a_output,y+i,x,color)
 558                 set(a_output,y,x-i,color)
 559                 set(a_output,y,x+i,color)
 560
 561
 562
 563
 564     file_coords.close()
 565
 566     # Save diagnostic output image
 567     a_output = numpy.clip(a_output,0,255)
 568     i2 = Image.fromarray(a_output.astype('uint8'))
 569     i2.save(filename_out)
 570
 571
 572
 573 def read_ac(filename):
 574     f = open(filename,'rU')
 575     f.readline()
 576     n_total = 0
 577     n_infected = 0
 578     for line in f:
 579         parts = line.rstrip().split(',')
 580         if parts[2] == 'Uninfected':
 581             n_total += 1
 582         elif parts[2] == 'Infected':
 583             n_total += 1
 584             n_infected += 1
 585
 586     return n_total, n_infected
 587
 588
 589 if __name__ == '__main__':
 590     args = sys.argv[1:]
 591
 592     while len(args) > 1:
 593         name, value = args.pop(0).split('=')
 594         value = float(value)
 595         assert hasattr(sys.modules[__name__], name), 'Parameter named %s does not exist' % name
 596         setattr(sys.modules[__name__], name, value)
 597         print name, '=', value
 598
 599     if len(args) != 1:
 600         print Help % sys.argv[0]
 601         sys.exit(1)
 602
 603     if os.path.isfile(args[0]):
 604         in_dir, filename = os.path.split(args[0])
 605         filenames = [ filename ]
 606         is_subprocess = True
 607
 608     else:
 609         in_dir = args[0]
 610         filenames = os.listdir(in_dir)
 611         filenames.sort()
 612         is_subprocess = False
 613
 614     out_dir = os.path.normpath(in_dir) + '_output'
 615     if not os.path.exists(out_dir):
 616        os.mkdir(out_dir)
 617
 618     if not is_subprocess:
 619         for filename in filenames:
 620             root, ext = os.path.splitext(filename)
 621             ac_filename = os.path.join(out_dir, root+'.ac')
 622             if os.path.exists(ac_filename): os.unlink(ac_filename)
 623
 624     coord_filenames = [ ]
 625
 626     n_running = 0
 627
 628     for filename in filenames:
 629         root, ext = os.path.splitext(filename)
 630         if ext.lower() not in ('.bmp','.tif','.tiff','.png'): continue
 631
 632         filename_in = os.path.join(in_dir, filename)
 633         filename_out = os.path.join(out_dir, root+'.tif')
 634         filename_coords = os.path.join(out_dir, root+'.ac')
 635
 636         if is_subprocess or N_processors == 1:
 637             process(filename_in, filename_out, filename_coords, root)
 638         else:
 639             while n_running >= N_processors:
 640                 os.wait()
 641                 n_running -= 1
 642             os.spawnl(os.P_NOWAIT, sys.executable, sys.executable, *(sys.argv[:-1]+[filename_in]))
 643             n_running += 1
 644
 645         coord_filenames.append((filename_coords, root))
 646
 647     while n_running:
 648         os.wait()
 649         n_running -= 1
 650
 651     if not is_subprocess:
 652         result_file = open(os.path.join(out_dir,'results.txt'),'wt')
 653
 654         for filename_coords, root in coord_filenames:
 655             n_total, n_infected = read_ac(filename_coords)
 656             print >> result_file, root, n_total, n_infected
 657
 658         result_file.close()
 659
 660