d97e059b8f4cbb4d28b277f8f73dae24448d86a3
[master-thesis.git] / Documentation / ma_autocount / plasmodium-autocount-0.1.py
1 # -*- coding: utf-8 -*-
2 #!/usr/bin/env python
3
4 # Copyright (C) 2010 Monash University
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19
20 """
21
22 Identify red blood cells and red blood cells infected with plasmodium in a set of images.
23
24 """
25
26 from PIL import Image
27 import sys, numpy, os, random
28 from scipy import ndimage
29
30 Help = """
31
32 Usage:
33
34 python %s [parameter adjustments] <image_directory>
35
36 Images should be in TIFF, PNG, or BMP format.
37
38 Results will be placed in a new directory named <image_directory>_output.
39
40 If the images were annotated using Cell Counting Aid, the hand annotations
41 will be shown as circles in the output.
42
43 Parameter adjustments take the form
44
45 variable_name=value
46
47 See the top of the source code for parameters and their default values.
48 These have been tuned for our microscope setup, optimal parameters for
49 your setup may differ.
50
51 """
52
53 # How many subprocesses to run at once
54 N_processors = 1
55
56 # ===========
57 # Diagnostics
58 #
59 # What to show in diagnostic output images
60
61 Show_edges = True
62 Show_stain = False
63 Show_infection = False
64
65 Show_hand_marks = False
66 Show_computer_marks = True
67 Show_cell_shapes = True
68
69
70 # =================
71 # Image scaling
72
73 Resize = False
74
75 # Tailles pour les images dans '08.10.2015'
76 Resize_width = 1296 #680
77 Resize_height = 972 #512
78
79 # Tailles pour les images dans '22.05.2015' (50%)
80 #Resize_width = 1296
81 #Resize_height = 972
82
83
84 # [GB] Hemato
85 # Resize_width = 2864
86 # Resize_height = 2885
87
88
89 # ==========================================
90 # Image enhancement and fg/bg discrimination
91 #
92 # Blur slightly to reduce noise, and unsharp-mask
93 #
94 # gaussian_blur(image, a) - b * gaussian_blur(image, c)
95
96 Enhance_a = 1.0
97 Enhance_b = 0.75
98 Enhance_c = 20.0
99
100 # Larger = enlarge background area, smaller = enlarge foreground area
101 Background_weight = 0.915 #&median
102
103
104 # =============================
105 # Stain and infection detection
106
107 Dark_stain_level = 0.839
108
109 Stain_level = 0.9
110 Stain_bg_blur = 10.0
111 Stain_spread_required = 3.0
112
113 Infection_level = 0.762
114 Infection_bg_blur = 2.0
115 Infection_pixels_required = 1
116
117
118
119 # ====================
120 # Cell size parameters
121
122 # Size of cells, for Hough transform
123 Hough_min_radius = 5
124 Hough_max_radius = 32 # 20
125
126 # Portion of cell used for classification as infected/weird
127 # (may be abutting an infected cell, so be cautious)
128 # (as a proportion of the true radius)
129 Cell_inside_radius = 0.9
130
131 # Do not allow another cell center to be detected within this radius
132 Cell_suppression_radius = 1.05 # 1.25
133
134
135 # =======================
136 # Peculiar cell filtering
137
138 # White blood cells appear to be composed entirely of dark staining, filter them out
139 Max_dark_stain_proportion = 0.728
140
141 # Minimum/maximum radius (as determined by Hough transform)
142 Min_cell_radius = 11 # 9
143 Max_cell_radius = 32 # 20
144
145 # Proportion of radius that center of mass can be away from Hough determined center
146 Max_offcenter = 0.5
147
148
149 _offset_cache = { }
150 def offsets(inner_radius, outer_radius):
151 """
152 Generate a list of offsets for pixels within a circular region
153 """
154
155 key = (inner_radius,outer_radius)
156 if key not in _offset_cache:
157 result = [ ]
158 int_radius = int(outer_radius)+1
159 for y in xrange(-int_radius,int_radius+1):
160 for x in xrange(-int_radius,int_radius+1):
161 if inner_radius*inner_radius <= y*y+x*x <= outer_radius*outer_radius:
162 result.append((y,x))
163 _offset_cache[key] = numpy.array(result)
164 return _offset_cache[key]
165
166
167 def clipped_coordinates(inner_radius,outer_radius, y,x, height,width):
168 """
169 Coordinates contained within a ring or circle, clipped to rectangular bounds.
170 """
171
172 coords = offsets(inner_radius,outer_radius) + [[y,x]]
173 valid = (
174 (coords[:,0] >= 0) &
175 (coords[:,0] < height) &
176 (coords[:,1] >= 0) &
177 (coords[:,1] < width)
178 )
179 return coords[valid]
180
181
182 def set(array, y, x, value):
183 """
184 Set an element of an array, if it is in bounds, otherwise silently do nothing
185 """
186
187 if y >= 0 and x >= 0 and y < array.shape[0] and x < array.shape[1]:
188 array[y,x] = value
189
190
191 def mark_hand(array, filename_in):
192 """
193 If there is a file of hand marked cells from Cell Counting Aid, mark them.
194 """
195
196 hand_name = os.path.splitext(filename_in)[0] + '.txt'
197 if not os.path.exists(hand_name): return
198
199 f = open(hand_name,'rU')
200
201 #Skip header line
202 f.readline()
203
204 for line in f:
205 parts = line.rstrip('\n').split(',')
206 x,y,infected = parts[:3]
207
208 x = int( int(x)/1005.0*array.shape[1] +0.5)
209 y = int( int(y)/592.0*array.shape[0] +0.5)
210 if infected == 'True':
211 if len(parts) == 3:
212 color = (196,0,0)
213 else:
214 infection_type = parts[3]
215 if infection_type == '1':
216 color = (196, 196, 0)
217 elif infection_type == '3':
218 color = (196, 0, 0)
219 else:
220 raise Exception('Unknown infection type')
221 else:
222 color = (0,0,196)
223 for i in (-2,-1,0,1,2):
224 set(array,y-3,x+i,color)
225 set(array,y+3,x+i,color)
226 set(array,y+i,x-3,color)
227 set(array,y+i,x+3,color)
228
229
230 def gaussian_blur(array, amount):
231 """
232 Gaussian blur each channel of a color image.
233 """
234 output = numpy.empty(array.shape, array.dtype)
235 for i in xrange(array.shape[2]):
236 ndimage.gaussian_filter(array[:,:,i],amount,output=output[:,:,i])
237 return output
238
239
240 def status(word):
241 pass
242 #sys.stdout.write(word+' ')
243 #sys.stdout.flush()
244
245
246 def process(filename_in, filename_out, filename_coords, name):
247 # print 'File: ', filename_in
248
249 # Load image
250 i = Image.open(filename_in)
251
252 print 'Processing', filename_in, '->', filename_out
253
254 # Resize (mostly done for speed increase)
255 i = i.resize((Resize_width,Resize_height), Image.ANTIALIAS) if Resize else i
256
257 # Convert to numpy array
258 a = numpy.asarray(i).astype('float64') # [GB] Shape: (512, 680, 3)
259 height = a.shape[0]
260 width = a.shape[1]
261
262 a_gray = numpy.sum(a, 2) # [GB] Somme de chaque composante RGB (non-normalisé)
263
264 # Make a copy of the array to doodle various diagnostic markings on
265 a_output = a.copy()
266
267 # If there are hand-marked cells, show them in the output image
268 if Show_hand_marks:
269 mark_hand(a_output, filename_in)
270
271
272 # Denoise/unsharp mask
273 # [GB] (Concerne les trois channels RGB).
274 a_enhanced = gaussian_blur(a, Enhance_a) - Enhance_b * gaussian_blur(a, Enhance_c)
275
276 # Split into foreground and background using k-medians
277 status('fg/bg')
278
279 # [GB] Chaque canal de couleur est linéarisé dans un tableau.
280 a_raveled = numpy.reshape(a_enhanced,(width*height,a_enhanced.shape[2]))
281
282 # [GB] Moyenne de chaque canal.
283 average = numpy.average(a_raveled, axis=0)
284
285 # Initial guess of the medians
286 color_bg = average * 1.5 # [GB] Plus clair
287 color_fg = average * 0.5 # [GB] Plus sombre
288
289 # Fifty times: (0 to 4).
290 for i in xrange(5):
291 d_bg = a_raveled - color_bg[None,:]
292 d_bg *= d_bg
293 d_bg = numpy.sum(d_bg, 1) # [GB] Somme des carrés des couleurs
294 d_fg = a_raveled - color_fg[None,:]
295 d_fg *= d_fg
296 d_fg = numpy.sum(d_fg, 1)
297 fg = d_fg * Background_weight < d_bg
298 color_fg = numpy.median(a_raveled[fg,:],axis=0) # [GB] axis = 0 -> par colonne (ordonnées). Le résultat est alors 3 valeurs, une pour chaque couleur
299 color_bg = numpy.median(a_raveled[~fg,:],axis=0)
300
301 fg = numpy.reshape(fg, (height,width)) # [GB] Image binaire du premier plan.
302
303 edge = ndimage.maximum_filter(fg,size=3) != ndimage.minimum_filter(fg,size=3) # [GB] image binaire des contours du premier plan.
304
305 # [GB] délinéarisation des tableaux résultant du k-median (niveau de gris).
306 d_fg = numpy.sqrt(numpy.reshape(d_fg, (height,width)))
307 d_bg = numpy.sqrt(numpy.reshape(d_bg, (height,width)))
308
309 # Staining is taken as foreground pixels that differ markedly from the mean foreground color
310 # (also, they must be darker in the red and green channels)
311 # The background intensity is simply a useful basis for scaling
312 # (should be invariant under changes in brightness)
313 mag_bg = numpy.average(color_bg)
314
315 # [GB] Les "dark stain" :
316 # * sont d'une intensité plus élevée que la moyenne du bg multiplié par un facteur (Dark_stain_level)
317 # * les valeurs rouges sont plus petites que la mediane du premier-plan pour les rouges
318 # * les valeurs vertes sont plus petites que la médiane du premier-plan pour les verts
319 # * appartiennent au foreground
320 dark_stain = (
321 (d_fg > mag_bg*Dark_stain_level) &
322 (a_enhanced[:,:,0] < color_fg[0]) & # [GB] Red channel
323 (a_enhanced[:,:,1] < color_fg[1]) & # [GB] Green channel
324 fg
325 )
326
327
328 fg_float = fg.astype('float64')
329 green = a[:,:,1] * fg_float
330 green_local_bg = ndimage.gaussian_filter(green, Stain_bg_blur)
331 green_local_bg_divisor = ndimage.gaussian_filter(fg_float, Stain_bg_blur)
332 stain = (
333 (green * green_local_bg_divisor < green_local_bg * Stain_level) &
334 fg
335 )
336
337 # Detect infection as lumpiness in the green channel (green is most affected by staining)
338 # Must also be within a stained region
339 green_local_bg = ndimage.gaussian_filter(green, Infection_bg_blur)
340 green_local_bg_divisor = ndimage.gaussian_filter(fg_float, Infection_bg_blur)
341 infection = (
342 (green * green_local_bg_divisor < green_local_bg * Infection_level) &
343 stain
344 )
345
346 # Show edges and infection on output image
347 if Show_edges:
348 a_output[edge,:] += 64
349 if Show_stain:
350 a_output[stain,2] += 128
351 if Show_infection:
352 a_output[infection,0] += 128
353
354 #a_output[stain, 1] += 128
355
356
357 # Hough transform
358 status('hough')
359
360 x_edge = ndimage.convolve(a_gray,
361 [[-1,0,1],
362 [-2,0,2],
363 [-1,0,1]])
364 y_edge = ndimage.convolve(a_gray,
365 [[-1,-2,-1],
366 [0,0,0],
367 [1,2,1]])
368 mag = numpy.sqrt(x_edge*x_edge+y_edge*y_edge)
369 x_dir = x_edge / mag
370 y_dir = y_edge / mag
371
372 # Vectorized for enhanced performance and incomprehensibility:
373 ys = []
374 xs = []
375 for y in xrange(height):
376 for x in xrange(width):
377 if edge[y,x]:
378 ys.append(y)
379 xs.append(x)
380 ys = numpy.array(ys)
381 xs = numpy.array(xs)
382 indexes = numpy.arange(len(ys))
383
384 def do_hough(radius):
385 hough = numpy.zeros((height,width), 'float64')
386
387 oys = (ys + y_dir[ys,xs]*radius + 0.5).astype('int')
388 oxs = (xs + x_dir[ys,xs]*radius + 0.5).astype('int')
389 valid = (
390 (oys >= 0) &
391 (oys < height) &
392 (oxs >= 0) &
393 (oxs < width)
394 )
395 for i in indexes[valid]:
396 hough[oys[i],oxs[i]] += 1.
397
398 #for y in xrange(height):
399 # for x in xrange(width):
400 # if edge[y,x]:
401 # oy = y + y_dir[y,x]*radius
402 # ox = x + x_dir[y,x]*radius
403 # ioy = int(oy+0.5)
404 # iox = int(ox+0.5)
405 # if ioy>0 and iox>0 and ioy<height and iox<width:
406 # hough[ioy,iox] += 1.
407
408 # Blur the transformed image a little
409 # (more blurring allows greater deviation from circularity)
410 # (if you alter this you will also need to alter thresh, below)
411
412 hough = ndimage.gaussian_filter(hough, 1.0)
413
414 return hough
415
416 hough = numpy.zeros((height,width),'float64')
417 hough_radius = numpy.zeros((height,width),'int')
418 for i in xrange(Hough_min_radius,Hough_max_radius+1):
419 #print 'Hough radius', i
420 this_hough = do_hough(i)
421 better = this_hough > hough
422 hough_radius[better] = i
423 hough[better] = this_hough[better]
424
425 # Show result of Hough transform in output image
426 #a_output[:,:,:] = hough[:,:,None]*20
427
428 # Find and classify peaks in Hough transform image
429 thresh = 1.5 #Hmm
430 candidates = [ ]
431 for y in xrange(height):
432 for x in xrange(width):
433 if hough[y, x] > thresh:
434 candidates.append((hough[y,x], y, x))
435 candidates.sort(reverse = True)
436 hough
437
438
439 status('classify')
440
441 file_coords = open(filename_coords,'wt')
442 file_coords.write('Autocount on %dx%d image\n' % (width,height))
443
444 suppress = numpy.zeros((height,width), 'bool')
445 claim_strength = numpy.zeros((height,width), 'float64') + 1e30
446 cells = [] # [(y,x)]
447 n = 0
448 n_infected = 0
449 for _,y,x in candidates:
450 radius = hough_radius[y,x]
451
452 # Make sure candidate is not near a previously detected cell,
453 # and does not abut the edge of the image
454 if not suppress[y,x] and \
455 y >= radius and x >= radius and y < height-radius and x < width-radius:
456 coords = clipped_coordinates(0, radius * Cell_inside_radius, y, x, height, width)
457
458 #Only include foreground
459 this_fg = fg[coords[:,0], coords[:,1]] # [GB] Booleans
460 coords = coords[this_fg]
461
462 this_strength = ((coords[:,0]-y)**2 + (coords[:,1]-x)**2)
463 claim_strength[coords[:,0],coords[:,1]] = numpy.minimum(
464 claim_strength[coords[:,0],coords[:,1]],
465 this_strength
466 )
467
468 cells.append((y, x, coords, this_strength))
469
470
471 # Suppress detection of cells too close to here
472 # (note: we do this irrespective of whether we treated this location as a hit)
473 coords = clipped_coordinates(0,radius * Cell_suppression_radius, y,x,height,width)
474 suppress[coords[:,0],coords[:,1]] = True
475
476 #for oy, ox in offsets(0, radius*Cell_suppression_radius): #suppression_offsets:
477 # ny = y+oy
478 # nx = x+ox
479 # if ny >= 0 and nx >= 0 and ny < height and nx < width:
480 # suppress[ny,nx] = True
481
482 for y,x, coords, this_strength in cells:
483 radius = hough_radius[y,x]
484
485 if not len(coords): continue # No fg pixels within radius
486
487 winners = claim_strength[coords[:,0],coords[:,1]] >= this_strength
488 coords = coords[winners]
489
490 if not len(coords): continue # Did not win any foreground pixels
491
492 infected_pixels = 0
493 stain_pixels = 0
494 dark_stain_pixels = 0
495 fg_pixels = 0
496
497 stain_x = 0.0
498 stain_x2 = 0.0
499 stain_y = 0.0
500 stain_y2 = 0.0
501
502 for ny, nx in coords:
503 if infection[ny,nx]:
504 infected_pixels += 1
505 if stain[ny,nx]:
506 stain_pixels += 1
507
508 stain_x += nx
509 stain_x2 += nx*nx
510 stain_y += ny
511 stain_y2 += ny*ny
512 if dark_stain[ny,nx]:
513 dark_stain_pixels += 1
514 fg_pixels += 1
515
516 mass_y = numpy.average(coords[:,0])
517 mass_x = numpy.average(coords[:,1])
518 mass_offset = numpy.sqrt( (mass_y-y)**2 + (mass_x-x)**2 )
519
520 is_infected = (infected_pixels >= Infection_pixels_required and stain_pixels >= 1)
521
522 if is_infected:
523 stain_spread = (
524 stain_x2/stain_pixels - (stain_x/stain_pixels)**2 +
525 stain_y2/stain_pixels - (stain_y/stain_pixels)**2
526 )
527 is_infected = stain_spread >= Stain_spread_required
528
529 is_peculiar = (
530 (radius > Max_cell_radius) or
531 (radius < Min_cell_radius) or
532 (mass_offset > Max_offcenter * radius) or
533 (dark_stain_pixels >= fg_pixels*Max_dark_stain_proportion)
534 )
535
536 if is_peculiar:
537 color = (0,0,0)
538 file_coords.write('%d,%d,Rejected\n' % (x,y))
539 elif is_infected:
540 n += 1
541 n_infected += 1
542 color = (255,0,0)
543 file_coords.write('%d,%d,Infected\n' % (x,y))
544 else:
545 n += 1
546 color = (0,0,255)
547 file_coords.write('%d,%d,Uninfected\n' % (x,y))
548
549 if Show_cell_shapes:
550 a_output[coords[:,0],coords[:,1]] += [[ random.random()*64+32 for i in (0,1,2) ]]
551
552 # Mark detected cell on output image
553 if Show_computer_marks:
554 set(a_output, y, x, color)
555 for i in xrange(1,3):
556 set(a_output,y-i,x,color)
557 set(a_output,y+i,x,color)
558 set(a_output,y,x-i,color)
559 set(a_output,y,x+i,color)
560
561
562
563
564 file_coords.close()
565
566 # Save diagnostic output image
567 a_output = numpy.clip(a_output,0,255)
568 i2 = Image.fromarray(a_output.astype('uint8'))
569 i2.save(filename_out)
570
571
572
573 def read_ac(filename):
574 f = open(filename,'rU')
575 f.readline()
576 n_total = 0
577 n_infected = 0
578 for line in f:
579 parts = line.rstrip().split(',')
580 if parts[2] == 'Uninfected':
581 n_total += 1
582 elif parts[2] == 'Infected':
583 n_total += 1
584 n_infected += 1
585
586 return n_total, n_infected
587
588
589 if __name__ == '__main__':
590 args = sys.argv[1:]
591
592 while len(args) > 1:
593 name, value = args.pop(0).split('=')
594 value = float(value)
595 assert hasattr(sys.modules[__name__], name), 'Parameter named %s does not exist' % name
596 setattr(sys.modules[__name__], name, value)
597 print name, '=', value
598
599 if len(args) != 1:
600 print Help % sys.argv[0]
601 sys.exit(1)
602
603 if os.path.isfile(args[0]):
604 in_dir, filename = os.path.split(args[0])
605 filenames = [ filename ]
606 is_subprocess = True
607
608 else:
609 in_dir = args[0]
610 filenames = os.listdir(in_dir)
611 filenames.sort()
612 is_subprocess = False
613
614 out_dir = os.path.normpath(in_dir) + '_output'
615 if not os.path.exists(out_dir):
616 os.mkdir(out_dir)
617
618 if not is_subprocess:
619 for filename in filenames:
620 root, ext = os.path.splitext(filename)
621 ac_filename = os.path.join(out_dir, root+'.ac')
622 if os.path.exists(ac_filename): os.unlink(ac_filename)
623
624 coord_filenames = [ ]
625
626 n_running = 0
627
628 for filename in filenames:
629 root, ext = os.path.splitext(filename)
630 if ext.lower() not in ('.bmp','.tif','.tiff','.png'): continue
631
632 filename_in = os.path.join(in_dir, filename)
633 filename_out = os.path.join(out_dir, root+'.tif')
634 filename_coords = os.path.join(out_dir, root+'.ac')
635
636 if is_subprocess or N_processors == 1:
637 process(filename_in, filename_out, filename_coords, root)
638 else:
639 while n_running >= N_processors:
640 os.wait()
641 n_running -= 1
642 os.spawnl(os.P_NOWAIT, sys.executable, sys.executable, *(sys.argv[:-1]+[filename_in]))
643 n_running += 1
644
645 coord_filenames.append((filename_coords, root))
646
647 while n_running:
648 os.wait()
649 n_running -= 1
650
651 if not is_subprocess:
652 result_file = open(os.path.join(out_dir,'results.txt'),'wt')
653
654 for filename_coords, root in coord_filenames:
655 n_total, n_infected = read_ac(filename_coords)
656 print >> result_file, root, n_total, n_infected
657
658 result_file.close()
659
660