Save imported image in the same format (WIP)
[master-thesis.git] / Documentation / ma_autocount / plasmodium-autocount-0.1.py
1 # -*- coding: utf-8 -*-
2 #!/usr/bin/env python
3
4 # Copyright (C) 2010 Monash University
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19
20 """
21
22 Identify red blood cells and red blood cells infected with plasmodium in a set of images.
23
24 """
25
26 from PIL import Image
27 import sys, numpy, os, random
28 from scipy import ndimage
29
30 Help = """
31
32 Usage:
33
34 python %s [parameter adjustments] <image_directory>
35
36 Images should be in TIFF, PNG, or BMP format.
37
38 Results will be placed in a new directory named <image_directory>_output.
39
40 If the images were annotated using Cell Counting Aid, the hand annotations
41 will be shown as circles in the output.
42
43 Parameter adjustments take the form
44
45 variable_name=value
46
47 See the top of the source code for parameters and their default values.
48 These have been tuned for our microscope setup, optimal parameters for
49 your setup may differ.
50
51 """
52
53 # How many subprocesses to run at once
54 N_processors = 1
55
56 # ===========
57 # Diagnostics
58 #
59 # What to show in diagnostic output images
60
61 Show_edges = False
62 Show_dark_stain = False
63 Show_stain = False
64 Show_infection = False
65
66 Show_hand_marks = False
67 Show_computer_marks = True
68 Show_cell_shapes = True
69
70
71 # =================
72 # Image scaling
73
74 Resize = False
75
76 # Tailles pour les images dans '08.10.2015'
77 Resize_width = 1296 #680
78 Resize_height = 972 #512
79
80 # Tailles pour les images dans '22.05.2015' (50%)
81 #Resize_width = 1296
82 #Resize_height = 972
83
84
85 # [GB] Hemato
86 # Resize_width = 2864
87 # Resize_height = 2885
88
89
90 # ==========================================
91 # Image enhancement and fg/bg discrimination
92 #
93 # Blur slightly to reduce noise, and unsharp-mask
94 #
95 # gaussian_blur(image, a) - b * gaussian_blur(image, c)
96
97 Enhance_a = 2.0 # base: 1.0, 50x: 2.0, 100x: 4.0
98 Enhance_b = 0.75 # base: 0.75
99 Enhance_c = 40.0 # base: 20.0, 50x: 40.0, 100x: 80.0
100
101 # Larger = enlarge background area, smaller = enlarge foreground area
102 Background_weight = 0.915 #&median
103
104
105 # =============================
106 # Stain and infection detection
107
108 Dark_stain_level = 0.839
109
110 Stain_level = 0.9
111 Stain_bg_blur = 10.0
112 Stain_spread_required = 3.0
113
114 Infection_level = 0.762
115 Infection_bg_blur = 2.0
116 Infection_pixels_required = 1
117
118
119
120 # ====================
121 # Cell size parameters
122
123 # Size of cells, for Hough transform
124 Hough_min_radius = 20 # 50x: 20, 100x: 40
125 Hough_max_radius = 45 # 50x: 45, 100x: 90
126
127 # Portion of cell used for classification as infected/weird
128 # (may be abutting an infected cell, so be cautious)
129 # (as a proportion of the true radius)
130 Cell_inside_radius = 0.9
131
132 # Do not allow another cell center to be detected within this radius
133 Cell_suppression_radius = 0.8 # 1.05
134
135
136 # =======================
137 # Peculiar cell filtering
138
139 # White blood cells appear to be composed entirely of dark staining, filter them out
140 Max_dark_stain_proportion = 0.728
141
142 # Minimum/maximum radius (as determined by Hough transform)
143 Min_cell_radius = 25 # Base: 9, 50x: 25, 100x: 50
144 Max_cell_radius = 40 # Base: 20, 50x: 40, 100x: 80
145
146 # Proportion of radius that center of mass can be away from Hough determined center
147 Max_offcenter = 0.5
148
149
150 _offset_cache = { }
151 def offsets(inner_radius, outer_radius):
152 """
153 Generate a list of offsets for pixels within a circular region
154 """
155
156 key = (inner_radius,outer_radius)
157 if key not in _offset_cache:
158 result = [ ]
159 int_radius = int(outer_radius)+1
160 for y in xrange(-int_radius,int_radius+1):
161 for x in xrange(-int_radius,int_radius+1):
162 if inner_radius*inner_radius <= y*y+x*x <= outer_radius*outer_radius:
163 result.append((y,x))
164 _offset_cache[key] = numpy.array(result)
165 return _offset_cache[key]
166
167
168 def clipped_coordinates(inner_radius,outer_radius, y,x, height,width):
169 """
170 Coordinates contained within a ring or circle, clipped to rectangular bounds.
171 """
172
173 coords = offsets(inner_radius,outer_radius) + [[y,x]]
174 valid = (
175 (coords[:,0] >= 0) &
176 (coords[:,0] < height) &
177 (coords[:,1] >= 0) &
178 (coords[:,1] < width)
179 )
180 return coords[valid]
181
182
183 def set(array, y, x, value):
184 """
185 Set an element of an array, if it is in bounds, otherwise silently do nothing
186 """
187
188 if y >= 0 and x >= 0 and y < array.shape[0] and x < array.shape[1]:
189 array[y,x] = value
190
191
192 def mark_hand(array, filename_in):
193 """
194 If there is a file of hand marked cells from Cell Counting Aid, mark them.
195 """
196
197 hand_name = os.path.splitext(filename_in)[0] + '.txt'
198 if not os.path.exists(hand_name): return
199
200 f = open(hand_name,'rU')
201
202 #Skip header line
203 f.readline()
204
205 for line in f:
206 parts = line.rstrip('\n').split(',')
207 x,y,infected = parts[:3]
208
209 x = int( int(x)/1005.0*array.shape[1] +0.5)
210 y = int( int(y)/592.0*array.shape[0] +0.5)
211 if infected == 'True':
212 if len(parts) == 3:
213 color = (196,0,0)
214 else:
215 infection_type = parts[3]
216 if infection_type == '1':
217 color = (196, 196, 0)
218 elif infection_type == '3':
219 color = (196, 0, 0)
220 else:
221 raise Exception('Unknown infection type')
222 else:
223 color = (0,0,196)
224 for i in (-2,-1,0,1,2):
225 set(array,y-3,x+i,color)
226 set(array,y+3,x+i,color)
227 set(array,y+i,x-3,color)
228 set(array,y+i,x+3,color)
229
230
231 def gaussian_blur(array, amount):
232 """
233 Gaussian blur each channel of a color image.
234 """
235 output = numpy.empty(array.shape, array.dtype)
236 for i in xrange(array.shape[2]):
237 ndimage.gaussian_filter(array[:,:,i],amount, output=output[:,:,i])
238 return output
239
240
241 def status(word):
242 pass
243 #sys.stdout.write(word+' ')
244 #sys.stdout.flush()
245
246
247 def process(filename_in, filename_out, filename_coords, name):
248 # print 'File: ', filename_in
249
250 # Load image
251 i = Image.open(filename_in)
252
253 print 'Processing', filename_in, '->', filename_out
254
255 # Resize (mostly done for speed increase)
256 i = i.resize((Resize_width,Resize_height), Image.ANTIALIAS) if Resize else i
257
258 # Convert to numpy array
259 a = numpy.asarray(i).astype('float64') # [GB] Shape: (512, 680, 3)
260 height = a.shape[0]
261 width = a.shape[1]
262
263 a_gray = numpy.sum(a, 2) # [GB] Somme de chaque composante RGB (non-normalisé)
264
265 # Make a copy of the array to doodle various diagnostic markings on
266 a_output = a.copy()
267
268 # If there are hand-marked cells, show them in the output image
269 if Show_hand_marks:
270 mark_hand(a_output, filename_in)
271
272
273 # Denoise/unsharp mask
274 # [GB] (Concerne les trois channels RGB).
275 a_enhanced = gaussian_blur(a, Enhance_a) - Enhance_b * gaussian_blur(a, Enhance_c)
276
277
278 # Split into foreground and background using k-medians
279 status('fg/bg')
280
281 # [GB] Chaque canal de couleur est linéarisé dans un tableau.
282 a_raveled = numpy.reshape(a_enhanced,(width*height,a_enhanced.shape[2]))
283
284 # [GB] Moyenne de chaque canal.
285 average = numpy.average(a_raveled, axis=0)
286
287 # Initial guess of the medians
288 color_bg = average * 1.5 # [GB] Plus clair
289 color_fg = average * 0.5 # [GB] Plus sombre
290
291 # Fifty times: (0 to 4).
292 for i in xrange(5):
293 d_bg = a_raveled - color_bg[None,:]
294 d_bg *= d_bg
295 d_bg = numpy.sum(d_bg, 1) # [GB] Somme des carrés des couleurs
296 d_fg = a_raveled - color_fg[None,:]
297 d_fg *= d_fg
298 d_fg = numpy.sum(d_fg, 1)
299 fg = d_fg * Background_weight < d_bg
300 color_fg = numpy.median(a_raveled[fg,:],axis=0) # [GB] axis = 0 -> par colonne (ordonnées). Le résultat est alors 3 valeurs, une pour chaque couleur
301 color_bg = numpy.median(a_raveled[~fg,:],axis=0)
302
303 fg = numpy.reshape(fg, (height,width)) # [GB] Image binaire du premier plan.
304
305 edge = ndimage.maximum_filter(fg,size=3) != ndimage.minimum_filter(fg,size=3) # [GB] image binaire des contours du premier plan.
306
307 # [GB] délinéarisation des tableaux résultant du k-median (niveau de gris).
308 d_fg = numpy.sqrt(numpy.reshape(d_fg, (height,width)))
309 d_bg = numpy.sqrt(numpy.reshape(d_bg, (height,width)))
310
311 # Staining is taken as foreground pixels that differ markedly from the mean foreground color
312 # (also, they must be darker in the red and green channels)
313 # The background intensity is simply a useful basis for scaling
314 # (should be invariant under changes in brightness)
315 mag_bg = numpy.average(color_bg)
316
317 # [GB] Les "dark stain" :
318 # * sont d'une intensité plus élevée que la moyenne du bg multiplié par un facteur (Dark_stain_level)
319 # * les valeurs rouges sont plus petites que la mediane du premier-plan pour les rouges
320 # * les valeurs vertes sont plus petites que la médiane du premier-plan pour les verts
321 # * appartiennent au foreground
322 dark_stain = (
323 (d_fg > mag_bg*Dark_stain_level) &
324 (a_enhanced[:,:,0] < color_fg[0]) & # [GB] Red channel
325 (a_enhanced[:,:,1] < color_fg[1]) & # [GB] Green channel
326 fg
327 )
328
329 fg_float = fg.astype('float64')
330 green = a[:,:,1] * fg_float
331 green_local_bg = ndimage.gaussian_filter(green, Stain_bg_blur)
332 green_local_bg_divisor = ndimage.gaussian_filter(fg_float, Stain_bg_blur)
333 stain = (
334 (green * green_local_bg_divisor < green_local_bg * Stain_level) &
335 fg
336 )
337
338 # Detect infection as lumpiness in the green channel (green is most affected by staining)
339 # Must also be within a stained region
340 green_local_bg = ndimage.gaussian_filter(green, Infection_bg_blur)
341 green_local_bg_divisor = ndimage.gaussian_filter(fg_float, Infection_bg_blur)
342 infection = (
343 (green * green_local_bg_divisor < green_local_bg * Infection_level) &
344 stain
345 )
346
347 # Show edges and infection on output image
348 if Show_edges:
349 a_output[edge,:] += 64
350 if Show_stain:
351 a_output[stain,2] += 128
352 if Show_infection:
353 a_output[infection,0] += 128
354 if Show_dark_stain:
355 a_output[infection,0] += 128
356
357 #a_output[stain, 1] += 128
358
359
360 # Hough transform
361 status('hough')
362
363 x_edge = ndimage.convolve(a_gray,
364 [[-1,0,1],
365 [-2,0,2],
366 [-1,0,1]])
367 y_edge = ndimage.convolve(a_gray,
368 [[-1,-2,-1],
369 [0,0,0],
370 [1,2,1]])
371 mag = numpy.sqrt(x_edge*x_edge+y_edge*y_edge)
372 x_dir = x_edge / mag
373 y_dir = y_edge / mag
374
375 # Vectorized for enhanced performance and incomprehensibility:
376 ys = []
377 xs = []
378 for y in xrange(height):
379 for x in xrange(width):
380 if edge[y,x]:
381 ys.append(y)
382 xs.append(x)
383 ys = numpy.array(ys)
384 xs = numpy.array(xs)
385 indexes = numpy.arange(len(ys))
386
387 def do_hough(radius):
388 hough = numpy.zeros((height,width), 'float64')
389
390 oys = (ys + y_dir[ys,xs]*radius + 0.5).astype('int')
391 oxs = (xs + x_dir[ys,xs]*radius + 0.5).astype('int')
392 valid = (
393 (oys >= 0) &
394 (oys < height) &
395 (oxs >= 0) &
396 (oxs < width)
397 )
398 for i in indexes[valid]:
399 hough[oys[i],oxs[i]] += 1.
400
401 #for y in xrange(height):
402 # for x in xrange(width):
403 # if edge[y,x]:
404 # oy = y + y_dir[y,x]*radius
405 # ox = x + x_dir[y,x]*radius
406 # ioy = int(oy+0.5)
407 # iox = int(ox+0.5)
408 # if ioy>0 and iox>0 and ioy<height and iox<width:
409 # hough[ioy,iox] += 1.
410
411 # Blur the transformed image a little
412 # (more blurring allows greater deviation from circularity)
413 # (if you alter this you will also need to alter thresh, below)
414
415 hough = ndimage.gaussian_filter(hough, 1.5) # 1.0
416
417 return hough
418
419 hough = numpy.zeros((height,width),'float64')
420 hough_radius = numpy.zeros((height,width),'int')
421 for i in xrange(Hough_min_radius,Hough_max_radius+1):
422 #print 'Hough radius', i
423 this_hough = do_hough(i)
424 better = this_hough > hough
425 hough_radius[better] = i
426 hough[better] = this_hough[better]
427
428 # Show result of Hough transform in output image
429 #a_output[:,:,:] = hough[:,:,None]*20
430
431 # Find and classify peaks in Hough transform image
432 thresh = 0.8 #Hmm 1.5
433 candidates = [ ]
434 for y in xrange(height):
435 for x in xrange(width):
436 if hough[y, x] > thresh:
437 candidates.append((hough[y,x], y, x))
438 candidates.sort(reverse = True)
439
440
441 status('classify')
442
443 file_coords = open(filename_coords,'wt')
444 file_coords.write('Autocount on %dx%d image\n' % (width,height))
445
446 suppress = numpy.zeros((height,width), 'bool')
447 claim_strength = numpy.zeros((height,width), 'float64') + 1e30
448 cells = [] # [(y,x)]
449 n = 0
450 n_infected = 0
451 for _,y,x in candidates:
452 radius = hough_radius[y,x]
453
454 # Make sure candidate is not near a previously detected cell,
455 # and does not abut the edge of the image
456 if not suppress[y,x] and \
457 y >= radius and x >= radius and y < height-radius and x < width-radius:
458 coords = clipped_coordinates(0, radius * Cell_inside_radius, y, x, height, width)
459
460 #Only include foreground
461 this_fg = fg[coords[:,0], coords[:,1]] # [GB] Booleans
462 coords = coords[this_fg]
463
464 this_strength = ((coords[:,0]-y)**2 + (coords[:,1]-x)**2)
465 claim_strength[coords[:,0],coords[:,1]] = numpy.minimum(
466 claim_strength[coords[:,0],coords[:,1]],
467 this_strength
468 )
469
470 cells.append((y, x, coords, this_strength))
471
472
473 # Suppress detection of cells too close to here
474 # (note: we do this irrespective of whether we treated this location as a hit)
475 coords = clipped_coordinates(0,radius * Cell_suppression_radius, y,x,height,width)
476 suppress[coords[:,0],coords[:,1]] = True
477
478 #for oy, ox in offsets(0, radius*Cell_suppression_radius): #suppression_offsets:
479 # ny = y+oy
480 # nx = x+ox
481 # if ny >= 0 and nx >= 0 and ny < height and nx < width:
482 # suppress[ny,nx] = True
483
484 for y,x, coords, this_strength in cells:
485 radius = hough_radius[y,x]
486
487 if not len(coords): continue # No fg pixels within radius
488
489 winners = claim_strength[coords[:,0],coords[:,1]] >= this_strength
490 coords = coords[winners]
491
492 if not len(coords): continue # Did not win any foreground pixels
493
494 infected_pixels = 0
495 stain_pixels = 0
496 dark_stain_pixels = 0
497 fg_pixels = 0
498
499 stain_x = 0.0
500 stain_x2 = 0.0
501 stain_y = 0.0
502 stain_y2 = 0.0
503
504 for ny, nx in coords:
505 if infection[ny,nx]:
506 infected_pixels += 1
507 if stain[ny,nx]:
508 stain_pixels += 1
509
510 stain_x += nx
511 stain_x2 += nx*nx
512 stain_y += ny
513 stain_y2 += ny*ny
514 if dark_stain[ny,nx]:
515 dark_stain_pixels += 1
516 fg_pixels += 1
517
518 mass_y = numpy.average(coords[:,0])
519 mass_x = numpy.average(coords[:,1])
520 mass_offset = numpy.sqrt( (mass_y-y)**2 + (mass_x-x)**2 )
521
522 is_infected = (infected_pixels >= Infection_pixels_required and stain_pixels >= 1)
523
524 if is_infected:
525 stain_spread = (
526 stain_x2/stain_pixels - (stain_x/stain_pixels)**2 +
527 stain_y2/stain_pixels - (stain_y/stain_pixels)**2
528 )
529 is_infected = stain_spread >= Stain_spread_required
530
531 is_peculiar = (
532 (radius > Max_cell_radius) or
533 (radius < Min_cell_radius) or
534 (mass_offset > Max_offcenter * radius) or
535 (dark_stain_pixels >= fg_pixels*Max_dark_stain_proportion)
536 )
537
538 if is_peculiar:
539 color = (0,0,0)
540 file_coords.write('%d,%d,Rejected\n' % (x,y))
541 elif is_infected:
542 n += 1
543 n_infected += 1
544 color = (255,0,0)
545 file_coords.write('%d,%d,Infected\n' % (x,y))
546 else:
547 n += 1
548 color = (0,0,255)
549 file_coords.write('%d,%d,Uninfected\n' % (x,y))
550
551 if Show_cell_shapes:
552 a_output[coords[:,0],coords[:,1]] += [[ random.random()*64+32 for i in (0,1,2) ]]
553
554 # Mark detected cell on output image
555 if Show_computer_marks:
556 set(a_output, y, x, color)
557 for i in xrange(1,3):
558 set(a_output,y-i,x,color)
559 set(a_output,y+i,x,color)
560 set(a_output,y,x-i,color)
561 set(a_output,y,x+i,color)
562
563
564
565
566 file_coords.close()
567
568 # Save diagnostic output image
569 a_output = numpy.clip(a_output,0,255)
570 i2 = Image.fromarray(a_output.astype('uint8'))
571 i2.save(filename_out)
572
573 thresh_output = numpy.clip(fg, 0, 255)
574 i3 = Image.fromarray(thresh_output.astype('uint8'))
575 i3.save(filename_out + " - thresholded.tiff")
576
577
578
579 def read_ac(filename):
580 f = open(filename,'rU')
581 f.readline()
582 n_total = 0
583 n_infected = 0
584 for line in f:
585 parts = line.rstrip().split(',')
586 if parts[2] == 'Uninfected':
587 n_total += 1
588 elif parts[2] == 'Infected':
589 n_total += 1
590 n_infected += 1
591
592 return n_total, n_infected
593
594
595 if __name__ == '__main__':
596 args = sys.argv[1:]
597
598 while len(args) > 1:
599 name, value = args.pop(0).split('=')
600 value = float(value)
601 assert hasattr(sys.modules[__name__], name), 'Parameter named %s does not exist' % name
602 setattr(sys.modules[__name__], name, value)
603 print name, '=', value
604
605 if len(args) != 1:
606 print Help % sys.argv[0]
607 sys.exit(1)
608
609 if os.path.isfile(args[0]):
610 in_dir, filename = os.path.split(args[0])
611 filenames = [ filename ]
612 is_subprocess = True
613
614 else:
615 in_dir = args[0]
616 filenames = os.listdir(in_dir)
617 filenames.sort()
618 is_subprocess = False
619
620 out_dir = os.path.normpath(in_dir) + '_output'
621 if not os.path.exists(out_dir):
622 os.mkdir(out_dir)
623
624 if not is_subprocess:
625 for filename in filenames:
626 root, ext = os.path.splitext(filename)
627 ac_filename = os.path.join(out_dir, root+'.ac')
628 if os.path.exists(ac_filename): os.unlink(ac_filename)
629
630 coord_filenames = [ ]
631
632 n_running = 0
633
634 for filename in filenames:
635 root, ext = os.path.splitext(filename)
636 if ext.lower() not in ('.bmp','.tif','.tiff','.png'): continue
637
638 filename_in = os.path.join(in_dir, filename)
639 filename_out = os.path.join(out_dir, root+'.tif')
640 filename_coords = os.path.join(out_dir, root+'.ac')
641
642 if is_subprocess or N_processors == 1:
643 process(filename_in, filename_out, filename_coords, root)
644 else:
645 while n_running >= N_processors:
646 os.wait()
647 n_running -= 1
648 os.spawnl(os.P_NOWAIT, sys.executable, sys.executable, *(sys.argv[:-1]+[filename_in]))
649 n_running += 1
650
651 coord_filenames.append((filename_coords, root))
652
653 while n_running:
654 os.wait()
655 n_running -= 1
656
657 if not is_subprocess:
658 result_file = open(os.path.join(out_dir,'results.txt'),'wt')
659
660 for filename_coords, root in coord_filenames:
661 n_total, n_infected = read_ac(filename_coords)
662 print >> result_file, root, n_total, n_infected
663
664 result_file.close()
665
666