Logiciel de parasitémie lié au papier de Charles Ma.
[master-thesis.git] / Documentation / ma_autocount / plasmodium-autocount-0.1.py
1 #!/usr/bin/env python
2
3 # Copyright (C) 2010 Monash University
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
18
19 """
20
21 Identify red blood cells and red blood cells infected with plasmodium in a set of images.
22
23 """
24
25 from PIL import Image
26
27 import sys, numpy, os, random
28 from scipy import ndimage
29
30 Help = """
31
32 Usage:
33
34 python %s [parameter adjustments] <image_directory>
35
36 Images should be in TIFF, PNG, or BMP format.
37
38 Results will be placed in a new directory named <image_directory>_output.
39
40 If the images were annotated using Cell Counting Aid, the hand annotations
41 will be shown as circles in the output.
42
43 Parameter adjustments take the form
44
45 variable_name=value
46
47 See the top of the source code for parameters and their default values.
48 These have been tuned for our microscope setup, optimal parameters for
49 your setup may differ.
50
51 """
52
53 # How many subprocesses to run at once
54 N_processors = 1
55
56 # ===========
57 # Diagnostics
58 #
59 # What to show in diagnostic output images
60
61 Show_edges = False
62 Show_stain = False
63 Show_infection = False
64
65 Show_hand_marks = True
66 Show_computer_marks = True
67 Show_cell_shapes = False
68
69
70 # =================
71 # Image scaling
72
73 Resize_width = 680
74 Resize_height = 512
75
76
77 # ==========================================
78 # Image enhancement and fg/bg discrimination
79 #
80 # Blur slightly to reduce noise, and unsharp-mask
81 #
82 # gaussian_blur(image, a) - b * gaussian_blur(image, c)
83
84 Enhance_a = 1.0
85 Enhance_b = 0.75
86 Enhance_c = 20.0
87
88 # Larger = enlarge background area, smaller = enlarge foreground area
89 Background_weight = 0.915 #&median
90
91
92 # =============================
93 # Stain and infection detection
94
95 Dark_stain_level = 0.839
96
97 Stain_level = 0.9
98 Stain_bg_blur = 10.0
99 Stain_spread_required = 3.0
100
101 Infection_level = 0.762
102 Infection_bg_blur = 2.0
103 Infection_pixels_required = 1
104
105
106
107 # ====================
108 # Cell size parameters
109
110 # Size of cells, for Hough transform
111 Hough_min_radius = 5
112 Hough_max_radius = 20
113
114 # Portion of cell used for classification as infected/weird
115 # (may be abutting an infected cell, so be cautious)
116 # (as a proportion of the true radius)
117 Cell_inside_radius = 0.9
118
119 # Do not allow another cell center to be detected within this radius
120 Cell_suppression_radius = 1.25
121
122
123 # =======================
124 # Peculiar cell filtering
125
126 # White blood cells appear to be composed entirely of dark staining, filter them out
127 Max_dark_stain_proportion = 0.728
128
129 # Minimum/maximum radius (as determined by Hough transform)
130 Min_cell_radius = 9
131 Max_cell_radius = 20
132
133 # Proportion of radius that center of mass can be away from Hough determined center
134 Max_offcenter = 0.5
135
136
137
138
139
140
141 _offset_cache = { }
142 def offsets(inner_radius, outer_radius):
143 """
144 Generate a list of offsets for pixels within a circular region
145 """
146
147 key = (inner_radius,outer_radius)
148 if key not in _offset_cache:
149 result = [ ]
150 int_radius = int(outer_radius)+1
151 for y in xrange(-int_radius,int_radius+1):
152 for x in xrange(-int_radius,int_radius+1):
153 if inner_radius*inner_radius <= y*y+x*x <= outer_radius*outer_radius:
154 result.append((y,x))
155 _offset_cache[key] = numpy.array(result)
156 return _offset_cache[key]
157
158
159 def clipped_coordinates(inner_radius,outer_radius, y,x, height,width):
160 """
161 Coordinates contained within a ring or circle, clipped to rectangular bounds.
162 """
163
164 coords = offsets(inner_radius,outer_radius) + [[y,x]]
165 valid = (
166 (coords[:,0] >= 0) &
167 (coords[:,0] < height) &
168 (coords[:,1] >= 0) &
169 (coords[:,1] < width)
170 )
171 return coords[valid]
172
173
174 def set(array, y, x, value):
175 """
176 Set an element of an array, if it is in bounds, otherwise silently do nothing
177 """
178
179 if y >= 0 and x >= 0 and y < array.shape[0] and x < array.shape[1]:
180 array[y,x] = value
181
182
183 def mark_hand(array, filename_in):
184 """
185 If there is a file of hand marked cells from Cell Counting Aid, mark them.
186 """
187
188 hand_name = os.path.splitext(filename_in)[0] + '.txt'
189 if not os.path.exists(hand_name): return
190
191 f = open(hand_name,'rU')
192
193 #Skip header line
194 f.readline()
195
196 for line in f:
197 parts = line.rstrip('\n').split(',')
198 x,y,infected = parts[:3]
199
200 x = int( int(x)/1005.0*array.shape[1] +0.5)
201 y = int( int(y)/592.0*array.shape[0] +0.5)
202 if infected == 'True':
203 if len(parts) == 3:
204 color = (196,0,0)
205 else:
206 infection_type = parts[3]
207 if infection_type == '1':
208 color = (196, 196, 0)
209 elif infection_type == '3':
210 color = (196, 0, 0)
211 else:
212 raise Exception('Unknown infection type')
213 else:
214 color = (0,0,196)
215 for i in (-2,-1,0,1,2):
216 set(array,y-3,x+i,color)
217 set(array,y+3,x+i,color)
218 set(array,y+i,x-3,color)
219 set(array,y+i,x+3,color)
220
221
222 def gaussian_blur(array, amount):
223 """
224 Gaussian blur each channel of a color image.
225 """
226 output = numpy.empty(array.shape, array.dtype)
227 for i in xrange(array.shape[2]):
228 ndimage.gaussian_filter(array[:,:,i],amount,output=output[:,:,i])
229 return output
230
231
232 def status(word):
233 pass
234 #sys.stdout.write(word+' ')
235 #sys.stdout.flush()
236
237
238 def process(filename_in, filename_out, filename_coords, name):
239 # print 'File: ', filename_in
240
241 # Load image
242 i = Image.open(filename_in)
243
244 print 'Processing', filename_in, '->', filename_out
245
246 # Resize (mostly done for speed increase)
247 i = i.resize((Resize_width,Resize_height), Image.ANTIALIAS)
248
249 # Convert to numpy array
250 a = numpy.asarray(i).astype('float64')
251 height = a.shape[0]
252 width = a.shape[1]
253
254 a_gray = numpy.sum(a, 2)
255
256 # Make a copy of the array to doodle various diagnostic markings on
257 a_output = a.copy()
258
259 # If there are hand-marked cells, show them in the output image
260 if Show_hand_marks:
261 mark_hand(a_output, filename_in)
262
263
264 # Denoise/unsharp mask
265 a_enhanced = gaussian_blur(a, Enhance_a) - Enhance_b*gaussian_blur(a,Enhance_c)
266
267
268 # Split into foreground and background using k-medians
269 status('fg/bg')
270
271 a_raveled = numpy.reshape(a_enhanced,(width*height,a_enhanced.shape[2]))
272 average = numpy.average(a_raveled, axis=0)
273 # Initial guess
274 color_bg = average*1.5
275 color_fg = average*0.5
276
277 for i in xrange(5):
278 d_bg = a_raveled-color_bg[None,:]
279 d_bg *= d_bg
280 d_bg = numpy.sum(d_bg,1)
281 d_fg = a_raveled-color_fg[None,:]
282 d_fg *= d_fg
283 d_fg = numpy.sum(d_fg,1)
284 fg = d_fg*Background_weight < d_bg
285 color_fg = numpy.median(a_raveled[fg,:],axis=0)
286 color_bg = numpy.median(a_raveled[~fg,:],axis=0)
287
288 fg = numpy.reshape(fg, (height,width))
289
290 edge = ndimage.maximum_filter(fg,size=3) != ndimage.minimum_filter(fg,size=3)
291
292 d_fg = numpy.sqrt(numpy.reshape(d_fg, (height,width)))
293 d_bg = numpy.sqrt(numpy.reshape(d_bg, (height,width)))
294
295 # Staining is taken as foreground pixels that differ markedly from the mean foreground color
296 # (also, they must be darker in the red and green channels)
297 # The background intensity is simply a useful basis for scaling
298 # (should be invariant under changes in brightness)
299 mag_bg = numpy.average(color_bg)
300
301 dark_stain = (
302 (d_fg > mag_bg*Dark_stain_level) &
303 (a_enhanced[:,:,0] < color_fg[0]) &
304 (a_enhanced[:,:,1] < color_fg[1]) &
305 fg
306 )
307
308
309 fg_float = fg.astype('float64')
310 green = a[:,:,1] * fg_float
311 green_local_bg = ndimage.gaussian_filter(green, Stain_bg_blur)
312 green_local_bg_divisor = ndimage.gaussian_filter(fg_float, Stain_bg_blur)
313 stain = (
314 (green * green_local_bg_divisor < green_local_bg * Stain_level) &
315 fg
316 )
317
318 # Detect infection as lumpiness in the green channel (green is most affected by staining)
319 # Must also be within a stained region
320 green_local_bg = ndimage.gaussian_filter(green, Infection_bg_blur)
321 green_local_bg_divisor = ndimage.gaussian_filter(fg_float, Infection_bg_blur)
322 infection = (
323 (green * green_local_bg_divisor < green_local_bg * Infection_level) &
324 stain
325 )
326
327 # Show edges and infection on output image
328 if Show_edges:
329 a_output[edge,:] += 64
330 if Show_stain:
331 a_output[stain,2] += 128
332 if Show_infection:
333 a_output[infection,0] += 128
334
335
336 # Hough transform
337 status('hough')
338
339 x_edge = ndimage.convolve(a_gray,
340 [[-1,0,1],
341 [-2,0,2],
342 [-1,0,1]])
343 y_edge = ndimage.convolve(a_gray,
344 [[-1,-2,-1],
345 [0,0,0],
346 [1,2,1]])
347 mag = numpy.sqrt(x_edge*x_edge+y_edge*y_edge)
348 x_dir = x_edge / mag
349 y_dir = y_edge / mag
350
351 # Vectorized for enhanced performance and incomprehensibility:
352 ys = []
353 xs = []
354 for y in xrange(height):
355 for x in xrange(width):
356 if edge[y,x]:
357 ys.append(y)
358 xs.append(x)
359 ys = numpy.array(ys)
360 xs = numpy.array(xs)
361 indexes = numpy.arange(len(ys))
362
363 def do_hough(radius):
364 hough = numpy.zeros((height,width), 'float64')
365
366 oys = (ys + y_dir[ys,xs]*radius + 0.5).astype('int')
367 oxs = (xs + x_dir[ys,xs]*radius + 0.5).astype('int')
368 valid = (
369 (oys >= 0) &
370 (oys < height) &
371 (oxs >= 0) &
372 (oxs < width)
373 )
374 for i in indexes[valid]:
375 hough[oys[i],oxs[i]] += 1.
376
377 #for y in xrange(height):
378 # for x in xrange(width):
379 # if edge[y,x]:
380 # oy = y + y_dir[y,x]*radius
381 # ox = x + x_dir[y,x]*radius
382 # ioy = int(oy+0.5)
383 # iox = int(ox+0.5)
384 # if ioy>0 and iox>0 and ioy<height and iox<width:
385 # hough[ioy,iox] += 1.
386
387 # Blur the transformed image a little
388 # (more blurring allows greater deviation from circularity)
389 # (if you alter this you will also need to alter thresh, below)
390
391 hough = ndimage.gaussian_filter(hough, 1.0)
392
393 return hough
394
395 hough = numpy.zeros((height,width),'float64')
396 hough_radius = numpy.zeros((height,width),'int')
397 for i in xrange(Hough_min_radius,Hough_max_radius+1):
398 #print 'Hough radius', i
399 this_hough = do_hough(i)
400 better = this_hough > hough
401 hough_radius[better] = i
402 hough[better] = this_hough[better]
403
404 # Show result of Hough transform in output image
405 #a_output[:,:,:] = hough[:,:,None]*20
406
407 # Find and classify peaks in Hough transform image
408 thresh = 1.5 #Hmm
409 candidates = [ ]
410 for y in xrange(height):
411 for x in xrange(width):
412 if hough[y,x] > thresh:
413 candidates.append((hough[y,x],y,x))
414 candidates.sort(reverse=True)
415
416 status('classify')
417
418 file_coords = open(filename_coords,'wt')
419 file_coords.write('Autocount on %dx%d image\n' % (width,height))
420
421 suppress = numpy.zeros((height,width), 'bool')
422 claim_strength = numpy.zeros((height,width), 'float64') + 1e30
423 cells = [ ] # [(y,x)]
424 n = 0
425 n_infected = 0
426 n_late = 0
427 for _,y,x in candidates:
428 radius = hough_radius[y,x]
429
430 # Make sure candidate is not near a previously detected cell,
431 # and does not abut the edge of the image
432 if not suppress[y,x] and \
433 y >= radius and x >= radius and y < height-radius and x < width-radius:
434 coords = clipped_coordinates(0,radius*Cell_inside_radius, y,x,height,width)
435
436 #Only include foreground
437 this_fg = fg[coords[:,0],coords[:,1]]
438 coords = coords[this_fg]
439
440 this_strength = ((coords[:,0]-y)**2 + (coords[:,1]-x)**2)
441 claim_strength[coords[:,0],coords[:,1]] = numpy.minimum(
442 claim_strength[coords[:,0],coords[:,1]],
443 this_strength
444 )
445
446 cells.append((y,x, coords,this_strength))
447
448
449 # Suppress detection of cells too close to here
450 # (note: we do this irrespective of whether we treated this location as a hit)
451
452 coords = clipped_coordinates(0,radius*Cell_suppression_radius, y,x,height,width)
453 suppress[coords[:,0],coords[:,1]] = True
454
455 #for oy, ox in offsets(0, radius*Cell_suppression_radius): #suppression_offsets:
456 # ny = y+oy
457 # nx = x+ox
458 # if ny >= 0 and nx >= 0 and ny < height and nx < width:
459 # suppress[ny,nx] = True
460
461 for y,x, coords, this_strength in cells:
462 radius = hough_radius[y,x]
463
464 if not len(coords): continue # No fg pixels within radius
465
466 winners = claim_strength[coords[:,0],coords[:,1]] >= this_strength
467 coords = coords[winners]
468
469 if not len(coords): continue # Did not win any foreground pixels
470
471 infected_pixels = 0
472 stain_pixels = 0
473 dark_stain_pixels = 0
474 fg_pixels = 0
475
476 stain_x = 0.0
477 stain_x2 = 0.0
478 stain_y = 0.0
479 stain_y2 = 0.0
480
481 for ny, nx in coords:
482 if infection[ny,nx]:
483 infected_pixels += 1
484 if stain[ny,nx]:
485 stain_pixels += 1
486
487 stain_x += nx
488 stain_x2 += nx*nx
489 stain_y += ny
490 stain_y2 += ny*ny
491 if dark_stain[ny,nx]:
492 dark_stain_pixels += 1
493 fg_pixels += 1
494
495 mass_y = numpy.average(coords[:,0])
496 mass_x = numpy.average(coords[:,1])
497 mass_offset = numpy.sqrt( (mass_y-y)**2 + (mass_x-x)**2 )
498
499 is_infected = (infected_pixels >= Infection_pixels_required and stain_pixels >= 1)
500
501 if is_infected:
502 stain_spread = (
503 stain_x2/stain_pixels - (stain_x/stain_pixels)**2 +
504 stain_y2/stain_pixels - (stain_y/stain_pixels)**2
505 )
506 is_infected = stain_spread >= Stain_spread_required
507
508 is_peculiar = (
509 (radius > Max_cell_radius) or
510 (radius < Min_cell_radius) or
511 (mass_offset > Max_offcenter * radius) or
512 (dark_stain_pixels >= fg_pixels*Max_dark_stain_proportion)
513 )
514
515 if is_peculiar:
516 color = (0,0,0)
517 file_coords.write('%d,%d,Rejected\n' % (x,y))
518 elif is_infected:
519 n += 1
520 n_infected += 1
521 color = (255,0,0)
522 file_coords.write('%d,%d,Infected\n' % (x,y))
523 else:
524 n += 1
525 color = (0,0,255)
526 file_coords.write('%d,%d,Uninfected\n' % (x,y))
527
528 # Mark detected cell on output image
529 if Show_computer_marks:
530 set(a_output,y,x,color)
531 for i in xrange(1,3):
532 set(a_output,y-i,x,color)
533 set(a_output,y+i,x,color)
534 set(a_output,y,x-i,color)
535 set(a_output,y,x+i,color)
536
537 if Show_cell_shapes:
538 a_output[coords[:,0],coords[:,1]] += [[ random.random()*64+32 for i in (0,1,2) ]]
539
540
541
542 file_coords.close()
543
544 # Save diagnostic output image
545 a_output = numpy.clip(a_output,0,255)
546 i2 = Image.fromarray(a_output.astype('uint8'))
547 i2.save(filename_out)
548
549
550
551 def read_ac(filename):
552 f = open(filename,'rU')
553 f.readline()
554 n_total = 0
555 n_infected = 0
556 for line in f:
557 parts = line.rstrip().split(',')
558 if parts[2] == 'Uninfected':
559 n_total += 1
560 elif parts[2] == 'Infected':
561 n_total += 1
562 n_infected += 1
563
564 return n_total, n_infected
565
566
567 if __name__ == '__main__':
568 args = sys.argv[1:]
569
570 while len(args) > 1:
571 name, value = args.pop(0).split('=')
572 value = float(value)
573 assert hasattr(sys.modules[__name__], name), 'Parameter named %s does not exist' % name
574 setattr(sys.modules[__name__], name, value)
575 print name, '=', value
576
577 if len(args) != 1:
578 print Help % sys.argv[0]
579 sys.exit(1)
580
581 if os.path.isfile(args[0]):
582 in_dir, filename = os.path.split(args[0])
583 filenames = [ filename ]
584 is_subprocess = True
585
586 else:
587 in_dir = args[0]
588 filenames = os.listdir(in_dir)
589 filenames.sort()
590 is_subprocess = False
591
592 out_dir = os.path.normpath(in_dir) + '_output'
593 if not os.path.exists(out_dir):
594 os.mkdir(out_dir)
595
596 if not is_subprocess:
597 for filename in filenames:
598 root, ext = os.path.splitext(filename)
599 ac_filename = os.path.join(out_dir, root+'.ac')
600 if os.path.exists(ac_filename): os.unlink(ac_filename)
601
602 coord_filenames = [ ]
603
604 n_running = 0
605
606 for filename in filenames:
607 root, ext = os.path.splitext(filename)
608 if ext.lower() not in ('.bmp','.tif','.tiff','.png'): continue
609
610 filename_in = os.path.join(in_dir, filename)
611 filename_out = os.path.join(out_dir, root+'.tif')
612 filename_coords = os.path.join(out_dir, root+'.ac')
613
614 if is_subprocess or N_processors == 1:
615 process(filename_in, filename_out, filename_coords, root)
616 else:
617 while n_running >= N_processors:
618 os.wait()
619 n_running -= 1
620 os.spawnl(os.P_NOWAIT, sys.executable, sys.executable, *(sys.argv[:-1]+[filename_in]))
621 n_running += 1
622
623 coord_filenames.append((filename_coords, root))
624
625 while n_running:
626 os.wait()
627 n_running -= 1
628
629 if not is_subprocess:
630 result_file = open(os.path.join(out_dir,'results.txt'),'wt')
631
632 for filename_coords, root in coord_filenames:
633 n_total, n_infected = read_ac(filename_coords)
634 print >> result_file, root, n_total, n_infected
635
636 result_file.close()
637
638