%% Microarray Spot Finding Example
% This example shows a simple method for locating spots on a microarray and
% extracting the intensties of the spots. It can be downloaded from *MATLAB
% Central*.
% http://www.mathworks.com/matlabcentral
%
% Copyright 2004-2010 RBemis The MathWorks, Inc. 

%% Start with clean slate
clear           %empty workspace (no variables)
close all       %no figures
clc             %empty command window

%% Read image file
% MATLAB can read many standard image formats including TIFF, GIF and BMP
% using the |imread| command. In addition, the *Image Procesing Toolbox*
% provides support for working with specialized image file formats such as
% DICOM. This microarray image was stored as a J-PEG file. The image is
% much larger than the screen size, so |imshow| scales it down to fit and
% let's you know with a warning message.
% x = imread('MicroArraySlide.JPG');
% imageSize = size(x)
% screenSize = get(0,'ScreenSize')
% 
% iptsetpref('ImshowBorder','tight')
% imshow(x)
% title('original image')

%% Crop specified region
% Next we use |imcrop| to extract a region of interest. You can repeat this
% for all print-tip blocks for a full microarray study.
% y = imcrop(x,[622 2467 220 227]);
y=imread('test.tif');
f1 = figure('position',[40 46 285 280]);
imshow(y)

%% Display red & green layers
% This image was stored in RGB format.  We are only interested in the red
% and green planes. To extract the red plane, simply index layer 1. For the
% green plane, layer 2. Custom colormaps make visualization more intuitive.
% Notice that spot shapes are not necessarily the same in both colors.
f2 = figure('position',[265 163 647 327]);
subplot(121)
redMap = gray(256);
redMap(:,[2 3]) = 0;
subimage(y(:,:,1),redMap)
axis off
title('red (layer 1)')
subplot(122)
greenMap = gray(256);
greenMap(:,[1 3]) = 0;
subimage(y(:,:,2),greenMap)
axis off
title('green (layer 2)')

%% Convert RGB image to grayscale for spot finding
% Initially we care more about where the spots are located than their red
% and green intensities. Converting from RGB color to grayscale allows us
% to focus first on spot locations.
z = rgb2gray(y);
figure(f1)
imshow(z)

%% Create horizontal profile
% We are looking for a regular grid of spots so we start by looking at the
% mean intensity for each column of the image. This will help us identify
% where the centres of the spots are and where the gaps between the spots
% can be found.
xProfile = mean(z);
f2 = figure('position',[39 346 284 73]);
plot(xProfile)
title('horizontal profile')
axis tight

%% Estimate spot spacing by autocorrelation
% Ideally the spots would be periodicaly spaced consistently printed, but
% in practice they tend to have different sizes and intensities, so the
% horizontal profile is irregular. We can use autocorrelation to enhance
% the self similarity of the profile. The smooth result promotes peak
% finding and estimation of spot spacing. The *Signal Processing Toolbox*
% allows easy computation of the autocorrelation function using the |xcov|
% command.  

ac = xcov(xProfile);                        %unbiased autocorrelation
f3 = figure('position',[-3 427 569 94]);
plot(ac)
s1 = diff(ac([1 1:end]));                   %left slopes
s2 = diff(ac([1:end end]));                 %right slopes
maxima = find(s1>0 & s2<0);                 %peaks
estPeriod = round(median(diff(maxima)))     %nominal spacing
hold on
plot(maxima,ac(maxima),'r^')
hold off
title('autocorrelation of profile')
axis tight

%% Remove background morphologically
% We can use the spacing estimate to help design a filter to remove the
% background noise from the intensity profile. We do this with the
% |imtophat| function from the *Image Processing Toolbox*.  The |strel|
% command creates a simple rectangular 1D window or line shaped structuring
% element.
seLine = strel('line',estPeriod,0);
xProfile2 = imtophat(xProfile,seLine);
f4 = figure('position',[40 443 285 76]);
plot(xProfile2)
title('enhanced horizontal profile')
axis tight

%% Segment peaks
% Now that we have clean and anchored gaps between the peaks, we can number
% each peak region with the |bwlabel| command. These regions were segmented
% by thresholding with |im2bw|. The threshold value was automatically
% determined by statistical properties of the data using |graythresh|. This
% is a good example of image processing techniques are often useful for 1D
% data analysis.
level = graythresh(xProfile2/255)*255
bw = im2bw(xProfile2/255,level/255); 
L = bwlabel(bw);  
f5 = figure('position',[40 540 285 70]);
plot(L)
axis tight
title('labelled regions')

%% Locate centers
% We can extract the centroids of the peaks. These correspond to the
% horizontal centres of the spots. This is a common blob analysis or
% feature extraction task that can be done with |regionprops|.
stats = regionprops(L);
centroids = [stats.Centroid];
xCenters = centroids(1:2:end)
figure(f5)
hold on
plot(xCenters,1:max(L),'ro')
hold off
title('region centers')

%% Determine divisions between spots
% The midpoints between adjacent peaks provides grid point locations.
gap = diff(xCenters)/2;
first = xCenters(1)-gap(1);
xGrid = round([first xCenters(1:end)+gap([1:end end])])
figure(f2)
for i=1:length(xGrid)
  line(xGrid(i)*[1 1],ylim,'color','m')
end
title('vertical separators')

%% Transpose and repeat
% We just did the analysis on the vertical grid. Now we want to do the same
% for the horizontal spacing. To do this, we simply transpose the image and
% repeat all the steps used above. This time without intermediate graphics
% display commands in order to summarize the mathematical steps of this
% algorithm.

yProfile = mean(z');                        %peak profile
ac = xcov(yProfile);                        %cross correlation
p1 = diff(ac([1 1:end]));
p2 = diff(ac([1:end end]));
maxima = find(p1>0 & p2<0);                 %peak locations
estPeriod = round(median(diff(maxima)))     %spacing estimate
seLine = strel('line',estPeriod,0);
yProfile2 = imtophat(yProfile,seLine);      %background removed
level = graythresh(yProfile2/255);          %automatic threshold level
bw = im2bw(yProfile2/255,level);            %binarized peak regions
L = bwlabel(bw);                            %labeled regions
stats = regionprops(L);
centroids = [stats.Centroid];               %centroids
yCenters = centroids(1:2:end)               %Y parts only
gap = diff(yCenters)/2;                     %inner region half widths
first = yCenters(1)-gap(1);

% list defining vertical boundaries between spot regions
yGrid = round([first yCenters(1:end)+gap([1:end end])])

%% Put bounding boxes around each spot
% We have now found the rectangular grid. Using pairs of neighboring grid
% points we can form bounding box regions to address each spot
% individually. The position and size coordinates of each bounding box were
% tabulated for convenience into a 4-column matrix called |ROI|, which
% stands for regions of interest.
% figure(f1)
figure()
imshow(z)
line(xGrid'*[1 1],yGrid([1 end]),'color','b')
line(xGrid([1 end]),yGrid'*[1 1],'color','b')
[X,Y] = meshgrid(xGrid(1:end-1),yGrid(1:end-1));    %xGrid和yGrid中存储了分格子的数据
[dX,dY] = meshgrid(diff(xGrid),diff(yGrid));
ROI = [X(:) Y(:) dX(:) dY(:)];
% first few rows of ROI table
ROI(1:5,:)

%% Segment spots from background by thresholding
% Applying a single threshold level to the whole image so all spots are
% detected equally is generally a good idea. However, in this case is
% doesn't work so well due to large differences in spot brightness.
fSpots = figure('position',[265 163 647 327]);
subplot(121)
imshow(z)
title('gray image')
subplot(122)
bw = im2bw(z,graythresh(z));
imshow(bw)
title('global threshold')

%% Apply logarithmic transformation then threshold intensities
% One way to equalize large variations in magnitude is by transforming
% intensity values to logarithmic space. This works much better but some
% weak spots are still missed.
figure(fSpots)
subplot(121)
z2 = uint8(log(double(z)+1)/log(255)*255);
imshow(z2)
title('log intensity')
subplot(122)
bw = im2bw(z2,graythresh(z2));
imshow(bw)
title('global threshold')

%% Try local thresholding instead
% Alternatively, the bounding boxes can be used to determine local
% threshold values for each spot. The code is a little more sophisticated,
% requiring looping and indexing. Unfortunately, the results are mixed.
% Weak spots showed up well but spots with bright perimeters were as bad as
% the original global threshold before log space transformation.
figure(fSpots)
subplot(122)
bw = false(size(z));
for i=1:length(ROI)
  rows = round(ROI(i,2))+[0:(round(ROI(i,4))-1)];
  cols = round(ROI(i,1))+[0:(round(ROI(i,3))-1)];
  spot = z(rows,cols);
  bw(rows,cols) = im2bw(spot,graythresh(spot));
end
imshow(bw)
title('local threshold')

%% Logically combine local and global thresholds
% Since both have their merits, let's combine the best of both approaches.
% This can be done using logial operation on the binary masks. These spot
% segmentation results are indeed much better.
figure(fSpots)
subplot(121)
bw = im2bw(z2,graythresh(z2));
for i=1:length(ROI)
  rows = round(ROI(i,2))+[0:(round(ROI(i,4))-1)];
  cols = round(ROI(i,1))+[0:(round(ROI(i,3))-1)];
  spot = z(rows,cols);
  bw(rows,cols) = bw(rows,cols) | im2bw(spot,graythresh(spot));
end
imshow(bw)
title('combined threshold')
subplot(122)
imshow(z)
title('linear intensity')

%% Fill holes to solidify spots
% The silhouettes of some spots still contained pinholes. The whole image
% could be filled using a single call to |imfill| but this may not be a
% good idea. Notice that some spots run together. If four mutually adjacent
% spots (sharing a common corner) were all joined at their edges then a
% single function call would incorrectly fill in the common corner as well.
% To avoid that possibility, it's good insurance to fill each spot one
% bounding box region at a time by looping. Indeed, the spot segmentation
% now looks quite good.
figure(fSpots)
subplot(121)
warning off MATLAB:intConvertOverflow
for i=1:length(ROI)
  rows = round(ROI(i,2))+[0:(round(ROI(i,4))-1)];
  cols = round(ROI(i,1))+[0:(round(ROI(i,3))-1)];
  bw(rows,cols) = imfill(bw(rows,cols),'holes');
end
imshow(bw)           %最终的分割结果bw
title('filled pinholes')

%% Label spot masks by bounding box
% If the gridding went well, all spots should be a single color. The
% results here are pretty good. There is still room for improvement.
%
% TODO List:
%
% * Due to slightly irregular spacing, for some spots a few pixels were
% mislabeled. With additional processing, the algorithm could be extended
% to reclassify these stray pixels. 
%
% * The crescent shaped spot in row 8, column 4 could be completed to be
% more circular by using the 'ConvexImage' return value from |regionprops|.
%
% * The few stray pixels that are not attached to any spots could be
% removed as well.
%
% However, in this case the spot segmentation is good enough to proceed.
L = zeros(size(bw));
for i=1:length(ROI)
  rows = ROI(i,2)+[0:(ROI(i,4)-1)];
  cols = ROI(i,1)+[0:(ROI(i,3)-1)];
  rectMask = L(rows,cols);
  spotMask = bw(rows,cols);
  rectMask(spotMask) = i;
  L(rows,cols) = rectMask;
end
map = [0 0 0; 0.5+0.5*rand(length(ROI),3)];
figure(f1)
imshow(L+1,map)

%% Extract first spot for measurement
% We will now examine the first spot closely to see how we can measure its
% red and green intensities, and ultimately quantify its gene expression
% value. The measurement technique can then be repeated for all spots.
rect = ROI(1,:);                            %[X Y dX dY]
spot = imcrop(y,rect);                      %region around spot
figure(f1)
imshow(spot,'notruesize')

%% Measure spot intensity & releative expression level
% We now simply calculate the nominal intensity over the spot for both the
% red and green layers. A measure of gene expression level can then be
% calculated from the two color intensities. Here a simple log-ratio
% measurement is shown. Other more robust measures could be used instead.
% You could also perform some analysis of the quality of the spot.
mask = imcrop(L,rect)==1;
for i=1:2
  layer = spot(:,:,i);
  intensity(i) = double(median(layer(mask)));
end
intensity
expressionLevel = log(intensity(1)/intensity(2))

%% Remove background, calculate again and compare measurements
% If you noticed, the background intensity around the spot was not zero.
% This could bias results. To see how much difference it makes, we can
% perform background subtraction around all spots, again using |imtophat|
% but this time in 2D on the image using a disk shaped structuring element.
% Then we can calculate color intensities and relative expression level
% again to see what effect background bias had on the measurement. In this
% case the measurement shows more downregulation with background removed.
seDisk = strel('disk',round(estPeriod));
spot2 = imtophat(spot,seDisk);
for i=1:2
  layer = spot2(:,:,i);
  intensity(i) = double(median(layer(mask)));
end
intensity
expressionLevel = log(intensity(1)/intensity(2))

%% Set up graphical display for results
% It is helpful to see red and green intensity values overlayed onto the
% respective color images to gain confidence that measured intensities make
% sense. It is also be helpful to overlay quantitative expression levels
% onto the original image to provide additional visual assurance of
% measurement results. The rectangular grid also helps correlate measured
% values between images. The flexibility of MATLAB's powerful *Handle
% Graphics* engine allow custom graphics like this to be set up quickly and
% easily.
f7 = figure('position',[52 94 954 425]);
ax(1) = subplot(121);
subimage(y(:,:,1),redMap)
title('red intensity')
ax(2) = subplot(122);
subimage(y(:,:,2),greenMap)
title('green intensity')
f8 = figure('position',[316 34 482 497]);
ax(3) = get(imshow(y,'notruesize'),'parent');
title('gene expression')
for i=1:3
  axes(ax(i))
  axis off
  line(xGrid'*[1 1],yGrid([1 end]),'color',0.5*[1 1 1])
  line(xGrid([1 end]),yGrid'*[1 1],'color',0.5*[1 1 1])
end

%% Repeat measurement for all spots
% We now repeat the spot extraction and intensity calculation for all the
% spots in the grid. Here the measured values were tabulated as additional
% columns beside the ROI positions for each spot into a new matrix called
% |spotData|.
figure(f7), figure(f8)
spotData = [ROI zeros(length(ROI),5)];
for i=1:length(ROI)
  spot = imcrop(y,ROI(i,:));                            %raw image
  spot2 = imtophat(spot,seDisk);                        %background removed
  mask = imcrop(L,ROI(i,:))==i;                         %spot mask
  for j=1:2
    layer = spot2(:,:,j);                               %color layer
    intensity(j) = double(median(layer(mask)));
    text(ROI(i,1)+ROI(i,3)/2,ROI(i,2)+ROI(i,4)/2,sprintf('%.0f',intensity(j)),...
      'color','y','HorizontalAlignment','center','parent',ax(j))
    rawLayer = spot(:,:,j);
    rawIntensity(j) = double(median(layer(mask)));
  end
  expression = log(intensity(1)/intensity(2));
  text(ROI(i,1)+ROI(i,3)/2,ROI(i,2)+ROI(i,4)/2,sprintf('%.2f',expression),...
    'color','w','HorizontalAlignment','center','parent',ax(3))
  drawnow
  spotData(i,5:9) = [intensity(:)' expression rawIntensity(:)'];
end

%% Export spot data to Excel spreadsheet
% MATLAB can write to many standard formats. We will use |xlswrite| to save
% the |spotData| to an Excel workbook.
%
% TODO list: 
%
% * prepend column names first (see |xlswrite| doc example using cell arrays)
%
% * programmatically open spreadsheet in Excel (see |winopen| doc)
xlswrite('microarray.xls',spotData)