%% Microarray Spot Finding Example % This example shows a simple method for locating spots on a microarray and % extracting the intensties of the spots. It can be downloaded from *MATLAB % Central*. % http://www.mathworks.com/matlabcentral % % Copyright 2004-2010 RBemis The MathWorks, Inc. %% Start with clean slate clear %empty workspace (no variables) close all %no figures clc %empty command window %% Read image file % MATLAB can read many standard image formats including TIFF, GIF and BMP % using the |imread| command. In addition, the *Image Procesing Toolbox* % provides support for working with specialized image file formats such as % DICOM. This microarray image was stored as a J-PEG file. The image is % much larger than the screen size, so |imshow| scales it down to fit and % let's you know with a warning message. % x = imread('MicroArraySlide.JPG'); % imageSize = size(x) % screenSize = get(0,'ScreenSize') % % iptsetpref('ImshowBorder','tight') % imshow(x) % title('original image') %% Crop specified region % Next we use |imcrop| to extract a region of interest. You can repeat this % for all print-tip blocks for a full microarray study. % y = imcrop(x,[622 2467 220 227]); y=imread('test.tif'); f1 = figure('position',[40 46 285 280]); imshow(y) %% Display red & green layers % This image was stored in RGB format. We are only interested in the red % and green planes. To extract the red plane, simply index layer 1. For the % green plane, layer 2. Custom colormaps make visualization more intuitive. % Notice that spot shapes are not necessarily the same in both colors. f2 = figure('position',[265 163 647 327]); subplot(121) redMap = gray(256); redMap(:,[2 3]) = 0; subimage(y(:,:,1),redMap) axis off title('red (layer 1)') subplot(122) greenMap = gray(256); greenMap(:,[1 3]) = 0; subimage(y(:,:,2),greenMap) axis off title('green (layer 2)') %% Convert RGB image to grayscale for spot finding % Initially we care more about where the spots are located than their red % and green intensities. Converting from RGB color to grayscale allows us % to focus first on spot locations. z = rgb2gray(y); figure(f1) imshow(z) %% Create horizontal profile % We are looking for a regular grid of spots so we start by looking at the % mean intensity for each column of the image. This will help us identify % where the centres of the spots are and where the gaps between the spots % can be found. xProfile = mean(z); f2 = figure('position',[39 346 284 73]); plot(xProfile) title('horizontal profile') axis tight %% Estimate spot spacing by autocorrelation % Ideally the spots would be periodicaly spaced consistently printed, but % in practice they tend to have different sizes and intensities, so the % horizontal profile is irregular. We can use autocorrelation to enhance % the self similarity of the profile. The smooth result promotes peak % finding and estimation of spot spacing. The *Signal Processing Toolbox* % allows easy computation of the autocorrelation function using the |xcov| % command. ac = xcov(xProfile); %unbiased autocorrelation f3 = figure('position',[-3 427 569 94]); plot(ac) s1 = diff(ac([1 1:end])); %left slopes s2 = diff(ac([1:end end])); %right slopes maxima = find(s1>0 & s2<0); %peaks estPeriod = round(median(diff(maxima))) %nominal spacing hold on plot(maxima,ac(maxima),'r^') hold off title('autocorrelation of profile') axis tight %% Remove background morphologically % We can use the spacing estimate to help design a filter to remove the % background noise from the intensity profile. We do this with the % |imtophat| function from the *Image Processing Toolbox*. The |strel| % command creates a simple rectangular 1D window or line shaped structuring % element. seLine = strel('line',estPeriod,0); xProfile2 = imtophat(xProfile,seLine); f4 = figure('position',[40 443 285 76]); plot(xProfile2) title('enhanced horizontal profile') axis tight %% Segment peaks % Now that we have clean and anchored gaps between the peaks, we can number % each peak region with the |bwlabel| command. These regions were segmented % by thresholding with |im2bw|. The threshold value was automatically % determined by statistical properties of the data using |graythresh|. This % is a good example of image processing techniques are often useful for 1D % data analysis. level = graythresh(xProfile2/255)*255 bw = im2bw(xProfile2/255,level/255); L = bwlabel(bw); f5 = figure('position',[40 540 285 70]); plot(L) axis tight title('labelled regions') %% Locate centers % We can extract the centroids of the peaks. These correspond to the % horizontal centres of the spots. This is a common blob analysis or % feature extraction task that can be done with |regionprops|. stats = regionprops(L); centroids = [stats.Centroid]; xCenters = centroids(1:2:end) figure(f5) hold on plot(xCenters,1:max(L),'ro') hold off title('region centers') %% Determine divisions between spots % The midpoints between adjacent peaks provides grid point locations. gap = diff(xCenters)/2; first = xCenters(1)-gap(1); xGrid = round([first xCenters(1:end)+gap([1:end end])]) figure(f2) for i=1:length(xGrid) line(xGrid(i)*[1 1],ylim,'color','m') end title('vertical separators') %% Transpose and repeat % We just did the analysis on the vertical grid. Now we want to do the same % for the horizontal spacing. To do this, we simply transpose the image and % repeat all the steps used above. This time without intermediate graphics % display commands in order to summarize the mathematical steps of this % algorithm. yProfile = mean(z'); %peak profile ac = xcov(yProfile); %cross correlation p1 = diff(ac([1 1:end])); p2 = diff(ac([1:end end])); maxima = find(p1>0 & p2<0); %peak locations estPeriod = round(median(diff(maxima))) %spacing estimate seLine = strel('line',estPeriod,0); yProfile2 = imtophat(yProfile,seLine); %background removed level = graythresh(yProfile2/255); %automatic threshold level bw = im2bw(yProfile2/255,level); %binarized peak regions L = bwlabel(bw); %labeled regions stats = regionprops(L); centroids = [stats.Centroid]; %centroids yCenters = centroids(1:2:end) %Y parts only gap = diff(yCenters)/2; %inner region half widths first = yCenters(1)-gap(1); % list defining vertical boundaries between spot regions yGrid = round([first yCenters(1:end)+gap([1:end end])]) %% Put bounding boxes around each spot % We have now found the rectangular grid. Using pairs of neighboring grid % points we can form bounding box regions to address each spot % individually. The position and size coordinates of each bounding box were % tabulated for convenience into a 4-column matrix called |ROI|, which % stands for regions of interest. % figure(f1) figure() imshow(z) line(xGrid'*[1 1],yGrid([1 end]),'color','b') line(xGrid([1 end]),yGrid'*[1 1],'color','b') [X,Y] = meshgrid(xGrid(1:end-1),yGrid(1:end-1)); %xGrid和yGrid中存储了分格子的数据 [dX,dY] = meshgrid(diff(xGrid),diff(yGrid)); ROI = [X(:) Y(:) dX(:) dY(:)]; % first few rows of ROI table ROI(1:5,:) %% Segment spots from background by thresholding % Applying a single threshold level to the whole image so all spots are % detected equally is generally a good idea. However, in this case is % doesn't work so well due to large differences in spot brightness. fSpots = figure('position',[265 163 647 327]); subplot(121) imshow(z) title('gray image') subplot(122) bw = im2bw(z,graythresh(z)); imshow(bw) title('global threshold') %% Apply logarithmic transformation then threshold intensities % One way to equalize large variations in magnitude is by transforming % intensity values to logarithmic space. This works much better but some % weak spots are still missed. figure(fSpots) subplot(121) z2 = uint8(log(double(z)+1)/log(255)*255); imshow(z2) title('log intensity') subplot(122) bw = im2bw(z2,graythresh(z2)); imshow(bw) title('global threshold') %% Try local thresholding instead % Alternatively, the bounding boxes can be used to determine local % threshold values for each spot. The code is a little more sophisticated, % requiring looping and indexing. Unfortunately, the results are mixed. % Weak spots showed up well but spots with bright perimeters were as bad as % the original global threshold before log space transformation. figure(fSpots) subplot(122) bw = false(size(z)); for i=1:length(ROI) rows = round(ROI(i,2))+[0:(round(ROI(i,4))-1)]; cols = round(ROI(i,1))+[0:(round(ROI(i,3))-1)]; spot = z(rows,cols); bw(rows,cols) = im2bw(spot,graythresh(spot)); end imshow(bw) title('local threshold') %% Logically combine local and global thresholds % Since both have their merits, let's combine the best of both approaches. % This can be done using logial operation on the binary masks. These spot % segmentation results are indeed much better. figure(fSpots) subplot(121) bw = im2bw(z2,graythresh(z2)); for i=1:length(ROI) rows = round(ROI(i,2))+[0:(round(ROI(i,4))-1)]; cols = round(ROI(i,1))+[0:(round(ROI(i,3))-1)]; spot = z(rows,cols); bw(rows,cols) = bw(rows,cols) | im2bw(spot,graythresh(spot)); end imshow(bw) title('combined threshold') subplot(122) imshow(z) title('linear intensity') %% Fill holes to solidify spots % The silhouettes of some spots still contained pinholes. The whole image % could be filled using a single call to |imfill| but this may not be a % good idea. Notice that some spots run together. If four mutually adjacent % spots (sharing a common corner) were all joined at their edges then a % single function call would incorrectly fill in the common corner as well. % To avoid that possibility, it's good insurance to fill each spot one % bounding box region at a time by looping. Indeed, the spot segmentation % now looks quite good. figure(fSpots) subplot(121) warning off MATLAB:intConvertOverflow for i=1:length(ROI) rows = round(ROI(i,2))+[0:(round(ROI(i,4))-1)]; cols = round(ROI(i,1))+[0:(round(ROI(i,3))-1)]; bw(rows,cols) = imfill(bw(rows,cols),'holes'); end imshow(bw) %最终的分割结果bw title('filled pinholes') %% Label spot masks by bounding box % If the gridding went well, all spots should be a single color. The % results here are pretty good. There is still room for improvement. % % TODO List: % % * Due to slightly irregular spacing, for some spots a few pixels were % mislabeled. With additional processing, the algorithm could be extended % to reclassify these stray pixels. % % * The crescent shaped spot in row 8, column 4 could be completed to be % more circular by using the 'ConvexImage' return value from |regionprops|. % % * The few stray pixels that are not attached to any spots could be % removed as well. % % However, in this case the spot segmentation is good enough to proceed. L = zeros(size(bw)); for i=1:length(ROI) rows = ROI(i,2)+[0:(ROI(i,4)-1)]; cols = ROI(i,1)+[0:(ROI(i,3)-1)]; rectMask = L(rows,cols); spotMask = bw(rows,cols); rectMask(spotMask) = i; L(rows,cols) = rectMask; end map = [0 0 0; 0.5+0.5*rand(length(ROI),3)]; figure(f1) imshow(L+1,map) %% Extract first spot for measurement % We will now examine the first spot closely to see how we can measure its % red and green intensities, and ultimately quantify its gene expression % value. The measurement technique can then be repeated for all spots. rect = ROI(1,:); %[X Y dX dY] spot = imcrop(y,rect); %region around spot figure(f1) imshow(spot,'notruesize') %% Measure spot intensity & releative expression level % We now simply calculate the nominal intensity over the spot for both the % red and green layers. A measure of gene expression level can then be % calculated from the two color intensities. Here a simple log-ratio % measurement is shown. Other more robust measures could be used instead. % You could also perform some analysis of the quality of the spot. mask = imcrop(L,rect)==1; for i=1:2 layer = spot(:,:,i); intensity(i) = double(median(layer(mask))); end intensity expressionLevel = log(intensity(1)/intensity(2)) %% Remove background, calculate again and compare measurements % If you noticed, the background intensity around the spot was not zero. % This could bias results. To see how much difference it makes, we can % perform background subtraction around all spots, again using |imtophat| % but this time in 2D on the image using a disk shaped structuring element. % Then we can calculate color intensities and relative expression level % again to see what effect background bias had on the measurement. In this % case the measurement shows more downregulation with background removed. seDisk = strel('disk',round(estPeriod)); spot2 = imtophat(spot,seDisk); for i=1:2 layer = spot2(:,:,i); intensity(i) = double(median(layer(mask))); end intensity expressionLevel = log(intensity(1)/intensity(2)) %% Set up graphical display for results % It is helpful to see red and green intensity values overlayed onto the % respective color images to gain confidence that measured intensities make % sense. It is also be helpful to overlay quantitative expression levels % onto the original image to provide additional visual assurance of % measurement results. The rectangular grid also helps correlate measured % values between images. The flexibility of MATLAB's powerful *Handle % Graphics* engine allow custom graphics like this to be set up quickly and % easily. f7 = figure('position',[52 94 954 425]); ax(1) = subplot(121); subimage(y(:,:,1),redMap) title('red intensity') ax(2) = subplot(122); subimage(y(:,:,2),greenMap) title('green intensity') f8 = figure('position',[316 34 482 497]); ax(3) = get(imshow(y,'notruesize'),'parent'); title('gene expression') for i=1:3 axes(ax(i)) axis off line(xGrid'*[1 1],yGrid([1 end]),'color',0.5*[1 1 1]) line(xGrid([1 end]),yGrid'*[1 1],'color',0.5*[1 1 1]) end %% Repeat measurement for all spots % We now repeat the spot extraction and intensity calculation for all the % spots in the grid. Here the measured values were tabulated as additional % columns beside the ROI positions for each spot into a new matrix called % |spotData|. figure(f7), figure(f8) spotData = [ROI zeros(length(ROI),5)]; for i=1:length(ROI) spot = imcrop(y,ROI(i,:)); %raw image spot2 = imtophat(spot,seDisk); %background removed mask = imcrop(L,ROI(i,:))==i; %spot mask for j=1:2 layer = spot2(:,:,j); %color layer intensity(j) = double(median(layer(mask))); text(ROI(i,1)+ROI(i,3)/2,ROI(i,2)+ROI(i,4)/2,sprintf('%.0f',intensity(j)),... 'color','y','HorizontalAlignment','center','parent',ax(j)) rawLayer = spot(:,:,j); rawIntensity(j) = double(median(layer(mask))); end expression = log(intensity(1)/intensity(2)); text(ROI(i,1)+ROI(i,3)/2,ROI(i,2)+ROI(i,4)/2,sprintf('%.2f',expression),... 'color','w','HorizontalAlignment','center','parent',ax(3)) drawnow spotData(i,5:9) = [intensity(:)' expression rawIntensity(:)']; end %% Export spot data to Excel spreadsheet % MATLAB can write to many standard formats. We will use |xlswrite| to save % the |spotData| to an Excel workbook. % % TODO list: % % * prepend column names first (see |xlswrite| doc example using cell arrays) % % * programmatically open spreadsheet in Excel (see |winopen| doc) xlswrite('microarray.xls',spotData)