feat: cDNA微阵列图像处理作业 - Python实现

实现内容:
- 网格划分:投影分析 + 自相关估周期 + 白顶帽去背景 + 质心提取
- 三种阈值分割:人工阈值、Otsu自动阈值、迭代阈值
- TV去噪(Chambolle投影算法)
- 后处理:去小连通域 + 保留最大连通域
- 完整可视化:网格叠加、阈值对比、收敛曲线、分割结果

参考MATLAB代码:NewGridAndCV/demo_GriddingAndCV.m
This commit is contained in:
2026-05-06 19:41:26 +08:00
commit b8a8ff2bc6
63 changed files with 3380 additions and 0 deletions
+432
View File
@@ -0,0 +1,432 @@
"""
cDNA微阵列图像处理 - 网格划分与阈值分割
==========================================
作业要求:
1. 分析涉及的图像处理技术
2. 编写阈值分割代码(人工阈值 / 迭代阈值 / Otsu自动阈值)
3. 选取cDNA图像进行分割
参考MATLAB代码:NewGridAndCV/demo_GriddingAndCV.m, GriddingAndCV.m
"""
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
from PIL import Image
from scipy import ndimage
from skimage import filters, morphology, color
# Font setup: list CJK-capable sans-serif families first so the Chinese
# titles/labels used in the figures below can be rendered.
rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
rcParams['axes.unicode_minus'] = False
# Path configuration, anchored at this script's location.
# NOTE(review): the base directory is ONE level above the script's folder.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
_BASE_DIR = os.path.dirname(_SCRIPT_DIR)  # parent of the script's directory
DATA_DIR = os.path.join(_BASE_DIR, 'cDNA图像处理实例', '数据', 'cDNA')
OUTPUT_DIR = os.path.join(_BASE_DIR, 'results')
# ============================================================
# 第一部分:阈值分割算法
# ============================================================
def manual_threshold(img_gray: np.ndarray, T: int) -> np.ndarray:
    """Segment with a fixed, caller-supplied threshold.

    Pixels strictly greater than ``T`` become foreground (1); all other
    pixels become background (0). The mask is returned as ``uint8``.
    """
    foreground = np.greater(img_gray, T)
    return foreground.astype(np.uint8)
def otsu_threshold(img_gray: np.ndarray) -> tuple[np.ndarray, float]:
    """Segment with a threshold chosen by ``filters.threshold_otsu``.

    Returns:
        ``(mask, level)`` where ``mask`` is a ``uint8`` array with 1 for
        pixels strictly greater than ``level`` and 0 elsewhere.
    """
    level = filters.threshold_otsu(img_gray)
    mask = np.greater(img_gray, level).astype(np.uint8)
    return mask, level
def iterative_threshold(img_gray: np.ndarray, tol: float = 1.0, max_iter: int = 100) -> tuple[np.ndarray, float, list]:
    """Segment with an iteratively refined threshold.

    Starting from the image mean, the threshold is repeatedly replaced by
    the midpoint between the mean of the pixels above it and the mean of
    the pixels at or below it.

    Args:
        img_gray: Grayscale image.
        tol: Stop once two successive thresholds differ by less than this.
        max_iter: Upper bound on the number of refinement steps.

    Returns:
        ``(mask, threshold, history)``: the ``uint8`` mask of pixels strictly
        above the final threshold, the final threshold, and every threshold
        visited (starting with the initial mean).
    """
    threshold = float(np.mean(img_gray))
    history = [threshold]

    for _ in range(max_iter):
        bright = img_gray[img_gray > threshold]
        dark = img_gray[img_gray <= threshold]
        # One class is empty: the midpoint is undefined, keep what we have.
        if bright.size == 0 or dark.size == 0:
            break

        previous = threshold
        threshold = (float(bright.mean()) + float(dark.mean())) / 2.0
        history.append(threshold)
        if abs(threshold - previous) < tol:
            break

    mask = (img_gray > threshold).astype(np.uint8)
    return mask, threshold, history
# ============================================================
# 第二部分:TV去噪(参考tvdenoise.m,Chambolle投影算法)
# ============================================================
def tv_denoise(f: np.ndarray, lam: float = 12.0, tol: float = 1e-2, max_iter: int = 200) -> np.ndarray:
    """Total-variation denoising (Rudin-Osher-Fatemi model).

    Minimises ``TV(u) + lam/2 * ||f - u||^2`` with Chambolle's projection
    iteration on the dual field ``(p1, p2)``.

    Image borders are handled with replicated edges: the forward difference
    past the last column/row is zero and the divergence is the matching
    backward difference with no flux across the border. Pixels on opposite
    sides of the image are therefore never treated as neighbours.

    Args:
        f: 2-D image (any numeric dtype; processed as float64).
        lam: Fidelity weight; larger values keep the result closer to ``f``.
        tol: Stop when the largest change of ``div p`` between two
            iterations falls below this value.
        max_iter: Maximum number of iterations.

    Returns:
        The denoised image as a float64 array with the same shape as ``f``.
    """
    f = np.asarray(f, dtype=np.float64)
    if f.size == 0:
        # Nothing to denoise; also avoids reducing over an empty array below.
        return f

    dt = 0.25  # step size of the fixed-point iteration
    p1 = np.zeros_like(f)
    p2 = np.zeros_like(f)
    divp = np.zeros_like(f)

    for _ in range(max_iter):
        lastdivp = divp
        z = divp - f * lam

        # Forward differences; appending the last column/row replicates the
        # edge, so the difference across the border is exactly zero.
        z1 = np.diff(z, axis=1, append=z[:, -1:])
        z2 = np.diff(z, axis=0, append=z[-1:, :])

        denom = 1.0 + dt * np.sqrt(z1**2 + z2**2)
        p1 = (p1 + dt * z1) / denom
        p2 = (p2 + dt * z2) / denom

        # Backward-difference divergence with zero flux entering from
        # outside the image (no wrap-around to the opposite edge).
        divp = p1 + p2
        divp[:, 1:] -= p1[:, :-1]
        divp[1:, :] -= p2[:-1, :]

        if np.max(np.abs(divp - lastdivp)) < tol:
            break

    return f - divp / lam
# ============================================================
# 第三部分:网格划分(参考GriddingAndCV.m)
# ============================================================
def estimate_spacing(profile: np.ndarray) -> int:
    """Estimate the spot spacing of a 1-D projection profile.

    The mean-removed profile is auto-correlated; local maxima of the
    non-negative-lag half (rising on the left, falling on the right) are
    located, and the median distance between successive maxima is taken
    as the period.

    Returns:
        The estimated period in samples, never less than 10. When fewer
        than two usable maxima exist, ``max(len(profile) // 16, 10)``.
    """
    n = len(profile)
    centered = profile - np.mean(profile)
    # Keep only lags >= 0 of the full correlation.
    autocorr = np.correlate(centered, centered, mode='full')[n - 1:]

    rising = np.diff(autocorr, prepend=autocorr[0]) > 0    # slope into the sample
    falling = np.diff(autocorr, append=autocorr[-1]) < 0   # slope out of the sample
    peaks = np.flatnonzero(rising & falling)

    # Ignore maxima at very small lags (close to the lag-0 peak).
    peaks = peaks[peaks > n // 30]
    if peaks.size < 2:
        return max(n // 16, 10)

    period = int(np.round(np.median(np.diff(peaks))))
    return max(period, 10)
def find_grid_centers(profile: np.ndarray, est_period: int) -> np.ndarray:
    """Locate spot centres along a 1-D projection profile.

    Steps:
        1. White top-hat with a 1-D structuring element of length
           ``est_period`` to remove the slowly varying background.
        2. Binarise the result with an Otsu threshold.
        3. Label the runs of foreground samples and take the mean index
           of each run as its centre.

    Returns:
        Sorted array of centre positions (float sample indices); an empty
        array when the top-hat response is all zero or no run is found.
    """
    signal = profile.astype(np.float64)
    line_element = np.ones(est_period)
    peaks_only = ndimage.white_tophat(signal, structure=line_element)

    if peaks_only.max() == 0:
        return np.array([])

    level = filters.threshold_otsu(peaks_only)
    mask = (peaks_only > level).astype(int)

    labels, count = ndimage.label(mask)
    if count == 0:
        return np.array([])

    centroids = [
        float(np.mean(np.where(labels == k)[0]))
        for k in range(1, count + 1)
    ]
    return np.array(sorted(centroids))
def compute_grid_lines(centers: np.ndarray) -> np.ndarray:
    """Turn spot centres into grid separator positions.

    Interior lines are the midpoints between neighbouring centres. The
    first and last lines are extrapolated outward by half of the adjacent
    gap, so ``n`` centres produce ``n + 1`` lines. Positions are rounded
    to integers.

    Returns:
        Integer array of line positions, or an empty array when fewer
        than two centres are given.
    """
    if len(centers) < 2:
        return np.array([])

    centers = np.asarray(centers)
    half_gaps = np.diff(centers) / 2.0
    boundaries = np.concatenate((
        [centers[0] - half_gaps[0]],      # extrapolated leading edge
        centers[:-1] + half_gaps,         # midpoints between neighbours
        [centers[-1] + half_gaps[-1]],    # extrapolated trailing edge
    ))
    return np.round(boundaries).astype(int)
def gridding(gray: np.ndarray) -> tuple:
    """Run the full gridding pipeline on a 2-D grayscale image.

    Column and row mean profiles are computed, the spot period is
    estimated on each, spot centres are located, and the centres are
    converted to grid lines.

    The outermost grid lines are extrapolated from the first/last centre
    and can land outside the image. They are clipped to ``[0, width]`` /
    ``[0, height]`` here, because a negative line used as a slice start
    would be read as an index from the end and select an empty cell.

    Returns:
        ``(x_grid, y_grid, col_profile, row_profile, x_period, y_period)``.
        A grid array is empty when fewer than two centres were found on
        that axis.
    """
    col_profile = np.mean(gray, axis=0).astype(np.float64)
    row_profile = np.mean(gray, axis=1).astype(np.float64)

    x_period = estimate_spacing(col_profile)
    y_period = estimate_spacing(row_profile)

    x_centers = find_grid_centers(col_profile, x_period)
    y_centers = find_grid_centers(row_profile, y_period)

    height, width = gray.shape[:2]
    # Upper bound is the size itself: grid lines are used as slice ends.
    x_grid = np.clip(compute_grid_lines(x_centers), 0, width)
    y_grid = np.clip(compute_grid_lines(y_centers), 0, height)

    return x_grid, y_grid, col_profile, row_profile, x_period, y_period
# ============================================================
# 第四部分:后处理(参考choice.m, choosemaxobj.m)
# ============================================================
def remove_small_objects(binary: np.ndarray, min_size: int = 20) -> np.ndarray:
    """Remove connected components smaller than ``min_size`` pixels.

    Components are found with ``ndimage.label`` (default connectivity).
    All component areas are counted in a single pass over the label image
    instead of re-scanning the whole image once per component.

    Args:
        binary: Binary mask (non-zero = foreground).
        min_size: Components with fewer pixels than this are cleared.

    Returns:
        A copy of ``binary`` with the small components set to 0; the input
        is not modified.
    """
    labeled, _ = ndimage.label(binary)

    # sizes[k] = number of pixels carrying label k (index 0 is background).
    sizes = np.bincount(labeled.ravel(), minlength=1)
    too_small = sizes < min_size
    too_small[0] = False  # never touch the background label

    result = binary.copy()
    result[too_small[labeled]] = 0
    return result
def keep_largest_object(binary: np.ndarray, min_size: int = 20) -> np.ndarray:
    """Keep only the largest connected component of a binary mask.

    Returns:
        * ``binary`` itself when it contains no component,
        * an all-zero array shaped like ``binary`` when the largest
          component has fewer than ``min_size`` pixels,
        * otherwise a ``uint8`` mask of the largest component (the first
          one in label order if several share the maximum area).
    """
    labels, count = ndimage.label(binary)
    if count == 0:
        return binary

    # Areas of labels 1..count (drop the background bin).
    sizes = np.bincount(labels.ravel(), minlength=count + 1)[1:]
    winner = int(np.argmax(sizes))
    if sizes[winner] < min_size:
        return np.zeros_like(binary)

    return (labels == winner + 1).astype(np.uint8)
# ============================================================
# 第五部分:可视化
# ============================================================
def plot_threshold_comparison(gray_block: np.ndarray, block_name: str = "子块") -> plt.Figure:
    """Show one block next to its three threshold segmentations.

    The 2x2 figure contains the grayscale block, the manual-threshold mask
    (threshold = 60% of the block mean, truncated to int), the Otsu mask
    and the iterative-threshold mask; each title reports the threshold
    used.

    Returns:
        The created matplotlib figure.
    """
    manual_T = int(np.mean(gray_block) * 0.6)
    mask_manual = manual_threshold(gray_block, manual_T)
    mask_otsu, T_otsu = otsu_threshold(gray_block)
    mask_iter, T_iter, _ = iterative_threshold(gray_block)

    # (image, title) pairs in row-major panel order.
    panels = (
        (gray_block, '原始灰度图'),
        (mask_manual, f'人工阈值 (T={manual_T})'),
        (mask_otsu, f'Otsu阈值 (T={T_otsu:.1f})'),
        (mask_iter, f'迭代阈值 (T={T_iter:.1f})'),
    )

    fig, axes = plt.subplots(2, 2, figsize=(10, 10))
    fig.suptitle(f'{block_name} - 三种阈值分割方法对比', fontsize=14)
    for ax, (image, caption) in zip(axes.flat, panels):
        ax.imshow(image, cmap='gray')
        ax.set_title(caption)
        ax.axis('off')

    plt.tight_layout()
    return fig
def plot_gridding_result(gray: np.ndarray, x_grid: np.ndarray, y_grid: np.ndarray,
                         col_profile: np.ndarray, row_profile: np.ndarray) -> plt.Figure:
    """Draw a four-panel summary of the gridding step.

    Panels: the image with the grid overlaid, the column profile with the
    vertical grid positions, the row profile with the horizontal grid
    positions, and the gray-level histogram with the Otsu threshold marked.

    Returns:
        The created matplotlib figure.
    """
    fig = plt.figure(figsize=(14, 10))
    fig.suptitle('cDNA微阵列网格划分结果', fontsize=14)

    def draw_profile(slot, profile, cuts, caption):
        """Plot one projection profile with its grid positions marked."""
        ax = fig.add_subplot(2, 2, slot)
        ax.plot(profile, 'b-', linewidth=0.5)
        for cut in cuts:
            ax.axvline(x=cut, color='r', linewidth=0.5, alpha=0.5)
        ax.set_title(caption)
        ax.set_xlabel('')

    # Panel 1: image with grid lines.
    overlay_ax = fig.add_subplot(2, 2, 1)
    overlay_ax.imshow(gray, cmap='gray')
    for col in x_grid:
        overlay_ax.axvline(x=col, color='cyan', linewidth=0.5, alpha=0.7)
    for row in y_grid:
        overlay_ax.axhline(y=row, color='cyan', linewidth=0.5, alpha=0.7)
    overlay_ax.set_title(f'网格叠加 ({len(x_grid)-1}列 x {len(y_grid)-1}行)')
    overlay_ax.axis('off')

    # Panels 2 and 3: projection profiles.
    draw_profile(2, col_profile, x_grid, '列投影 (列均值)')
    draw_profile(3, row_profile, y_grid, '行投影 (行均值)')

    # Panel 4: histogram with the global Otsu threshold.
    hist_ax = fig.add_subplot(2, 2, 4)
    hist_ax.hist(gray.ravel(), bins=50, color='gray', edgecolor='black', linewidth=0.3)
    T_otsu = filters.threshold_otsu(gray)
    hist_ax.axvline(x=T_otsu, color='r', linestyle='--', label=f'Otsu T={T_otsu:.0f}')
    hist_ax.set_title('灰度直方图')
    hist_ax.set_xlabel('灰度值')
    hist_ax.legend()

    plt.tight_layout()
    return fig
def plot_full_segmentation(gray: np.ndarray, bw_result: np.ndarray, title: str = "完整分割结果") -> plt.Figure:
    """Show the grayscale image beside its binary segmentation.

    Returns:
        The created matplotlib figure (1 row, 2 columns).
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    fig.suptitle(title, fontsize=14)

    panels = ((gray, '原始灰度图'), (bw_result, '二值分割结果'))
    for ax, (image, caption) in zip(axes, panels):
        ax.imshow(image, cmap='gray')
        ax.set_title(caption)
        ax.axis('off')

    plt.tight_layout()
    return fig
# ============================================================
# 第六部分:主流程
# ============================================================
def _load_image(path: str) -> tuple[np.ndarray, np.ndarray]:
    """Read the image at *path* and return ``(raw_pixels, grayscale)``."""
    # The context manager releases the file handle once the pixels are copied.
    with Image.open(path) as handle:
        img = np.array(handle)
    if img.ndim == 3:
        # Drop a possible alpha channel, then scale rgb2gray's output to 0..255.
        gray = (color.rgb2gray(img[:, :, :3]) * 255).astype(np.uint8)
    else:
        gray = img
    return img, gray


def _save_figure(fig: plt.Figure, filename: str, dpi: int) -> None:
    """Save *fig* into OUTPUT_DIR, close it, and report the file name."""
    fig.savefig(os.path.join(OUTPUT_DIR, filename), dpi=dpi, bbox_inches='tight')
    plt.close(fig)  # figures otherwise stay alive in pyplot's registry
    print(f" 保存: {filename}")


def _pick_sample_block(gray: np.ndarray, x_grid: np.ndarray, y_grid: np.ndarray) -> tuple:
    """Return ``(block, bi, bj)``: the middle grid cell, or the image centre if no grid."""
    bi, bj = len(y_grid) // 2, len(x_grid) // 2
    if len(x_grid) >= 3 and len(y_grid) >= 3:
        r1, r2 = y_grid[bi], y_grid[bi + 1]
        c1, c2 = x_grid[bj], x_grid[bj + 1]
        return gray[r1:r2, c1:c2], bi, bj
    # Gridding failed: fall back to the central half of the image.
    h, w = gray.shape
    return gray[h // 4:3 * h // 4, w // 4:3 * w // 4], 0, 0


def _plot_convergence(block: np.ndarray) -> plt.Figure:
    """Plot the threshold sequence produced by ``iterative_threshold`` on *block*."""
    _, T_iter, history = iterative_threshold(block)
    fig_conv, ax_conv = plt.subplots(figsize=(6, 4))
    ax_conv.plot(history, 'bo-', markersize=3)
    ax_conv.set_title('迭代阈值收敛过程')
    ax_conv.set_xlabel('迭代次数')
    ax_conv.set_ylabel('阈值T')
    ax_conv.axhline(y=T_iter, color='r', linestyle='--', label=f'最终T={T_iter:.1f}')
    ax_conv.legend()
    return fig_conv


def _segment_block(blk: np.ndarray) -> np.ndarray:
    """Segment one grid cell: brighten if dark, TV-denoise, Otsu, keep largest blob."""
    # Dark-cell enhancement: x5 when the mean is below 5, x1.5 when below 30.
    blk_mean = float(np.mean(blk))
    if blk_mean < 5:
        blk = np.clip(blk.astype(float) * 5, 0, 255).astype(np.uint8)
    elif blk_mean < 30:
        blk = np.clip(blk.astype(float) * 1.5, 0, 255).astype(np.uint8)

    blk_denoised = tv_denoise(blk.astype(float), lam=12.0)

    try:
        # Clip before the integer cast: denoised values slightly outside
        # 0..255 must not wrap around when converted to uint8.
        T = filters.threshold_otsu(np.clip(blk_denoised, 0, 255).astype(np.uint8))
    except ValueError:
        # No threshold could be computed for this cell: treat it as empty.
        return np.zeros(blk.shape, dtype=np.uint8)

    bw_blk = (blk_denoised > T).astype(np.uint8)
    return keep_largest_object(bw_blk, min_size=8)


def _segment_all_blocks(gray: np.ndarray, x_grid: np.ndarray, y_grid: np.ndarray) -> np.ndarray:
    """Segment every grid cell of *gray* and assemble the full-size mask."""
    bw_full = np.zeros_like(gray)
    if len(x_grid) < 2 or len(y_grid) < 2:
        return bw_full

    height, width = gray.shape[:2]
    # Clamp to the image: a negative start would be read as an index from
    # the end and silently produce an empty cell.
    row_edges = np.clip(y_grid, 0, height)
    col_edges = np.clip(x_grid, 0, width)

    for r1, r2 in zip(row_edges[:-1], row_edges[1:]):
        for c1, c2 in zip(col_edges[:-1], col_edges[1:]):
            blk = gray[r1:r2, c1:c2]
            if blk.size == 0:
                continue
            bw_full[r1:r2, c1:c2] = _segment_block(blk)
    return bw_full


def _render_grid_overlay(img: np.ndarray, gray: np.ndarray,
                         x_grid: np.ndarray, y_grid: np.ndarray) -> np.ndarray:
    """Return an RGB uint8 copy of the image with 3-pixel-wide cyan grid lines."""
    if img.ndim == 3:
        overlay = img[:, :, :3].copy()
    else:
        overlay = np.stack([gray] * 3, axis=-1)
    overlay = overlay.astype(np.uint8)

    rows, cols = overlay.shape[:2]
    for x in x_grid:
        if 0 <= x < cols:
            overlay[:, max(x - 1, 0):min(x + 2, cols), :] = [0, 255, 255]
    for y in y_grid:
        if 0 <= y < rows:
            overlay[max(y - 1, 0):min(y + 2, rows), :, :] = [0, 255, 255]
    return overlay


def main() -> None:
    """Run the whole pipeline on ``cDNA.png`` and write all results to OUTPUT_DIR.

    Steps: load the image, estimate the grid, compare the three threshold
    methods on one sample cell (plus the iterative-threshold convergence
    curve), segment every cell with TV denoising + Otsu, and save the
    binary mask and a grid overlay image.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # ---- Load image ----
    img_path = os.path.join(DATA_DIR, 'cDNA.png')
    print(f"读取图像: {img_path}")
    img, gray = _load_image(img_path)
    print(f"图像尺寸: {gray.shape}, 灰度范围: [{gray.min()}, {gray.max()}]")

    # ---- Step 1: gridding ----
    print("\n[步骤1] 网格划分...")
    x_grid, y_grid, col_prof, row_prof, x_per, y_per = gridding(gray)
    print(f" 列间距估计: {x_per}, 行间距估计: {y_per}")
    print(f" 网格: {len(x_grid)-1} 列 x {len(y_grid)-1}")
    _save_figure(plot_gridding_result(gray, x_grid, y_grid, col_prof, row_prof),
                 'result_gridding.png', dpi=150)

    # ---- Step 2: threshold comparison on a single cell ----
    print("\n[步骤2] 单块阈值分割对比...")
    block, bi, bj = _pick_sample_block(gray, x_grid, y_grid)
    print(f" 选取子块 [{bi},{bj}]: 尺寸{block.shape}")
    _save_figure(plot_threshold_comparison(block, f"子块[{bi},{bj}]"),
                 'result_threshold_compare.png', dpi=150)
    _save_figure(_plot_convergence(block), 'result_iterative_convergence.png', dpi=100)

    # ---- Step 3: cell-by-cell segmentation of the whole image ----
    print("\n[步骤3] 全图逐块分割...")
    bw_full = _segment_all_blocks(gray, x_grid, y_grid)
    bw_full = remove_small_objects(bw_full, min_size=20)
    _save_figure(plot_full_segmentation(gray, bw_full, "全图逐块Otsu分割结果"),
                 'result_full_segmentation.png', dpi=150)

    bw_img = Image.fromarray((bw_full * 255).astype(np.uint8))
    bw_img.save(os.path.join(OUTPUT_DIR, 'result_I_bw.png'))
    print(" 保存: result_I_bw.png")

    # ---- Step 4: grid overlay image ----
    overlay = _render_grid_overlay(img, gray, x_grid, y_grid)
    Image.fromarray(overlay).save(os.path.join(OUTPUT_DIR, 'result_gridding_overlay.png'))
    print(" 保存: result_gridding_overlay.png")

    print(f"\n全部完成!输出文件在: {OUTPUT_DIR}")


if __name__ == '__main__':
    main()