# cDNA-image-processing/src/cDNA_gridding_simple.py

"""
cDNA微阵列图像处理 —— 简化版
======================================

运行方式（示例）：D:/ProgramData/anaconda3/envs/my_env/python.exe src/cDNA_gridding_simple.py

一、算法流程总览

  灰度图 ──→ Otsu求像素最佳阈值 T ──→ 百分比 = T/255（自适应）
     │
     ├─→ 投影/减阈值/过零点配对 ──→ 网格线
     │
     ├─→ 逐格 Otsu 分割 ──→ keep_largest_object（每格留最大块）
     │
     └─→ remove_small_objects（对连通域面积做 Otsu，自动判定噪声）──→ 统计斑点数

二、各步骤详解

  1. 彩色图 → 灰度图

  2. Otsu 自动阈值
     遍历灰度 0~255，每个候选 T 将像素分为前景(>T)和背景(≤T)，
     计算类内方差 w_bg×σ²_bg + w_fg×σ²_fg，选使方差最小的 T。

  3. 投影
     横轴：np.sum(每列) → 曲线，高点=斑点列，低点=空隙列
     纵轴：np.sum(每行) → 曲线，高点=斑点行，低点=空隙行

  4. 阈值 X = (max-min) × (T/255)

  5. 曲线减 X → 大于 0 = 斑点区域，小于 0 = 空隙
     过零点 = 斑点和空隙的分界线

  6. 过零点配对
     过零点交替：正→负（离开斑点）、负→正（进入下一斑点）
     配对「离开斑点 + 进入下一斑点」，中点 = 空隙中央 = 划线位置

  7. 逐格分割 + 后处理
     对每个格子独立做 Otsu → keep_largest_object（留最大块）
     → 全局 remove_small_objects（自动去噪）→ 统计斑点数

  8. 输出 6 张独立图片：网格叠加、列投影、行投影、灰度直方图、逐格分割、后处理结果
"""

import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from skimage import color
from scipy import ndimage

# Matplotlib setup: list CJK-capable fonts first so the Chinese figure titles
# can render, and use the ASCII hyphen for negative tick labels.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Microsoft YaHei'],
    'axes.unicode_minus': False,
})

# All paths are derived from this script's own location; no absolute path is
# hard-coded.
_THIS_FILE = os.path.abspath(__file__)
_DATA_SUBDIRS = ('cDNA图像处理实例', '数据', 'cDNA')

SCRIPT_DIR = os.path.dirname(_THIS_FILE)          # directory holding this script
BASE_DIR = os.path.dirname(SCRIPT_DIR)            # its parent directory
DATA_DIR = os.path.join(BASE_DIR, *_DATA_SUBDIRS)  # where the input image is read from
OUTPUT_DIR = os.path.join(BASE_DIR, 'results_simple')  # where figures are written


# ================================================================
# 函数1：Otsu 像素级阈值
# ================================================================
def otsu_threshold_pixels(gray: np.ndarray) -> int:
    """
    Return the Otsu threshold of *gray* as a plain ``int``.

    Every candidate ``T`` in 1..254 splits the values into background
    (``<= T``) and foreground (``> T``).  The candidate that minimises the
    within-class variance ``w_bg * var_bg + w_fg * var_fg`` is returned; when
    several candidates are equally good the smallest one wins.

    The search works on the table of distinct values and their counts rather
    than on the pixels themselves, so a candidate costs O(distinct values)
    instead of a full pass (plus two temporary copies) over the image.

    Args:
        gray: Array of intensities, any shape.

    Returns:
        The best threshold, or 0 when no candidate produces two non-empty
        groups (for example an empty or constant input).
    """
    levels, counts = np.unique(np.asarray(gray), return_counts=True)
    total = int(counts.sum())
    if total == 0:
        # Empty input: there is nothing to split (the per-pixel formulation
        # would divide by zero here).
        return 0

    # Work in float so that squaring small integer dtypes cannot overflow.
    levels = levels.astype(float)

    def scatter(vals: np.ndarray, cnts: np.ndarray) -> float:
        """Sum of squared deviations from the count-weighted mean."""
        mean = (vals * cnts).sum() / cnts.sum()
        return float((cnts * (vals - mean) ** 2).sum())

    best_T = 0
    best_cost = float('inf')
    previous_split = -1

    for T in range(1, 255):
        # `levels` is sorted, so the background is exactly its first `split`
        # entries and the foreground is the rest.
        split = int((levels <= T).sum())
        if split == 0 or split == levels.size:
            continue  # one group would be empty
        if split == previous_split:
            continue  # same partition as the previous T -> same cost, cannot win
        previous_split = split

        # w_bg * var_bg + w_fg * var_fg == (scatter_bg + scatter_fg) / total
        cost = (scatter(levels[:split], counts[:split])
                + scatter(levels[split:], counts[split:])) / total

        if cost < best_cost:
            best_cost = cost
            best_T = T

    return best_T


# ================================================================
# 函数2：网格划线
# ================================================================
def _find_gap_lines(prof_shifted: np.ndarray) -> np.ndarray:
    """
    Return the centre index of every gap between two positive runs.

    The shifted profile is positive over spots and non-positive over gaps::

        signal: ----++++----++++----++++
                        ^  ^
                   leave   enter  -> midpoint = grid-line position

    A gap is only reported when it is bounded on both sides, i.e. a
    "positive -> non-positive" crossing followed by a "non-positive ->
    positive" crossing.  Leading and trailing gaps are therefore ignored.

    Returns:
        1-D integer array of line positions (empty, but still integer, when
        no bounded gap exists).
    """
    is_positive = np.asarray(prof_shifted) > 0
    if is_positive.size == 0:
        return np.empty(0, dtype=np.intp)

    # Index i is a crossing when the sign at i differs from the sign at i - 1.
    crossings = np.flatnonzero(is_positive[1:] != is_positive[:-1]) + 1

    # Crossings alternate.  If the profile starts inside a gap, the first
    # crossing enters a spot and cannot open a gap, so it is skipped.
    first_leave = 0 if is_positive[0] else 1
    leaving = crossings[first_leave::2]
    entering = crossings[first_leave + 1::2]

    # A trailing "leave" without a matching "enter" has no partner; drop it.
    leaving = leaving[:entering.size]
    return (leaving + entering) // 2


def draw_grid_lines(gray: np.ndarray) -> tuple:
    """
    Detect the grid lines separating the spots of a 2-D grayscale image.

    Steps:
      1. Otsu threshold ``T`` of the pixels -> adaptive fraction ``T / 255``.
      2. Column and row projections (sums along each axis).
      3. Per-projection threshold ``X = (max - min) * fraction``.
      4. Subtract ``X``; the sign changes of the result delimit spots and gaps.
      5. The midpoint of every gap bounded by two spots is a grid line.

    Args:
        gray: 2-D array of pixel intensities.

    Returns:
        A 10-tuple ``(x_lines, y_lines, T, pct, col_profile, row_profile,
        col_shifted, row_shifted, col_T_val, row_T_val)``:
        vertical line positions, horizontal line positions, the Otsu
        threshold, ``T / 255``, the two projections, the two projections
        after subtracting their threshold, and the two thresholds.

    Raises:
        ValueError: If *gray* is not 2-D.
    """
    if np.ndim(gray) != 2:
        raise ValueError(f"expected a 2-D image, got {np.ndim(gray)} dimension(s)")

    T = otsu_threshold_pixels(gray)  # pixel-level threshold
    pct = T / 255.0                  # turned into a fraction of the projection range

    # Projections: high values where spots line up, low values in the gaps.
    col_profile = np.sum(gray, axis=0).astype(float)
    row_profile = np.sum(gray, axis=1).astype(float)

    # NOTE(review): the threshold is measured from 0, not from the profile
    # minimum.  If the background lifts the minimum above X, the shifted
    # profile never changes sign and no line is found - confirm this is the
    # intended formula.
    col_T_val = (np.max(col_profile) - np.min(col_profile)) * pct
    row_T_val = (np.max(row_profile) - np.min(row_profile)) * pct

    col_shifted = col_profile - col_T_val
    row_shifted = row_profile - row_T_val

    x_lines = _find_gap_lines(col_shifted)
    y_lines = _find_gap_lines(row_shifted)

    return (x_lines, y_lines, T, pct, col_profile, row_profile,
            col_shifted, row_shifted, col_T_val, row_T_val)


# ================================================================
# 函数3：后处理（完全自动，无需人工设定阈值）
# ================================================================
def keep_largest_object(binary: np.ndarray) -> np.ndarray:
    """
    Keep only the largest connected component of a binary mask.

    Components are labelled with ``ndimage.label`` (default connectivity) and
    their pixel counts are obtained in a single pass with ``np.bincount``.
    When several components share the largest area, the one with the lowest
    label wins.  No threshold has to be chosen.

    Args:
        binary: Mask whose non-zero elements are foreground.

    Returns:
        ``uint8`` array of the same shape, 1 on the largest component and 0
        elsewhere (all zeros when the mask has no foreground).
    """
    labeled, num = ndimage.label(binary)
    if num == 0:
        # Nothing to keep; use the same dtype as the normal path.
        return np.zeros(labeled.shape, dtype=np.uint8)

    # bincount[k] is the number of pixels with label k; entry 0 is background.
    areas = np.bincount(labeled.ravel(), minlength=num + 1)[1:]
    largest_label = int(np.argmax(areas)) + 1
    return (labeled == largest_label).astype(np.uint8)

# ================================================================
# 函数4：自动去除小连通域（噪声）
# ================================================================
def remove_small_objects(binary: np.ndarray) -> np.ndarray:
    """
    Remove the connected components that Otsu classifies as "small".

    The component areas are treated like gray levels: every distinct area
    ``T`` splits the components into a small group (``area <= T``) and a
    large group (``area > T``), and the ``T`` with the lowest within-class
    variance is chosen.  The whole small group - including components whose
    area equals ``T`` - is then erased, so removal matches the split
    that was actually evaluated.

    NOTE(review): the split is applied even when the areas are not clearly
    bimodal, so part of a population of similarly sized components can be
    removed - confirm this is acceptable for the expected inputs.

    Args:
        binary: Mask whose non-zero elements are foreground.

    Returns:
        A new array with the same shape and dtype as *binary*; the input is
        never modified.  With fewer than two components nothing is removed.
    """
    labeled, num = ndimage.label(binary)
    if num < 2:
        # Zero or one component: there is no distribution to split.
        return binary.copy()

    # areas[k - 1] is the pixel count of label k (entry 0 is the background).
    areas = np.bincount(labeled.ravel(), minlength=num + 1)[1:]

    best_T = 0
    best_cost = float('inf')
    n_total = len(areas)
    for T in np.unique(areas):
        small = areas[areas <= T]    # candidate noise group
        large = areas[areas > T]     # candidate spot group
        w_s = len(small) / n_total
        w_l = len(large) / n_total
        if w_s == 0 or w_l == 0:
            continue
        cost = w_s * np.var(small) + w_l * np.var(large)
        if cost < best_cost:
            best_cost = cost
            best_T = T

    # Look-up table indexed by label: True means "erase this component".
    # The background (label 0) is never selected.  When no valid split was
    # found best_T stays 0 and nothing is erased.
    erase = np.zeros(num + 1, dtype=bool)
    erase[1:] = areas <= best_T

    result = binary.copy()
    result[erase[labeled]] = 0
    return result


# ================================================================
# 主流程
# ================================================================
def _load_gray_image(path: str) -> np.ndarray:
    """Read the image at *path* and return it as a ``uint8`` grayscale array."""
    # The context manager releases the file handle as soon as the pixels
    # have been copied into the array.
    with Image.open(path) as im:
        img = np.array(im)
    # The first three channels are taken as RGB (the source image is RGBA
    # according to the original author).
    # NOTE(review): this indexing fails for a single-channel file, and the
    # `* 255` assumes rgb2gray returns values in 0..1 - confirm both for
    # other inputs.
    return (color.rgb2gray(img[:, :, :3]) * 255).astype(np.uint8)


def _segment_cells(gray: np.ndarray, x_lines, y_lines) -> np.ndarray:
    """
    Segment every grid cell independently and return the assembled mask.

    Each cell bounded by two consecutive horizontal and two consecutive
    vertical lines gets its own Otsu threshold; only the largest connected
    component of the thresholded cell is kept.

    NOTE(review): only cells between consecutive detected lines are visited,
    so everything outside the first/last line stays 0 - confirm whether the
    outermost row/column of spots should be covered as well.
    """
    bw_full = np.zeros_like(gray)
    for r1, r2 in zip(y_lines[:-1], y_lines[1:]):
        for c1, c2 in zip(x_lines[:-1], x_lines[1:]):
            blk = gray[r1:r2, c1:c2]
            if blk.size == 0:
                continue
            T = otsu_threshold_pixels(blk)
            bw_blk = (blk > T).astype(np.uint8)
            bw_full[r1:r2, c1:c2] = keep_largest_object(bw_blk)
    return bw_full


def _save_binary_figure(image: np.ndarray, title: str, filename: str) -> None:
    """Save *image* as an 8x8 inch grayscale figure without axes."""
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.imshow(image, cmap='gray')
    ax.set_title(title, fontsize=13)
    ax.axis('off')
    fig.savefig(os.path.join(OUTPUT_DIR, filename), dpi=150, bbox_inches='tight')
    plt.close(fig)


def main():
    """
    Run the whole pipeline on ``cDNA.png`` and write six figures.

    Steps: load the image as grayscale, detect the grid lines, segment every
    grid cell, remove small components, count the remaining spots, and save
    the diagnostic figures into ``OUTPUT_DIR``.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # ---- Load the image as grayscale ----
    gray = _load_gray_image(os.path.join(DATA_DIR, 'cDNA.png'))

    # ---- 1. Grid lines ----
    (x_lines, y_lines, T_otsu, pct,
     col_prof, row_prof, col_shifted, row_shifted,
     col_T_val, row_T_val) = draw_grid_lines(gray)
    print(f"检测到 {len(x_lines)} 条纵线, {len(y_lines)} 条横线")
    print(f"Otsu 阈值: T={T_otsu}, 自适应百分比: {pct*100:.1f}%")

    # ---- 2. Per-cell segmentation + post-processing ----
    bw_full = _segment_cells(gray, x_lines, y_lines)
    bw_clean = remove_small_objects(bw_full)

    # ---- 3. Count spots ----
    labeled, num = ndimage.label(bw_clean)
    # One pass over the label image; entry 0 (background) is dropped.
    spot_sizes = np.bincount(labeled.ravel(), minlength=num + 1)[1:]
    min_spot_pixels = 10  # components smaller than this are not counted
    valid = [int(s) for s in spot_sizes if s >= min_spot_pixels]
    print(f"检测到 {len(valid)} 个斑点")

    # ---- 4. Figures (each one saved separately) ----

    # Figure 1: grid lines on top of the grayscale image
    fig1, ax1 = plt.subplots(figsize=(8, 8))
    ax1.imshow(gray, cmap='gray')
    for x in x_lines:
        ax1.axvline(x=x, color='lime', linewidth=0.5)
    for y in y_lines:
        ax1.axhline(y=y, color='lime', linewidth=0.5)
    ax1.set_title(f'网格划分 ({len(x_lines)}x{len(y_lines)})', fontsize=13)
    ax1.axis('off')
    fig1.savefig(os.path.join(OUTPUT_DIR, '01_grid_overlay.png'), dpi=150, bbox_inches='tight')
    plt.close(fig1)

    # Figure 2: column projection with threshold, sign changes and grid lines
    fig2, ax2 = plt.subplots(figsize=(10, 4))
    xs = np.arange(len(col_prof))
    ax2.plot(xs, col_prof, 'b-', linewidth=0.6, label='col profile')
    ax2.axhline(y=col_T_val, color='orange', linestyle='--', linewidth=1,
                label=f'threshold X={col_T_val:.0f}')
    ax2.plot(xs, col_shifted, 'g-', linewidth=0.6, alpha=0.5, label='after -X')
    ax2.fill_between(xs, 0, col_shifted, where=(col_shifted > 0), color='green', alpha=0.1)
    ax2.fill_between(xs, 0, col_shifted, where=(col_shifted < 0), color='red', alpha=0.1)
    zero_idx = np.where(np.diff(col_shifted > 0) != 0)[0]
    for zi in zero_idx[:50]:  # only the first 50 sign changes are drawn
        ax2.axvline(x=zi, color='purple', linewidth=0.3, alpha=0.5)
    for xl in x_lines:
        ax2.axvline(x=xl, color='red', linewidth=0.8, alpha=0.7)
    ax2.set_title('col projection', fontsize=12)
    ax2.set_xlabel('col')
    ax2.legend(fontsize=8)
    fig2.savefig(os.path.join(OUTPUT_DIR, '02_col_projection.png'), dpi=120, bbox_inches='tight')
    plt.close(fig2)

    # Figure 3: row projection, drawn with the row index on the vertical axis
    fig3, ax3 = plt.subplots(figsize=(10, 4))
    ys = np.arange(len(row_prof))
    ax3.plot(row_prof, ys, 'b-', linewidth=0.6, label='row profile')
    ax3.axvline(x=row_T_val, color='orange', linestyle='--', linewidth=1,
                label=f'threshold X={row_T_val:.0f}')
    ax3.plot(row_shifted, ys, 'g-', linewidth=0.6, alpha=0.5, label='after -X')
    ax3.fill_betweenx(ys, 0, row_shifted, where=(row_shifted > 0), color='green', alpha=0.1)
    ax3.fill_betweenx(ys, 0, row_shifted, where=(row_shifted < 0), color='red', alpha=0.1)
    zero_idx_r = np.where(np.diff(row_shifted > 0) != 0)[0]
    for zi in zero_idx_r[:50]:  # only the first 50 sign changes are drawn
        ax3.axhline(y=zi, color='purple', linewidth=0.3, alpha=0.5)
    for yl in y_lines:
        ax3.axhline(y=yl, color='red', linewidth=0.8, alpha=0.7)
    ax3.set_title('row projection', fontsize=12)
    ax3.set_ylabel('row')
    ax3.legend(fontsize=8)
    fig3.savefig(os.path.join(OUTPUT_DIR, '03_row_projection.png'), dpi=120, bbox_inches='tight')
    plt.close(fig3)

    # Figure 4: gray-level histogram with the Otsu threshold
    fig4, ax4 = plt.subplots(figsize=(8, 5))
    ax4.hist(gray.ravel(), bins=50, color='gray', edgecolor='black', linewidth=0.3)
    ax4.axvline(x=T_otsu, color='red', linestyle='--', linewidth=2,
                label=f'Otsu T={T_otsu} (pct={pct*100:.1f}%)')
    ax4.set_title('histogram + Otsu threshold', fontsize=12)
    ax4.set_xlabel('gray value')
    ax4.set_ylabel('pixel count')
    ax4.legend()
    fig4.savefig(os.path.join(OUTPUT_DIR, '04_histogram.png'), dpi=120, bbox_inches='tight')
    plt.close(fig4)

    # Figure 5: per-cell Otsu segmentation, before small-object removal
    _save_binary_figure(bw_full, 'per-cell Otsu (before post-processing)',
                        '05_segmentation_raw.png')

    # Figure 6: final binary mask after small-object removal
    _save_binary_figure(bw_clean, f'post-processed ({len(valid)} spots)',
                        '06_post_processed.png')

    print(f"共保存 6 张图到: {OUTPUT_DIR}")


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()