refactor: remove_small_objects用Otsu替代中位数25%

对连通域面积分布做Otsu自动找分界,不再拍脑袋定百分比
This commit is contained in:
2026-05-08 15:31:47 +08:00
parent b07e7a1182
commit 1041a66270
4 changed files with 57 additions and 17 deletions
+25 -7
View File
def _otsu_area_threshold(areas: np.ndarray):
    """Return the Otsu split point of a 1-D array of component areas.

    Every distinct area value ``T`` is tried as a split into a lower class
    (``areas <= T``) and an upper class (``areas > T``); the ``T`` with the
    smallest weighted within-class variance wins, and the first (smallest)
    such ``T`` is kept on ties.

    Returns:
        The chosen threshold as an ``int`` (the largest area that belongs to
        the lower class), or ``None`` when all areas are equal and no split
        with two non-empty classes exists.
    """
    total = len(areas)
    best_threshold = None
    best_cost = float("inf")
    # np.unique returns sorted values; the maximum is skipped because using
    # it as the split would leave the upper class empty.
    for candidate in np.unique(areas)[:-1]:
        lower = areas[areas <= candidate]
        upper = areas[areas > candidate]
        cost = (len(lower) / total) * np.var(lower) + (len(upper) / total) * np.var(upper)
        if cost < best_cost:
            best_cost = cost
            best_threshold = int(candidate)
    return best_threshold


def remove_small_objects(binary: np.ndarray) -> np.ndarray:
    """Remove small connected components (noise) from a binary image.

    The areas of all connected components are split into two classes with
    Otsu's method (minimum within-class variance), and every component in
    the lower class is erased. No size threshold has to be supplied.

    The approach assumes the area distribution is bimodal (tiny noise specks
    versus much larger genuine blobs). Otsu always produces a split when at
    least two distinct areas exist, so on an image without noise the smaller
    genuine blobs may be removed.

    Args:
        binary: Image whose non-zero elements are foreground.

    Returns:
        ``binary`` itself when it contains fewer than two components;
        otherwise a copy with the noise components set to 0. The input array
        is never modified.
    """
    labeled, num = ndimage.label(binary)
    if num < 2:
        # Zero or one component: there is nothing to separate.
        return binary

    # Count pixels per label in a single pass; index 0 is the background.
    areas = np.bincount(labeled.ravel(), minlength=num + 1)[1:]

    result = binary.copy()
    threshold = _otsu_area_threshold(areas)
    if threshold is None:
        # All components have the same area: no noise class can be identified.
        return result

    # The lower Otsu class is ``area <= threshold``, so the comparison must be
    # inclusive; a strict ``<`` would keep the largest noise size (and keep
    # all noise when every speck has the same area).
    is_noise = np.zeros(num + 1, dtype=bool)
    is_noise[1:] = areas <= threshold
    # Look up each pixel's label in the table to clear whole components.
    result[is_noise[labeled]] = 0
    return result
+19 -7
View File
def _otsu_area_threshold(areas: np.ndarray):
    """Return the Otsu split point of a 1-D array of component areas.

    Each distinct area value ``T`` is evaluated as a split into
    ``areas <= T`` and ``areas > T``; the value minimising the weighted
    within-class variance is returned (smallest ``T`` on ties).

    Returns:
        The threshold as an ``int`` (largest area in the lower class), or
        ``None`` when all areas are equal so no two-class split exists.
    """
    total = len(areas)
    best_threshold = None
    best_cost = float("inf")
    # The sorted maximum is excluded: splitting there empties the upper class.
    for candidate in np.unique(areas)[:-1]:
        lower = areas[areas <= candidate]
        upper = areas[areas > candidate]
        cost = (len(lower) / total) * np.var(lower) + (len(upper) / total) * np.var(upper)
        if cost < best_cost:
            best_cost = cost
            best_threshold = int(candidate)
    return best_threshold


def remove_small_objects(binary: np.ndarray) -> np.ndarray:
    """Remove small connected components from a binary image.

    Component areas are split into two classes with Otsu's method and every
    component in the lower (noise) class is erased, so no manual size
    parameter is needed.

    This assumes a bimodal area distribution (noise specks versus larger
    genuine blobs); when that does not hold, the smaller genuine blobs may
    be removed because Otsu still produces a split.

    Args:
        binary: Image whose non-zero elements are foreground.

    Returns:
        ``binary`` itself when it contains fewer than two components;
        otherwise a copy with noise components set to 0. The input is not
        modified.
    """
    labeled, num = ndimage.label(binary)
    if num < 2:
        # Nothing to separate with zero or one component.
        return binary

    # Per-label pixel counts in one pass; drop index 0 (background).
    areas = np.bincount(labeled.ravel(), minlength=num + 1)[1:]

    result = binary.copy()
    threshold = _otsu_area_threshold(areas)
    if threshold is None:
        # Identical areas everywhere: no noise class exists.
        return result

    # Inclusive comparison matches the Otsu lower class (``area <= T``);
    # a strict ``<`` would leave the largest noise components in place.
    is_noise = np.zeros(num + 1, dtype=bool)
    is_noise[1:] = areas <= threshold
    result[is_noise[labeled]] = 0
    return result