refactor: remove_small_objects用Otsu替代中位数25%
对连通域面积分布做Otsu自动找分界,不再拍脑袋定百分比
This commit is contained in:
def remove_small_objects(binary: np.ndarray) -> np.ndarray:
    """Remove small connected components (noise) from a binary image.

    The areas of all connected components are split into two groups with
    Otsu's method: every distinct area is tried as a threshold and the one
    minimising the weighted within-class variance of the two groups
    (``area <= threshold`` vs. ``area > threshold``) is kept.  Every
    component that falls in the small group is erased.

    The threshold is derived from the data, so no size parameter has to be
    tuned by hand.

    NOTE: whenever at least two distinct areas exist, Otsu always produces a
    split, so the small group is removed even if it is not actually noise.

    Args:
        binary: Binary image; non-zero pixels are foreground.

    Returns:
        ``binary`` itself when it contains fewer than two components
        (nothing to compare), otherwise a copy with the components of the
        small-area group set to 0.  The input is never modified.
    """
    labeled, num = ndimage.label(binary)
    if num < 2:
        # Empty image or a single component: no distribution to split.
        return binary

    # Area of every component in one pass; index 0 is the background.
    areas = np.bincount(labeled.ravel(), minlength=num + 1)[1:]
    n_total = areas.size

    # Otsu on the area values: treat each area as a "grey level" and look
    # for the split with the smallest weighted within-class variance.
    best_threshold = None
    best_cost = float("inf")
    # The largest area is skipped: it would leave the large group empty.
    for threshold in np.unique(areas)[:-1]:
        small = areas[areas <= threshold]  # candidate noise group
        large = areas[areas > threshold]  # candidate real-object group
        w_small = small.size / n_total
        w_large = large.size / n_total
        cost = w_small * np.var(small) + w_large * np.var(large)
        if cost < best_cost:
            best_cost = cost
            best_threshold = threshold

    result = binary.copy()
    if best_threshold is None:
        # All components have the same area: nothing stands out as noise.
        return result

    # The threshold itself belongs to the noise group (areas <= threshold),
    # so the removal must be inclusive; a strict "<" would always keep the
    # largest noise component.
    is_noise = np.zeros(num + 1, dtype=bool)
    is_noise[1:] = areas <= best_threshold
    result[is_noise[labeled]] = 0
    return result
|
||||
|
||||
|
||||
Reference in New Issue
Block a user