refactor: remove_small_objects 用 Otsu 阈值替代“中位数 25%”规则
对连通域面积分布做 Otsu 阈值分割,自动寻找噪声与真实斑点之间的面积分界,不再凭经验硬编码百分比
This commit is contained in:
@@ -15,3 +15,6 @@ Thumbs.db
|
|||||||
|
|
||||||
# Obsidian
|
# Obsidian
|
||||||
.obsidian/
|
.obsidian/
|
||||||
|
|
||||||
|
# Flask
|
||||||
|
.playwright-mcp/
|
||||||
@@ -200,23 +200,41 @@ def remove_small_objects(binary: np.ndarray) -> np.ndarray:
|
|||||||
"""
|
"""
|
||||||
自动去除小连通域(噪声)。
|
自动去除小连通域(噪声)。
|
||||||
|
|
||||||
统计所有连通域面积的中位数,
|
对连通域面积分布做 Otsu 阈值检测:
|
||||||
小于中位数 25% 的视为噪声,直接剔除。
|
面积分布天然双峰——噪声区(几个像素) 和 真斑点区(几百像素)。
|
||||||
|
Otsu 自动找到两峰之间的最佳分界,小于该值的视为噪声。
|
||||||
换图换分辨率都自动适应,不需要手动调参。
|
换图换分辨率都自动适应,不需要手动调参。
|
||||||
"""
|
"""
|
||||||
labeled, num = ndimage.label(binary)
|
labeled, num = ndimage.label(binary)
|
||||||
if num == 0:
|
if num == 0:
|
||||||
return binary # 全黑,直接返回
|
return binary
|
||||||
|
|
||||||
# 收集所有连通域的面积
|
# 收集所有连通域的面积
|
||||||
areas = [int(np.sum(labeled == i)) for i in range(1, num + 1)]
|
areas = np.array([int(np.sum(labeled == i)) for i in range(1, num + 1)])
|
||||||
median = np.median(areas) # 面积中位数
|
if len(areas) < 2:
|
||||||
min_size = max(1, int(median * 0.25)) # 中位数的25%,最少1像素
|
return binary
|
||||||
|
|
||||||
|
# 对面积数组做 Otsu(与像素 Otsu 完全相同的原理)
|
||||||
|
# 将面积值当作"灰度",找到最小类内方差的分界点
|
||||||
|
best_T, best_cost, n_total = 0, float('inf'), len(areas)
|
||||||
|
for T in np.unique(areas):
|
||||||
|
small = areas[areas <= T] # 候选噪声组
|
||||||
|
large = areas[areas > T] # 候选真斑点组
|
||||||
|
w_s = len(small) / n_total
|
||||||
|
w_l = len(large) / n_total
|
||||||
|
if w_s == 0 or w_l == 0:
|
||||||
|
continue
|
||||||
|
cost = w_s * np.var(small) + w_l * np.var(large)
|
||||||
|
if cost < best_cost:
|
||||||
|
best_cost = cost
|
||||||
|
best_T = T
|
||||||
|
|
||||||
|
min_size = best_T # Otsu 自动找到的面积分界线
|
||||||
|
|
||||||
# 面积不达标的连通域整块置0
|
# 面积不达标的连通域整块置0
|
||||||
result = binary.copy()
|
result = binary.copy()
|
||||||
for i in range(1, num + 1):
|
for i in range(1, num + 1):
|
||||||
if areas[i - 1] < min_size:
|
if int(np.sum(labeled == i)) < min_size:
|
||||||
result[labeled == i] = 0
|
result[labeled == i] = 0
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|||||||
@@ -207,21 +207,33 @@ def remove_small_objects(binary: np.ndarray) -> np.ndarray:
|
|||||||
"""
|
"""
|
||||||
自动去除小连通域。
|
自动去除小连通域。
|
||||||
|
|
||||||
统计所有连通域的面积中位数,小于中位数 25% 的视为噪声,
|
对连通域面积分布做 Otsu 阈值检测——面积天然双峰,
|
||||||
自动剔除,不需要人设定阈值。
|
Otsu 自动找到噪声峰和真斑点峰之间的最佳分界,零人工参数。
|
||||||
"""
|
"""
|
||||||
labeled, num = ndimage.label(binary)
|
labeled, num = ndimage.label(binary)
|
||||||
if num == 0:
|
if num == 0:
|
||||||
return binary
|
return binary
|
||||||
|
|
||||||
# 统计每个连通域的面积
|
areas = np.array([int(np.sum(labeled == i)) for i in range(1, num + 1)])
|
||||||
areas = [int(np.sum(labeled == i)) for i in range(1, num + 1)]
|
if len(areas) < 2:
|
||||||
median_area = np.median(areas)
|
return binary
|
||||||
min_size = max(1, int(median_area * 0.25)) # 中位数的25%,最少1像素
|
|
||||||
|
best_T, best_cost, n_total = 0, float('inf'), len(areas)
|
||||||
|
for T_val in np.unique(areas):
|
||||||
|
small = areas[areas <= T_val]
|
||||||
|
large = areas[areas > T_val]
|
||||||
|
w_s = len(small) / n_total
|
||||||
|
w_l = len(large) / n_total
|
||||||
|
if w_s == 0 or w_l == 0:
|
||||||
|
continue
|
||||||
|
cost = w_s * np.var(small) + w_l * np.var(large)
|
||||||
|
if cost < best_cost:
|
||||||
|
best_cost = cost
|
||||||
|
best_T = T_val
|
||||||
|
|
||||||
result = binary.copy()
|
result = binary.copy()
|
||||||
for i in range(1, num + 1):
|
for i in range(1, num + 1):
|
||||||
if areas[i - 1] < min_size:
|
if int(np.sum(labeled == i)) < best_T:
|
||||||
result[labeled == i] = 0
|
result[labeled == i] = 0
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|||||||
+10
-3
@@ -72,11 +72,18 @@ def keep_largest_object(binary):
|
|||||||
def remove_small_objects(binary):
|
def remove_small_objects(binary):
|
||||||
L, n = ndimage.label(binary)
|
L, n = ndimage.label(binary)
|
||||||
if n == 0: return binary
|
if n == 0: return binary
|
||||||
areas = [int(np.sum(L==i)) for i in range(1,n+1)]
|
areas = np.array([int(np.sum(L==i)) for i in range(1,n+1)])
|
||||||
minsz = max(1, int(np.median(areas)*0.25))
|
if len(areas) < 2: return binary
|
||||||
|
best_T, best_cost, n_total = 0, float('inf'), len(areas)
|
||||||
|
for T in np.unique(areas):
|
||||||
|
s, l = areas[areas<=T], areas[areas>T]
|
||||||
|
w_s, w_l = len(s)/n_total, len(l)/n_total
|
||||||
|
if w_s==0 or w_l==0: continue
|
||||||
|
cost = w_s*np.var(s) + w_l*np.var(l)
|
||||||
|
if cost < best_cost: best_cost, best_T = cost, T
|
||||||
r = binary.copy()
|
r = binary.copy()
|
||||||
for i in range(1, n+1):
|
for i in range(1, n+1):
|
||||||
if areas[i-1] < minsz: r[L==i] = 0
|
if int(np.sum(L==i)) < best_T: r[L==i] = 0
|
||||||
return r
|
return r
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user