-
Notifications
You must be signed in to change notification settings - Fork 23
/
kmeans_for_anchors.py
169 lines (144 loc) · 7.16 KB
/
kmeans_for_anchors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#-------------------------------------------------------------------------------------------------------#
# kmeans虽然会对数据集中的框进行聚类,但是很多数据集由于框的大小相近,聚类出来的9个框相差不大,
# 这样的框反而不利于模型的训练。因为不同的特征层适合不同大小的先验框,shape越小的特征层适合越大的先验框
# 原始网络的先验框已经按大中小比例分配好了,不进行聚类也会有非常好的效果。
#-------------------------------------------------------------------------------------------------------#
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
if __name__ == "__main__":
    # Seed NumPy's global random number generator.
    np.random.seed(0)

    # Shape of the network input; it must be a multiple of 32.
    input_shape = [640, 640]

    # Number of anchor (prior) boxes to produce.
    anchors_num = 9

    # Text file listing the training images together with their labels.
    train_annotation_path = '2007_train.txt'
def cas_ratio(box, cluster):
    """Return the largest side ratio between ``box`` and ``cluster``.

    The element-wise ratios are taken in both directions (``box / cluster``
    and ``cluster / box``) and the maximum over the last axis of both is
    returned. For positive sizes the result is >= 1, with 1 meaning that
    every compared side is equal.

    The operands are combined with NumPy broadcasting, so the result has the
    broadcast shape of ``box`` and ``cluster`` without its trailing axis.
    """
    forward = box / cluster
    backward = cluster / box
    # The larger of the two directions per side, then the larger side.
    return np.maximum(forward, backward).max(axis=-1)
def avg_ratio(box, cluster):
    """Return the mean over the rows of ``box`` of each row's best ratio.

    For every row of ``box`` the smallest ``cas_ratio`` against ``cluster``
    is taken (the closest cluster for that row); the mean of those minima is
    returned.
    """
    best_per_box = []
    for single_box in box:
        best_per_box.append(np.min(cas_ratio(single_box, cluster)))
    return np.mean(best_per_box)
def kmeans(box, k, seed=None):
    """Cluster the rows of ``box`` into ``k`` groups with ``cas_ratio`` distance.

    Each row is assigned to the cluster with the smallest ``cas_ratio``; each
    cluster centre is then moved to the per-column median of its members.
    This repeats until the assignment stops changing. Progress is printed
    every fifth iteration.

    Args:
        box: 2-D array with one row per box (rows are ``[width, height]``
            where this file calls it).
        k: number of clusters.
        seed: optional seed for the private random generator that picks the
            initial centres. ``None`` (the default) draws fresh entropy, so
            the initial centres differ from run to run.

    Returns:
        ``(cluster, near)``: ``cluster`` holds the ``k`` centres, ``near``
        holds the index of the cluster assigned to each row of ``box``.

    Raises:
        ValueError: raised by the random draw when ``k`` is larger than the
            number of rows in ``box``.
    """
    row = box.shape[0]

    # No previous assignment yet, so the first pass always updates the
    # centres. (Starting from an all-zero assignment would stop immediately
    # whenever every box first lands in cluster 0, e.g. for k == 1.)
    last_clu = None

    # A private generator keeps this function from reseeding NumPy's global
    # random state behind the caller's back.
    rng = np.random.RandomState(seed)

    # Pick k distinct boxes as the initial centres (fancy indexing copies,
    # so ``box`` itself is never modified).
    cluster = box[rng.choice(row, k, replace=False)]

    iteration = 0
    while True:
        # (row, k) matrix of ratios between every box and every centre.
        distance = cas_ratio(box[:, np.newaxis], cluster)

        # Index of the closest centre for every box.
        near = np.argmin(distance, axis=1)
        if last_clu is not None and (last_clu == near).all():
            break

        # Move every centre to the median of its members.
        for j in range(k):
            members = box[near == j]
            if members.shape[0] == 0:
                # The median of nothing is NaN, which would poison every
                # later distance; keep the previous centre instead.
                continue
            cluster[j] = np.median(members, axis=0)

        last_clu = near
        if iteration % 5 == 0:
            print('iter: {:d}. avg_ratio:{:.2f}'.format(iteration, avg_ratio(box, cluster)))
        iteration += 1

    return cluster, near
def load_data(train_annotation_path):
    """Collect normalised box sizes from an annotation list file.

    Every non-blank line is split on whitespace: the first token is opened as
    an image to read its size, and each remaining token is a comma-separated
    box whose first four fields are ``xmin, ymin, xmax, ymax``. Any further
    fields in a box are ignored.

    Args:
        train_annotation_path: path of the UTF-8 annotation text file.

    Returns:
        Float array of shape ``(n, 2)`` with one ``[width, height]`` row per
        box, each expressed as a fraction of the image width / height.
        Boxes whose width or height is not positive are skipped, because a
        zero size leads to division by zero in the ratio computation.

    Raises:
        ValueError: if a box has fewer than four fields or a coordinate is
            not numeric.
    """
    with open(train_annotation_path, encoding='utf-8') as f:
        train_lines = f.readlines()

    data = []
    for line in tqdm(train_lines):
        fields = line.split()
        if not fields:
            # Blank line: there is no image path to open.
            continue

        # Only the size is needed, so release the file handle right away.
        with Image.open(fields[0]) as image:
            iw, ih = image.size

        for raw_box in fields[1:]:
            # Coordinates may be written as floats; truncate them to ints.
            xmin, ymin, xmax, ymax = (
                int(float(value)) for value in raw_box.split(',')[:4]
            )
            if xmax <= xmin or ymax <= ymin:
                continue

            width = np.float64(xmax / iw) - np.float64(xmin / iw)
            height = np.float64(ymax / ih) - np.float64(ymin / ih)
            data.append([width, height])

    # reshape keeps the (n, 2) layout even when no box was found.
    return np.array(data, dtype=np.float64).reshape(-1, 2)
if __name__ == '__main__':
    #-------------------------------------------------------------#
    #   Load every labelled box; each row is [width, height]
    #   expressed as a fraction of its image size.
    #-------------------------------------------------------------#
    print('Load xmls.')
    data = load_data(train_annotation_path)
    print('Load xmls done.')

    #-------------------------------------------------------------#
    #   Run the k-means clustering.
    #-------------------------------------------------------------#
    print('K-means boxes.')
    cluster, near = kmeans(data, anchors_num)
    print('K-means boxes done.')

    # Scale the fractions to the network input: input_shape[1] multiplies
    # the width column and input_shape[0] the height column.
    scale = np.array([input_shape[1], input_shape[0]])
    data = data * scale
    cluster = cluster * scale

    #-------------------------------------------------------------#
    #   Plot the members of every cluster and mark its centre.
    #-------------------------------------------------------------#
    for j in range(anchors_num):
        members = data[near == j]
        plt.scatter(members[:, 0], members[:, 1])
        plt.scatter(cluster[j][0], cluster[j][1], marker='x', c='black')
    plt.savefig("kmeans_for_anchors.jpg")
    plt.show()
    print('Save kmeans_for_anchors.jpg in root dir.')

    # Order the anchors by area, smallest first.
    cluster = cluster[np.argsort(cluster[:, 0] * cluster[:, 1])]
    print('avg_ratio:{:.2f}'.format(avg_ratio(data, cluster)))
    print(cluster)

    # Write the anchors as "w,h, w,h, ..." (%d truncates to integers). The
    # context manager closes the file even if formatting or writing fails.
    with open("yolo_anchors.txt", 'w') as f:
        f.write(", ".join("%d,%d" % (w, h) for w, h in cluster))