使用适当的正则表达式来处理文本:
re.sub(r're', '', input) 可以把输入文本中与正则表达式匹配的字符替换为指定的字符串(这里替换为空字符串,即删除匹配到的内容)。
用 \t 分隔的文本和标签,可以先分别处理成两个列表,再使用 zip 配对成 (文本, 标签) 元组。
def load_dataset(self):
    """Index every training image under ``<dataset_path>/images_background``.

    The directory is walked as ``images_background/<alphabet>/<character>/``.
    Each character directory is one class: the path of every entry inside it
    is appended to ``self.train_lines`` and the current value of
    ``self.types`` is appended to ``self.train_labels`` as its class id.
    ``self.types`` is incremented once per character directory, so after the
    call it has grown by the number of character directories visited.

    Listings are sorted because ``os.listdir`` returns names in arbitrary
    order; sorting keeps the class ids reproducible across runs.  Entries at
    the alphabet or character level that are not directories (stray files)
    are skipped instead of crashing the nested ``os.listdir`` call.

    Raises:
        OSError: if ``images_background`` itself is missing or unreadable.
    """
    train_path = os.path.join(self.dataset_path, 'images_background')
    for alphabet in sorted(os.listdir(train_path)):
        alphabet_path = os.path.join(train_path, alphabet)
        if not os.path.isdir(alphabet_path):
            continue
        for character in sorted(os.listdir(alphabet_path)):
            character_path = os.path.join(alphabet_path, character)
            if not os.path.isdir(character_path):
                continue
            images = sorted(os.listdir(character_path))
            self.train_lines.extend(
                os.path.join(character_path, image) for image in images
            )
            # Every image of this character shares the same class id.
            self.train_labels.extend([self.types] * len(images))
            # One class per character directory, even if it holds no images.
            self.types += 1
# Draw a random class id; random.randint includes both endpoints, so c is
# in [0, self.types - 1].
c = random.randint(0, self.types - 1)
# Keep only the entries of `lines` whose label equals c.
# NOTE(review): `lines` and `labels` are defined outside this excerpt; the
# mask indexing presumably relies on both being parallel NumPy arrays — confirm.
selected_path = lines[labels[:] == c]
这里根据随机选中的标签 c,取出该标签对应的所有图片路径(selected_path)。
接下来从这里面随机抽取 3 个互不相同的图片下标。
# Draw 3 distinct positions inside selected_path (the paths of class c).
# random.sample raises ValueError when fewer than 3 positions are available.
image_indexes = random.sample(range(0, len(selected_path)), 3)
# Take two similar images: both come from the same class c.
batch_images_path.append(selected_path[image_indexes[0]])
batch_images_path.append(selected_path[image_indexes[1]])
# Take two dissimilar images: first a third image of class c; the image
# from a different class is appended at the end of this excerpt.
batch_images_path.append(selected_path[image_indexes[2]])
# Pick a class different from the current one.
different_c = list(range(self.types))
# The list is 0..types-1, so the element at index c is the value c itself;
# popping it leaves the self.types - 1 other class ids.
different_c.pop(c)
# Draw one position into different_c (np.random.choice returns a length-1 array).
different_c_index = np.random.choice(range(0, self.types - 1), 1)
current_c = different_c[different_c_index[0]]
selected_path = lines[labels == current_c]
# Redraw until the chosen class has at least one image path.
while len(selected_path)<1:
different_c_index = np.random.choice(range(0, self.types - 1), 1)
current_c = different_c[different_c_index[0]]
selected_path = lines[labels == current_c]
# Append one random image of the other class.
image_indexes = random.sample(range(0, len(selected_path)), 1)
batch_images_path.append(selected_path[image_indexes[0]])