序列预处理
首先读取图片目录里各图片的名称,并存放在train_id.txt文件中(其实我的数据集图片名称及对应标签全都存放在同一个文本文档里面)。具体代码如下:
我的代码
# Build data/train/train_id.txt from data/train/train.txt.
# The first space-separated field of every non-blank line in train.txt is
# taken as the image name.
with open("data/train/train.txt", 'r') as f:
    rnames = f.read().splitlines()

# `names` was never initialised in the original snippet (NameError);
# build it directly. Blank lines are skipped so no empty id is written.
names = [line.split(" ")[0] for line in rnames if line.strip()]

# The `with` statement closes the file; no explicit close() is needed.
with open('data/train/train_id.txt', 'w') as f:
    f.writelines(name + '\n' for name in names)

print(rnames)
这是读取图片目录里各图片的名称并存放在train_id.txt文件中的代码
import os
import random

# Write the shuffled file names found in the image folder to the id file,
# one name per line.
image_path = "data/train/imgs/"  # path to your own image folder

# Fixed: the function is os.listdir (the original `os.list.dir` does not exist).
names = os.listdir(image_path)

# Shuffle the image names in place. The original called np.random.shuffle
# without importing numpy; the already-imported `random` module is used instead.
random.shuffle(names)

# Fixed: write to train_id.txt (the original omitted the ".txt" extension,
# which does not match the file name used elsewhere in this article).
# The `with` statement closes the file; no explicit close() is needed.
with open('data/train/train_id.txt', 'w') as f:
    f.writelines(name + '\n' for name in names)

print("txt文件创建成功!")
序列化数据处理
import os

import numpy as np
import scipy.misc
from keras.utils import Sequence, to_categorical

train_images_folder = 'data/'
valid_images_floder = 'data/'  # (sic) spelling kept so existing references still work
train_labels = 'data/train/train_labels.txt'
valid_labels = 'data/valid/val_labels.txt'
batch_size = 4
H, W = 64, 64  # height and width of the image slot in each batch tensor


def read_labellist(labeldir):
    """Read a text file and return its lines.

    Args:
        labeldir: Path of the label file.

    Returns:
        A list of strings, one per line of the file, without line terminators.
    """
    with open(labeldir, "r") as f:
        labels = f.read().splitlines()
    print(labels)
    return labels


class DataGenSequence(Sequence):
    """Keras ``Sequence`` that reads images and turns them into network input.

    Each item is a ``(batch_x, batch_y)`` pair: ``batch_x`` has shape
    ``(n, H, W, 3)`` (float32, pixel values divided by 255) and ``batch_y``
    has shape ``(n, 1, 1, 2)`` (one-hot labels over 2 classes), where ``n`` is
    ``batch_size`` except possibly for the last batch.
    """

    def __init__(self, usage):
        """Load the id list and the label list for the requested split.

        Args:
            usage: ``'train'`` selects the training files; any other value
                selects the validation files.
        """
        super(DataGenSequence, self).__init__()
        self.usage = usage
        if usage == 'train':
            id_file = 'data/train/train_id.txt'
            self.images_folder = train_images_folder
            self.labels = read_labellist(train_labels)
        else:
            id_file = 'data/valid/val_id.txt'
            self.images_folder = valid_images_floder
            self.labels = read_labellist(valid_labels)
        with open(id_file, 'r') as f:
            self.names = f.read().splitlines()

    def __len__(self):
        """Return the number of batches per epoch (last one may be partial)."""
        return int(np.ceil(len(self.names) / float(batch_size)))

    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Args:
            idx: Zero-based batch index.

        Returns:
            Tuple ``(batch_x, batch_y)`` as described in the class docstring.

        Raises:
            IndexError: If ``idx`` points past the last batch.
        """
        start = idx * batch_size
        length = min(batch_size, len(self.names) - start)
        if length <= 0:
            # Previously this produced an uninitialised batch (or a
            # negative-dimension error) instead of signalling the end.
            raise IndexError('batch index out of range')

        # Size the arrays by the real batch length: np.empty does not zero
        # its memory, so a short final batch allocated with `batch_size`
        # rows would hand uninitialised rows to the model.
        batch_x = np.empty((length, H, W, 3), dtype=np.float32)
        batch_y = np.empty((length, 1, 1, 2))

        for i_batch in range(length):
            i = start + i_batch
            filename = os.path.join(self.images_folder, self.names[i])
            # NOTE(review): scipy.misc.imread was removed in SciPy 1.2; on a
            # newer SciPy this call must be replaced by another image reader.
            # Assumes every image decodes to an (H, W, 3) array - TODO confirm.
            img = scipy.misc.imread(filename)
            # Random horizontal flip is augmentation: apply it to the
            # training split only so validation batches stay deterministic.
            if self.usage == 'train' and np.random.random_sample() > 0.5:
                img = np.fliplr(img)
            batch_x[i_batch, :, :, 0:3] = img / 255.
            # Labels are read from the file as strings; convert explicitly
            # before one-hot encoding. Result has shape (1, 2).
            batch_y[i_batch, 0, :, :] = to_categorical([int(self.labels[i])], 2)
        return batch_x, batch_y


def train_gen():
    """Return the data sequence for the training split."""
    return DataGenSequence('train')


def valid_gen():
    """Return the data sequence for the validation split."""
    return DataGenSequence('valid')
训练集和验证集,这部分等我把整个网络完成后再划分。
训练集,验证集已完成
输入图片维度和标签维度要相同,都是4维。
好了,我今天的博客就到这里了,等我把网络层搭建好后会再放另一篇博客呦!
如果我博客的内容对你有帮助,请给博主点个赞啊!这是我动力的来源。