《Deep Learning with Python》数据增强


数据增强:利用多种能够生成可信图像的随机变换(比如,旋转、缩放、位移等),从现有的训练样本中生成更多的图像。

下面是《Deep Learning with Python》中的示例代码:

import time
from keras import layers, models, optimizers
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from keras.preprocessing import image
import os

# Dataset locations, all derived from one shared root directory.
_dataset_root = 'datasets/cats_and_dogs_small'
train_dir = _dataset_root + '/train'
val_dir = _dataset_root + '/validation'
test_dir = _dataset_root + '/test'
train_cats_dir = train_dir + '/cats'

# Build the network: four Conv2D + MaxPooling2D stages followed by a
# flatten / dropout / dense classifier head with a single sigmoid output.
conv_stages = [
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
]
classifier_head = [
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
]
model = models.Sequential(conv_stages + classifier_head)

# Compile the network with binary cross-entropy loss and the RMSprop
# optimizer, reporting accuracy under the metric name 'acc'.
# `learning_rate` is used instead of the deprecated `lr` keyword, which
# newer Keras releases no longer accept.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(learning_rate=1e-4),
              metrics=['acc'])

# Data preprocessing
# Random transformations applied to the training images only.
augmentation = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1. / 255, **augmentation)

# Validation images are only rescaled by 1/255, never augmented.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Both generators share the same image size, batch size and label mode.
flow_options = dict(target_size=(150, 150),
                    batch_size=32,
                    class_mode='binary')

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

val_generator = test_datagen.flow_from_directory(val_dir, **flow_options)

# Train for 100 epochs with fixed step counts: 100 training steps and
# 50 validation steps per epoch.
# NOTE(review): the step counts are hard-coded rather than derived from the
# generators; the warnings quoted after this listing report the input running
# out of data before these counts are reached.
history = model.fit(train_generator,
                    steps_per_epoch=100,
                    epochs=100,
                    validation_data=val_generator,
                    validation_steps=50)

# Persist the trained model to 'cats_and_dogs_small_2.h5'.
model.save('cats_and_dogs_small_2.h5')

但是在运行之后,会出现以下警告:

WARNING:tensorflow:Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches (in this case, 10000 batches). You may need to use the repeat() function when building your dataset.
WARNING:tensorflow:Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches (in this case, 50 batches). You may need to use the repeat() function when building your dataset.

然后百度一圈之后发现,警告的原因是写死的步数超过了生成器每个 epoch 实际能提供的批次数,于是把训练模型那块改了一下:

# Take the step counts from the generators instead of hard-coding the batch
# size a second time. len(generator) is the number of batches in one pass
# over the data, including a final partial batch, so every sample is used
# each epoch and the counts stay correct if batch_size is changed.
history = model.fit(train_generator,
                    steps_per_epoch=len(train_generator),
                    epochs=100,
                    validation_data=val_generator,
                    validation_steps=len(val_generator))

然后代码就正常运行了!哈哈哈。