I am training on FER2013 with a custom InceptionV3-style architecture that has fewer layers and parameters (3.3M) than the vanilla version (25M). The training graph looks like this:

What could explain this? I am already using batchnorm, so these dips do not make much sense to me. A sequential model with even fewer parameters does not show this behaviour.
P.S. Code added below:
import numpy as np
import os
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Model
from tensorflow.keras.layers import (AveragePooling2D, concatenate,
                                     Dense, Conv2D, MaxPooling2D,
                                     BatchNormalization, Input, Activation,
                                     GlobalAveragePooling2D)
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, CSVLogger
IMG_HEIGHT, IMG_WIDTH = 128, 128
NB_EPOCH = 400
BATCH_SIZE = 256
model_name = "inceptionV3_module05" + "_imsize" + str(IMG_HEIGHT) + \
             "_bsize" + str(BATCH_SIZE) + "_epoch" + str(NB_EPOCH)
csv_name = model_name + '_log.csv'
train_data_dir = "Training"
valid_data_dir = "PublicTest"
channel_axis=3
classes = 7
def conv2d_bn(x,
              filters,
              num_row,
              num_col,
              padding='same',
              strides=(1, 1),
              name=None):
    """Conv2D (no bias) followed by BatchNormalization and ReLU."""
    if name is not None:
        bn_name = name + '_bn'
        conv_name = name + '_conv'
    else:
        bn_name = None
        conv_name = None
    x = Conv2D(
        filters, (num_row, num_col),
        strides=strides,
        padding=padding,
        use_bias=False,
        name=conv_name)(x)
    x = BatchNormalization(axis=channel_axis, scale=False, name=bn_name)(x)
    x = Activation('relu', name=name)(x)
    return x
inputTensor = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 1))
x = conv2d_bn(inputTensor, 32, 3, 3, strides=(2, 2))
x = conv2d_bn(x, 32, 3, 3)
x = conv2d_bn(x, 64, 3, 3)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)
x = conv2d_bn(x, 80, 1, 1)
x = conv2d_bn(x, 192, 3, 3)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)
# mixed 0: 35 x 35 x 256
branch1x1 = conv2d_bn(x, 64, 1, 1)
branch3x3 = conv2d_bn(x, 48, 1, 1)
branch3x3 = conv2d_bn(branch3x3, 64, 3, 3)
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch_pool = AveragePooling2D((3, 3),
                               strides=(1, 1),
                               padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 32, 1, 1)
x = concatenate(
    [branch1x1, branch3x3, branch3x3dbl, branch_pool],
    axis=channel_axis,
    name='mixed0')
# mixed 1: 35 x 35 x 288
branch1x1 = conv2d_bn(x, 64, 1, 1)
branch3x3 = conv2d_bn(x, 48, 1, 1)
branch3x3 = conv2d_bn(branch3x3, 64, 3, 3)
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch_pool = AveragePooling2D((3, 3),
                               strides=(1, 1),
                               padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
x = concatenate(
    [branch1x1, branch3x3, branch3x3dbl, branch_pool],
    axis=channel_axis,
    name='mixed1')
# mixed 2: 35 x 35 x 288
branch1x1 = conv2d_bn(x, 64, 1, 1)
branch3x3 = conv2d_bn(x, 48, 1, 1)
branch3x3 = conv2d_bn(branch3x3, 64, 3, 3)
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch_pool = AveragePooling2D((3, 3),
                               strides=(1, 1),
                               padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
x = concatenate(
    [branch1x1, branch3x3, branch3x3dbl, branch_pool],
    axis=channel_axis,
    name='mixed2')
# mixed 3: 17 x 17 x 768
branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid')
branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(
    branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid')
branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x)
x = concatenate(
    [branch3x3, branch3x3dbl, branch_pool],
    axis=channel_axis,
    name='mixed3')
# mixed 4: 17 x 17 x 768
branch1x1 = conv2d_bn(x, 192, 1, 1)
branch7x7 = conv2d_bn(x, 128, 1, 1)
branch7x7 = conv2d_bn(branch7x7, 128, 1, 7)
branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)
branch7x7dbl = conv2d_bn(x, 128, 1, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
branch_pool = AveragePooling2D((3, 3),
                               strides=(1, 1),
                               padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
x = concatenate(
    [branch1x1, branch7x7, branch7x7dbl, branch_pool],
    axis=channel_axis,
    name='mixed4')
x = GlobalAveragePooling2D(name='avg_pool')(x)
x = Dense(classes, activation='softmax', name='predictions')(x)
model = Model(inputTensor, x, name="incv3v1")
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
#%%
train_datagen = ImageDataGenerator(rescale=1./255)
valid_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    directory=train_data_dir,
    batch_size=BATCH_SIZE,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    color_mode='grayscale',
    classes=['0', '1', '2', '3', '4', '5', '6'],
    class_mode='categorical',
    shuffle='False'
)
validation_generator = valid_datagen.flow_from_directory(
    directory=valid_data_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    color_mode='grayscale',
    classes=['0', '1', '2', '3', '4', '5', '6'],
    class_mode='categorical',
    shuffle='False'
)
csv_logger = CSVLogger(csv_name, append=True, separator=';')
callbacks_list = [csv_logger]
print("Starting to fit the model")
model.fit_generator(train_generator,
                    steps_per_epoch=np.ceil(train_generator.samples / BATCH_SIZE),
                    validation_data=validation_generator,
                    validation_steps=np.ceil(validation_generator.samples / BATCH_SIZE),
                    epochs=NB_EPOCH, verbose=1, callbacks=callbacks_list)
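For reference, the curves shown above can be reproduced from the CSVLogger output with something like the following (a minimal sketch; the column names accuracy / val_accuracy are assumed from TF2's CSVLogger, older Keras versions write acc / val_acc instead):

import pandas as pd
import matplotlib.pyplot as plt

# CSVLogger above writes one ';'-separated row per epoch.
log = pd.read_csv(csv_name, sep=';')

fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))
ax_loss.plot(log['epoch'], log['loss'], label='train loss')
ax_loss.plot(log['epoch'], log['val_loss'], label='val loss')
ax_loss.set_xlabel('epoch')
ax_loss.legend()
ax_acc.plot(log['epoch'], log['accuracy'], label='train acc')       # 'acc' on older Keras
ax_acc.plot(log['epoch'], log['val_accuracy'], label='val acc')     # 'val_acc' on older Keras
ax_acc.set_xlabel('epoch')
ax_acc.legend()
plt.show()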