Occasional deep dips in the validation accuracy plot

data-mining keras
2022-03-08 14:15:23

I am training on FER2013 data with a custom InceptionV3 architecture that has fewer layers and parameters (3.3M) than the standard version (25M). The plot looks like this: [training/validation accuracy plot]

What could explain this? I am already using batch normalization, so these dips don't make much sense to me. A sequential model with even fewer parameters does not show this behavior.

P.S. Code added:

import numpy as np
import os
import pandas as pd

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Model
from tensorflow.keras.layers import (AveragePooling2D, concatenate,  
                                     Dense, Conv2D, MaxPooling2D, 
                                     BatchNormalization, Input, Activation,
                                     GlobalAveragePooling2D)
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, CSVLogger


IMG_HEIGHT, IMG_WIDTH = 128, 128
NB_EPOCH = 400
BATCH_SIZE = 256

model_name = "inceptionV3_module05"+"_imsize"+str(IMG_HEIGHT)+\
             "_bsize"+str(BATCH_SIZE)+"_epoch"+str(NB_EPOCH)  
csv_name = model_name + '_log.csv'
train_data_dir = "Training"
valid_data_dir = "PublicTest"
channel_axis=3
classes = 7


def conv2d_bn(x,
              filters,
              num_row,
              num_col,
              padding='same',
              strides=(1, 1),
              name=None):

    if name is not None:
        bn_name = name + '_bn'
        conv_name = name + '_conv'
    else:
        bn_name = None
        conv_name = None
    x = Conv2D(
        filters, (num_row, num_col),
        strides=strides,
        padding=padding,
        use_bias=False,
        name=conv_name)(x)
    x = BatchNormalization(axis=channel_axis, scale=False, name=bn_name)(x)
    x = Activation('relu', name=name)(x)
    return x

inputTensor = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 1))

x = conv2d_bn(inputTensor, 32, 3, 3, strides=(2, 2))
x = conv2d_bn(x, 32, 3, 3)
x = conv2d_bn(x, 64, 3, 3)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)

x = conv2d_bn(x, 80, 1, 1)
x = conv2d_bn(x, 192, 3, 3)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)

# mixed 0: 35 x 35 x 256
branch1x1 = conv2d_bn(x, 64, 1, 1)

branch3x3 = conv2d_bn(x, 48, 1, 1)
branch3x3 = conv2d_bn(branch3x3, 64, 3, 3)

branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)

branch_pool = AveragePooling2D((3, 3),
                                      strides=(1, 1),
                                      padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 32, 1, 1)
x = concatenate(
    [branch1x1, branch3x3, branch3x3dbl, branch_pool],
    axis=channel_axis,
    name='mixed0')

# mixed 1: 35 x 35 x 288
branch1x1 = conv2d_bn(x, 64, 1, 1)

branch3x3 = conv2d_bn(x, 48, 1, 1)
branch3x3 = conv2d_bn(branch3x3, 64, 3, 3)

branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)

branch_pool = AveragePooling2D((3, 3),
                                      strides=(1, 1),
                                      padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
x = concatenate(
    [branch1x1, branch3x3, branch3x3dbl, branch_pool],
    axis=channel_axis,
    name='mixed1')

# mixed 2: 35 x 35 x 288
branch1x1 = conv2d_bn(x, 64, 1, 1)

branch3x3 = conv2d_bn(x, 48, 1, 1)
branch3x3 = conv2d_bn(branch3x3, 64, 3, 3)

branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)

branch_pool = AveragePooling2D((3, 3),
                                      strides=(1, 1),
                                      padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
x = concatenate(
    [branch1x1, branch3x3, branch3x3dbl, branch_pool],
    axis=channel_axis,
    name='mixed2')

# mixed 3: 17 x 17 x 768
branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid')

branch3x3dbl = conv2d_bn(x, 64, 1, 1)
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
branch3x3dbl = conv2d_bn(
    branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid')

branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x)
x = concatenate(
    [branch3x3, branch3x3dbl, branch_pool],
    axis=channel_axis,
    name='mixed3')

# mixed 4: 17 x 17 x 768
branch1x1 = conv2d_bn(x, 192, 1, 1)

branch7x7 = conv2d_bn(x, 128, 1, 1)
branch7x7 = conv2d_bn(branch7x7, 128, 1, 7)
branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)

branch7x7dbl = conv2d_bn(x, 128, 1, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7)
branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)

branch_pool = AveragePooling2D((3, 3),
                                      strides=(1, 1),
                                      padding='same')(x)
branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
x = concatenate(
    [branch1x1, branch7x7, branch7x7dbl, branch_pool],
    axis=channel_axis,
    name='mixed4')

x = GlobalAveragePooling2D(name='avg_pool')(x)
x = Dense(classes, activation='softmax', name='predictions')(x)

model = Model(inputTensor,x,name="incv3v1")



model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()
#%%

train_datagen = ImageDataGenerator(rescale=1./255)
valid_datagen = ImageDataGenerator(rescale=1./255)


train_generator = train_datagen.flow_from_directory(
    directory = train_data_dir,
    batch_size=BATCH_SIZE,
    target_size=(IMG_HEIGHT, IMG_WIDTH),    
    color_mode='grayscale',
    classes = ['0','1','2','3','4','5','6'],
    class_mode='categorical',
    shuffle=True  # training batches should be shuffled; note the original string 'False' is truthy, so it enabled shuffling anyway
)
validation_generator = valid_datagen.flow_from_directory(
    directory = valid_data_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    color_mode='grayscale',
    classes = ['0','1','2','3','4','5','6'],
    class_mode='categorical',    
    shuffle=False  # pass the boolean, not the string 'False' (a non-empty string counts as True)
)

csv_logger = CSVLogger(csv_name , append=True, separator=';') 


callbacks_list=[csv_logger]

print("Starting to fit the model")



# model.fit accepts generators directly; fit_generator is deprecated in TF 2.x
model.fit(train_generator,
          steps_per_epoch=int(np.ceil(train_generator.samples / BATCH_SIZE)),
          validation_data=validation_generator,
          validation_steps=int(np.ceil(validation_generator.samples / BATCH_SIZE)),
          epochs=NB_EPOCH, verbose=1, callbacks=callbacks_list)
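
The logged curves can be re-plotted from the CSV afterwards. A minimal sketch, assuming the ';'-separated file written by the CSVLogger above and TF 2.x metric names ('accuracy'/'val_accuracy'; older versions log 'acc'/'val_acc'):

import pandas as pd
import matplotlib.pyplot as plt

# Read the training history written by CSVLogger (separator=';' as configured above).
log = pd.read_csv(csv_name, sep=';')

# Plot training vs. validation accuracy per epoch to inspect the dips.
plt.plot(log['epoch'], log['accuracy'], label='train accuracy')
plt.plot(log['epoch'], log['val_accuracy'], label='validation accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()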
1 Answer

One possible explanation is that you are doing stochastic (mini-batch) gradient descent and this is simply an unlucky mini-batch: a particular combination of training samples and the current model weights that happens to perform poorly. That batch may contain a disproportionate number of outliers.
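
One way to check this hypothesis is to score the validation set batch by batch and look at the spread. A minimal diagnostic sketch, assuming the model, validation_generator and BATCH_SIZE from the question are in scope; with shuffle=False the batches are deterministic, so an unusually bad batch will show up consistently:

import numpy as np

validation_generator.reset()
n_batches = int(np.ceil(validation_generator.samples / BATCH_SIZE))

# Accuracy of each individual validation batch.
batch_accs = []
for _ in range(n_batches):
    x_batch, y_batch = next(validation_generator)
    preds = model.predict(x_batch, verbose=0)
    batch_accs.append(float(np.mean(np.argmax(preds, axis=1) == np.argmax(y_batch, axis=1))))

# A large gap between min and mean points to a few "unlucky" batches
# (e.g. many outliers in one batch) rather than a genuine drop of the model.
print("per-batch accuracy: min=%.3f  mean=%.3f  max=%.3f"
      % (min(batch_accs), float(np.mean(batch_accs)), max(batch_accs)))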