我正在尝试使用 TensorFlow 进行语义图像分割。只是为了让某些东西发挥作用,我正在拍摄这张训练图像,在该图像上训练网络一段时间,然后在同一张图像上“测试”网络,即我在该图像上严重过度拟合。在测试期间,我希望网络产生或多或少与训练它的基本事实相同的分割。但是,我得到了完全错误的预测:

我使用 Synthia 数据集。分割注释以 RGB 图像的形式出现,其中每个像素的颜色表示其类别:

由于没有 TensorFlow 函数可以自动获取 RGB 地面实况并从那里推断其类分布(为什么不呢?),我只是使用浮点颜色值作为地面实况类分布。
为什么我的代码会输出这样的任意预测图,而不是或多或少类似于相应的地面实况注释图像的东西?
我的代码如下:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
import numpy as np
import os
tf.enable_eager_execution()
NUM_TRAINING_SAMPLES = 1
NUM_CLASSES = 3
BATCH_SIZE = 5
NUM_EPOCHS = 3
INPUT_SIZE = (256, 256, 3)
random_indices = np.random.choice(range(13000), NUM_TRAINING_SAMPLES)
directory_images = "C:/SYNTHIA/RGB"
directory_labels = "C:/SYNTHIA/GT"
train_images = np.array(os.listdir(directory_images))
train_labels = np.array(os.listdir(directory_labels))
train_images = train_images[random_indices]
train_labels = train_labels[random_indices]
train_images = [tf.read_file(os.path.join(directory_images, img)) for img in train_images]
train_labels = [tf.read_file(os.path.join(directory_labels, img)) for img in train_labels]
train_images = [tf.io.decode_image(img, channels=3) for img in train_images]
train_labels = [tf.io.decode_image(img, channels=3) for img in train_labels]
train_images = tf.image.resize_images(train_images, INPUT_SIZE[:2])
train_labels = tf.image.resize_images(train_labels, INPUT_SIZE[:2])
train_images = tf.image.convert_image_dtype(train_images, dtype=tf.uint8)
train_labels = train_labels / 256
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_dataset = train_dataset.batch(1)
train_dataset = train_dataset.repeat()
def convolve(input_layer, num_filters):
layer = tf.keras.layers.Conv2D(num_filters, (3, 3), padding='same')(input_layer)
layer = tf.keras.layers.BatchNormalization()(layer)
layer = tf.keras.layers.Activation('relu')(layer)
layer = tf.keras.layers.Conv2D(num_filters, (3, 3), padding='same')(layer)
layer = tf.keras.layers.BatchNormalization()(layer)
layer = tf.keras.layers.Activation('relu')(layer)
return layer
def downsample(input_layer, num_filters):
layer = convolve(input_layer, num_filters)
layer = tf.keras.layers.MaxPooling2D((2, 2), strides=(2, 2))(layer)
return layer
def upsample(input_layer, num_filters):
layer = tf.keras.layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_layer)
layer = tf.keras.layers.BatchNormalization()(layer)
layer = tf.keras.layers.Activation('relu')(layer)
layer = tf.keras.layers.Conv2D(num_filters, (3, 3), padding='same')(layer)
layer = tf.keras.layers.BatchNormalization()(layer)
layer = tf.keras.layers.Activation('relu')(layer)
layer = tf.keras.layers.Conv2D(num_filters, (3, 3), padding='same')(layer)
layer = tf.keras.layers.BatchNormalization()(layer)
layer = tf.keras.layers.Activation('relu')(layer)
return layer
inputs = tf.keras.layers.Input(shape=INPUT_SIZE) # 256
encoder0 = downsample(inputs, 32) # 128
encoder1 = downsample(encoder0, 64) # 64
encoder2 = downsample(encoder1, 128) # 32
encoder3 = downsample(encoder2, 256) # 16
encoder4 = downsample(encoder3, 512) # 8
center = convolve(encoder4, 1024) # center
decoder4 = upsample(center, 512) # 16
decoder3 = upsample(decoder4, 256) # 32
decoder2 = upsample(decoder3, 128) # 64
decoder1 = upsample(decoder2, 64) # 128
decoder0 = upsample(decoder1, 32) # 256
outputs = tf.keras.layers.Conv2D(NUM_CLASSES, (1, 1), activation='sigmoid')(decoder0)
model = tf.keras.Model(inputs=[inputs], outputs=[outputs]) # model = tf.keras.Model(inputs=[images], outputs=[output])
model.compile(optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
#model.summary()
model.fit(train_dataset, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, steps_per_epoch=6, callbacks=[tf.keras.callbacks.TensorBoard(log_dir='./logs')])
#random_indices = np.random.choice(range(13000), NUM_TRAINING_SAMPLES)
test_images = np.array(os.listdir(directory_images))
test_images = test_images[random_indices]
test_images = [tf.read_file(os.path.join(directory_images, img)) for img in test_images]
test_images = [tf.io.decode_image(img, channels=3) for img in test_images]
test_images = tf.image.resize_images(test_images, INPUT_SIZE[:2])
test_images = tf.image.convert_image_dtype(test_images, dtype=tf.uint8)
test_dataset = tf.data.Dataset.from_tensors(test_images)
predictions = model.predict(test_dataset, batch_size=5, steps=1)
predictions = predictions * 256
predictions = tf.image.convert_image_dtype(predictions, dtype=tf.uint8)
jpg = tf.io.encode_jpeg(predictions[0])
tf.io.write_file("C:/SYNTHIA/prediction_map.jpg", jpg)
```