I've been struggling with neural networks for a while now. I get the math behind backpropagation.
Still, as a reference, I'm using the formulas from here.
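For reference, these are the standard backprop equations in their usual textbook form (the same ones written out in the `train()` docstring further down):

$$\delta^{L} = (a^{L} - y) \odot \sigma'(z^{L})$$
$$\delta^{l} = \big((W^{l+1})^{\top}\,\delta^{l+1}\big) \odot \sigma'(z^{l})$$
$$\frac{\partial C}{\partial b^{l}} = \delta^{l}, \qquad \frac{\partial C}{\partial W^{l}} = \delta^{l}\,(a^{l-1})^{\top}$$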
The network learns XOR:
Prediction After Training: [0.0003508415406266712] Expected: [0.0]
But it learns basically nothing on the MNIST dataset:
Error after n training examples:
- 0 Total Net Error: 4.3739634316135225
- 10000 Total Net Error: 0.4876292680858326
- 20000 Total Net Error: 0.39989816082272944
- 30000 Total Net Error: 0.49507443066631834
- 40000 Total Net Error: 0.5483594859079792
- 50000 Total Net Error: 0.5135921029479789
- 59000 Total Net Error: 0.4686434346776871
[Prediction] [Expected]
- [0.047784337754445516] [0]
- [0.09444684951344406] [0]
- [0.0902378720783441] [0]
- [0.09704810171673675] [0]
- [0.02940947956812051] [0]
- [0.12494839048272757] [1]
- [0.1512762065177885] [0]
- [0.055847446615593155] [0]
- [0.22983410239796548] [0]
- [0.09162426430286492] [0]
Old code (can be ignored)
I've broken the network down as far as possible. There are no matrix or vector multiplications. Here is the code for the different classes:
Main File:
# Load Training Data
rawImages, rawLabels, numImagePixels = get_data_and_labels("C:\\Users\\Robin\\Documents\\MNIST\\Images\\train-images.idx3-ubyte", "C:\\Users\\Robin\\Documents\\MNIST\\Labels\\train-labels.idx1-ubyte")
# Prepare Data
print("Start Preparing Data")
images = []
labels = []
for i in rawImages:
    insert = []
    for pixel in i:
        insert.append(map0to1(pixel, 255))
    images.append(insert)
for l in rawLabels:
    y = [0] * 10
    y[l] = 1
    labels.append(y)
print("Finished Preparing Data")
# Create Network
mnistNet = Network((numImagePixels, 16, 16, 10))
# Train
print("Start Training")
for index in range(len(images)):
    netError = mnistNet.train(images[index], labels[index])
    if index % 10000 == 0:
        print(index, " Total Net Error: ", netError)
prediction = mnistNet.predict(images[0])
print("After Training The Network Predicted:", prediction, "Expected Was:", labels[0])
class Network:
    def __init__(self, topology):
        # Make Layer List
        self.layerList = []
        # Make Input Layer
        self.layerList.append(Layer(0, topology[0], 0))
        # Make All Other Layers
        for index in range(1, len(topology)):
            self.layerList.append(Layer(index, topology[index], topology[index-1]))

    def predict(self, x):
        # Set x As Value Of Input Layer
        self.layerList[0].setInput(x)
        # Feed Through Network
        for index in range(1, len(self.layerList)):
            self.layerList[index].feedForward(self.layerList[index-1].getA())
        # Return The Output Of The Last Layer
        return self.layerList[-1].getA()

    def train(self, x, y):
        # Feed Through Network
        prediction = self.predict(x)
        # Container For The Calculated Layer Errors
        errorsPerLayer = []
        # Calculate Error Of The Output Layer
        errorOutputLayer = listSubtract(prediction, y)
        # Add The Error To The Container
        errorsPerLayer.append(errorOutputLayer)
        # Calculate The Total Error Of The Network
        totalError = calcTotalError(errorOutputLayer)
        # Calculate The Error Of The Hidden Layers
        for layerNum in range(len(self.layerList)-2, 0, -1):
            # Get The Error Of The Next Layer
            errorOfNextLayer = errorsPerLayer[0]
            # Ask The Next Layer For Its Weighted Error, Given Its Error
            weightedError = self.layerList[layerNum+1].calculateWeightedError(self.layerList[layerNum].getNeuronNum(), errorOfNextLayer)
            # Ask The Current Layer For Its Error, Given The Weighted Error Of The Next Layer
            currentLayerError = self.layerList[layerNum].calculateError(weightedError)
            # Add The Just Calculated Error To The Front Of The List
            errorsPerLayer.insert(0, currentLayerError)
        # Insert 0 As Error For The Input Layer; It's Not Used, But This Way The List Size Matches The Layer List
        errorsPerLayer.insert(0, 0)
        # Update Weights And Biases
        for layerNum in range(1, len(self.layerList)):
            # Get The Output Of The Previous Layer
            aOfPrevLayer = self.layerList[layerNum-1].getA()
            # Pass The Error Of The Current Layer And The Output Of The Previous Layer To The Current Layer For Calculating Delta W
            self.layerList[layerNum].updateWeightsAndBiases(errorsPerLayer[layerNum], aOfPrevLayer)
        #print("The Network Predicted: ", prediction, " Expected Was: ", y, " The Error Of The Output Layer Is: ", errorOutputLayer)
        # Return The Total Error Of The Network For Usage Outside This Class
        return totalError

    def getNetworkInfo(self):
        for layer in self.layerList:
            print(layer.getLayerInfo())
----------
class Layer:
    def __init__(self, layerNum, numNeurons, numNeuronsPrevLayer):
        self.neurons = []
        # Set The Number Of The Layer
        self.layerNum = layerNum
        # Create The Neurons
        for index in range(numNeurons):
            self.neurons.append(Neuron(numNeuronsPrevLayer))
        # Print Info
        print("Layer ", layerNum, " makes ", numNeurons, " Neurons", len(self.neurons))

    def feedForward(self, aPrevLayer):
        # Hand It To The Neurons For Processing
        for neuron in self.neurons:
            neuron.feedForward(aPrevLayer)

    def calculateWeightedError(self, numNeuronsCurrentLayer, errorOfNextLayer):
        # The Container For The Weighted Error Of The Next Layer
        weightedError = []
        # The Calculation For Every Neuron Of The Current Layer, One After Another
        for neuronNum in range(numNeuronsCurrentLayer):
            eSum = 0
            # Pair The Error Of Each Neuron With The Neuron For The Calculation
            for e, n in zip(errorOfNextLayer, self.neurons):
                # Let The Neuron Weight Its Error With The Connecting Weight
                eSum += n.weightError(e, neuronNum)
            # Add The Summed And Weighted Error
            weightedError.append(eSum)
        # Return The Weighted Error For The Current Layer
        return weightedError

    def calculateError(self, weightedError):
        # The Container For The Error Of The Current Layer
        errorOfCurrentLayer = []
        # Pair The Weighted Error With Its Neuron
        for wE, n in zip(weightedError, self.neurons):
            # Multiply The Weighted Error With Sigmoid Prime Of The Neuron's Z
            errorOfCurrentLayer.append(wE * sigmoidPrime(n.getZ()))
        # Return The Error Of The Current Layer
        return errorOfCurrentLayer

    def updateWeightsAndBiases(self, errorOfCurrentLayer, aOfPrevLayer):
        # Pair The Error With Its Neuron
        for e, n in zip(errorOfCurrentLayer, self.neurons):
            # The Error Of The Current Layer Equals The Delta Of The Bias, So Apply It
            n.updateBias(e)
            # Pass The Error And The Activity Of The Previous Layer To The Neuron To Update Its Weights
            n.updateWeights(e, aOfPrevLayer)

    def setInput(self, x):
        # Set It On Every Neuron
        for neuron, val in zip(self.neurons, x):
            neuron.setInput(val)

    def getA(self):
        aOfLayer = []
        for neuron in self.neurons:
            aOfLayer.append(neuron.getA())
        return aOfLayer

    def getNeuronNum(self):
        return len(self.neurons)

    def getLayerInfo(self):
        return "Layer( %i ), has %i Neurons" % (self.layerNum, len(self.neurons))
----------
import numpy as np

class Neuron:
    def __init__(self, numNeuronsPrevLayer):
        self.a = 0
        self.z = 0
        self.b = 0.5
        if numNeuronsPrevLayer != 0:
            self.w = np.random.uniform(low=0, high=0.5, size=(numNeuronsPrevLayer,))

    def feedForward(self, aPrevLayer):
        # Reset Z
        self.z = 0
        # Calculate Z
        for w, a in zip(self.w, aPrevLayer):
            self.z += w*a
        # Add Bias
        self.z += self.b
        # Calculate A
        self.a = sigmoid(self.z)

    def weightError(self, e, neuronNum):
        # Weight The Error With The Connecting Weight
        return e * self.w[neuronNum]

    def updateWeights(self, e, aOfPrevLayer):
        # Pair Each Weight With The Matching Activity Of The Previous Layer
        for index in range(len(self.w)):
            # The Delta Of The Weight Is The Error Of This Neuron Multiplied With The Activity Connected Through That Weight
            self.w[index] -= e * aOfPrevLayer[index]

    def updateBias(self, e):
        # e Is The Delta Of The Bias
        self.b -= e

    def setInput(self, x):
        self.z = x
        self.a = x

    def getA(self):
        return self.a

    def getZ(self):
        return self.z

    def getB(self):
        return self.b

    def getW(self):
        return self.w
----------
**helper functions**
def map0to1(val, valMax):
    return val/valMax

def calcTotalError(errorOutputLayer):
    totalError = 0
    for e in errorOutputLayer:
        totalError += e**2
    totalError *= 0.5
    return totalError

def listSubtract(list1, list2):
    subbed = []
    for l1, l2 in zip(list1, list2):
        subbed.append(l1 - l2)
    return subbed
New Code
Main File:
import random
from network import *
from mnistreader import *

def map0to1(val, valMax):
    return val/valMax

# THIS WORKS
'''
trainX = [ [0.0,0.0], [1.0,0.0], [0.0,1.0], [1.0,1.0] ]
trainY = [ [ 0.0], [ 1.0 ], [ 1.0 ], [ 0.0 ] ]
# Create Network
xorNet = Network((2,2,1))
# Train
for index in range(100000):
    randIndex = random.randint(0, 3)
    xorNet.train(trainX[randIndex], trainY[randIndex])
print("Prediction After Training:", xorNet.predict(trainX[0]), "Expected:", trainY[0])
print("Prediction After Training:", xorNet.predict(trainX[1]), "Expected:", trainY[1])
print("Prediction After Training:", xorNet.predict(trainX[2]), "Expected:", trainY[2])
print("Prediction After Training:", xorNet.predict(trainX[3]), "Expected:", trainY[3])
'''

mnistNet = Network((784, 30, 10))
# Load Training Data
rawImages, rawLabels, numImagePixels = get_data_and_labels("C:\\Users\\Robin\\Documents\\MNIST\\Images\\train-images.idx3-ubyte", "C:\\Users\\Robin\\Documents\\MNIST\\Labels\\train-labels.idx1-ubyte")
# Prepare Data
print("Start Preparing Data")
images = []
labels = []
for i in rawImages:
    insert = []
    for pixel in i:
        insert.append(map0to1(pixel, 255))
    images.append(insert)
for l in rawLabels:
    y = [0] * 10
    y[l] = 1
    labels.append(y)
print("Finished Preparing Data")
# Define Variables
learningRate = 0.0001
error = 10
# Training
while error > 0.1:
    for tNum in range(len(images)):
        error = mnistNet.train(images[tNum], labels[tNum], learningRate)
    print("Error:", error, "\n Prediction:\n", mnistNet.predict(images[1]), "\nExpected:", rawLabels[1], "\n\n")
# Test Prediction
print("For", rawLabels[1], "Predicted\n", mnistNet.predict(images[1]))
Network Class:
import numpy as np
from layer import *
from transferfunction import *

class Network:
    def __init__(self, shape):
        # Save The Shape Of The Network
        self.shape = shape
        # Create A List Of Layers
        self.layers = []
        # Create Input Layer
        self.layers.append(Layer((shape[0],), layerType = 'Input'))
        # Create Hidden Layers
        for numNeurons, numNeuronsPrevLayer in zip(shape[1:], shape[:-2]):
            self.layers.append(Layer((numNeurons, numNeuronsPrevLayer), layerType = 'Hidden'))
        # Create Output Layer
        self.layers.append(Layer((shape[-1], shape[-2]), layerType = 'Output'))

    def predict(self, x):
        # x Is A Row, So Shape It To Be A Column
        x = np.array(x).reshape(-1, 1)
        # Set x To Be The Output Of The Input Layer
        self.layers[0].setOutput(x)
        # Feed Through The Other Layers
        for layerNum in range(1, len(self.layers)):
            self.layers[layerNum].feedForward(self.layers[layerNum-1].getOutput())
        # Return The Output Of The Output Layer
        return self.layers[-1].getOutput()

    def train(self, x, y, learningRate):
        '''
        1. Feed Forward
        2. Calculate Error
        3. Calculate Deltas
        4. Apply Deltas

        Error Output Layer = f'(z) * (prediction - target)
        Error Hidden Layer = f'(z) * ( transposed weights next layer DOT error next layer )
        Delta Bias = learning rate * error
        Delta Weights = learning rate * ( error DOT transposed activity previous layer )
        '''
        # Feed Through Network
        prediction = self.predict(x)
        # y Is A Row, So Shape It To Be A Column
        y = np.array(y).reshape(-1, 1)
        # Calculate Error
        error = prediction - y
        # Calculate Total Error
        totalError = 0.5 * np.sum(error**2)
        # Create Container For The Deltas
        deltas = []
        # Calculate Delta For The Output Layer
        deltas.append( np.multiply(sigmoidPrime(self.layers[-1].getZ()), error) )
        # Calculate Deltas For Every Hidden Layer
        for layerNum in range(len(self.layers)-2, 0, -1):
            # Compute The Weighted Error Of The Next Layer
            weightedErrorOfNextLayer = np.dot(self.layers[layerNum+1].getW().T, deltas[0])
            # Compute The Delta And Add It In Front
            deltas.insert(0, np.multiply(sigmoidPrime(self.layers[layerNum].getZ()), weightedErrorOfNextLayer) )
        # Insert Placeholder To Make The Delta List As Big As The Layer List
        deltas.insert(0, 0)
        # For Numerical Gradient Checking Do
        numericalGradients = self.performNumericalGradientChecking(x, y)
        # Update Weights And Biases
        for layerNum in range(1, len(self.layers)):
            self.layers[layerNum].updateBias(deltas[layerNum], learningRate)
            self.layers[layerNum].updateWeight(deltas[layerNum], self.layers[layerNum-1].getOutput(), learningRate, numericalGradients[layerNum-1])
        # Show Information
        #print('Network Predicted: \n', prediction, '\nTarget:\n', y, '\nError: ', totalError)
        return totalError

    def performNumericalGradientChecking(self, x, y):
        # Container Saving All Current Weight Values
        weightSave = []
        # Save All The Weight Values
        for layerNum in range(1, len(self.layers)):
            weightSave.append(self.layers[layerNum].getW())
        # Define Epsilon To Be A Small Number
        epsilon = 1e-4
        # Gradient Container
        numericalGradients = []
        # Perform The Check For Every Layer, Therefore For Every Set Of Weights
        for layerNum in range(1, len(self.layers)):
            # Feed Forward With Changed Weights (+epsilon) And Compute The Cost
            self.layers[layerNum].setW(weightSave[layerNum-1] + epsilon)
            prediction = self.predict(x)
            loss2 = 0.5 * np.sum((prediction - y)**2)
            # Feed Forward With Changed Weights (-epsilon) And Compute The Cost
            self.layers[layerNum].setW(weightSave[layerNum-1] - epsilon)
            prediction = self.predict(x)
            loss1 = 0.5 * np.sum((prediction - y)**2)
            # Reset The Weights
            self.layers[layerNum].setW(weightSave[layerNum-1])
            # Calculate The Numerical Gradient
            numericalGradient = (loss2 - loss1) / (2 * epsilon)
            # Add The Numerical Gradient
            numericalGradients.append(numericalGradient)
        return numericalGradients

    def __str__(self):
        strBuff = ''
        for layer in self.layers:
            strBuff += layer.getInfo()
        return strBuff
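Side note on the gradient check: as I understand the Welch Labs video, the check is supposed to perturb one weight at a time, so it yields a gradient with the same shape as the weight matrix, which can then be compared element-wise against the backprop delta. A minimal sketch of that per-weight version, using the Network/Layer methods from above (the helper name numericalGradientForLayer is just illustrative, it is not part of my code):

import numpy as np

def numericalGradientForLayer(net, layer, x, y, epsilon=1e-4):
    # Central-difference gradient for every single weight of one layer.
    # net is a Network, layer one of its hidden/output layers;
    # x is an input row (list) and y a target column vector, as in train().
    w = layer.getW()                      # reference to the layer's weight matrix
    grad = np.zeros_like(w)
    for i in range(w.shape[0]):
        for j in range(w.shape[1]):
            original = w[i, j]
            w[i, j] = original + epsilon  # nudge one weight up
            loss2 = 0.5 * np.sum((net.predict(x) - y)**2)
            w[i, j] = original - epsilon  # nudge the same weight down
            loss1 = 0.5 * np.sum((net.predict(x) - y)**2)
            w[i, j] = original            # restore the weight
            grad[i, j] = (loss2 - loss1) / (2 * epsilon)
    return grad

# Example: compare against the backprop delta of the first hidden layer
# grad = numericalGradientForLayer(mnistNet, mnistNet.layers[1], images[0], np.array(labels[0]).reshape(-1, 1))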
Layer Class:
import numpy as np
from transferfunction import *

class Layer:
    def __init__(self, neurons, layerType = 'Hidden'):
        ''' neurons Is A Tuple Consisting Of [0]=numNeurons And [1]=numNeuronsPrevLayer '''
        # Remember The Type Of The Layer
        self.layerType = layerType
        # Remember How Many Neurons This Layer Has
        self.neuronCount = neurons[0]
        # Create Layer Based On Type
        if layerType.lower() == 'input':
            # Create Container For The Input Data
            self.a = []
        elif layerType.lower() == 'hidden' or layerType.lower() == 'output':
            # Create Container For The Weighted Input
            self.z = []
            # Create Container For The Activation
            self.a = []
            # Create Weights
            self.w = np.random.uniform(low = 0.0, high = 0.4, size=(neurons[0], neurons[1]))
            # Create Prev Delta Weight For Momentum
            self.momentum = 0.3
            self.prevDelta = np.full((neurons[0], neurons[1]), 0)
            # Create Bias
            self.b = np.full((neurons[0], 1), 0, dtype=float)
        else:
            print('Wrong Type Of Layer Specified')

    def feedForward(self, aPrevLayer):
        self.z = np.dot(self.w, aPrevLayer) + self.b
        self.a = sigmoid(self.z)

    def updateBias(self, e, learningRate):
        self.b -= learningRate * e

    def updateWeight(self, e, aPrevLayer, learningRate, numericalGradient):
        # Calculate The Delta Of The Weights
        deltaW = np.dot(e, aPrevLayer.T)
        # Compare deltaW With The Numerical Gradient
        check = np.linalg.norm(deltaW - numericalGradient) / np.linalg.norm(deltaW + numericalGradient)
        # DEBUG
        print(check)
        # The Weight Change Is The Delta Plus The Momentum Term
        self.w -= ( learningRate * deltaW ) + ( self.momentum * self.prevDelta )
        # Save The Current deltaW
        self.prevDelta = deltaW

    def getW(self):
        return self.w

    def getZ(self):
        return self.z

    def getOutput(self):
        return self.a

    def setOutput(self, x):
        self.a = x

    def setW(self, w):
        self.w = w

    def getInfo(self):
        if self.layerType.lower() == 'input':
            return 'Input Layer With ' + str(self.neuronCount) + ' Neurons\n'
        else:
            return self.layerType + ' Layer With ' + str(self.neuronCount) + ' Neurons And Weights Of Shape: ' + str(self.w.shape) + ' With Biases Of Shape: ' + str(self.b.shape) + '\n'
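For completeness: transferfunction.py is not pasted above; based on how sigmoid and sigmoidPrime are called on z, it is assumed to contain just the standard logistic function and its derivative, roughly like this sketch:

import numpy as np

# Sketch of transferfunction.py (the file itself is not shown above);
# assuming the standard logistic function and its derivative in terms of z.
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoidPrime(z):
    s = sigmoid(z)
    return s * (1.0 - s)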
So my question simply is: "What's wrong?"
Problems:
- The error gets stuck at 0.45
- When using a hidden layer with 800 neurons, I get divide-by-zero warnings and everything coming out of sigmoid prime turns into NaN
- The numerical gradient check gives 1.0 for the hidden layer and 0.995784895209 for the output layer. I know this should be a very small number. On top of that, on the second training example it produces an overflow error and everything turns into NaN
MAJOR EDIT: I really appreciate all the suggestions so far. I've updated the question to the vectorized form, so it should be easier to see what I'm doing here. I've also tried gradient checking now; I'm not sure whether I implemented it correctly (I followed the Welch Labs tutorial (https://youtu.be/pHMzNW8Agq4)). I hope the code is readable.
