我是一名高中生,对制作神经网络非常陌生。我一直在使用 Iris Flower 数据集 ( https://www.kaggle.com/arshid/iris-flower-dataset ) 来构建我的神经网络。我的模型在训练和测试数据上的准确率都超过了 90%,但是当我使用模型中的权重和偏差项制作分类器时,分类器总是将数据分类为“Iris - Virginica”。我不确定问题是什么,任何帮助将不胜感激。应该注意的是,我想自己使用前馈、反向传播、梯度下降等来制作神经网络。我不想使用来自知名库的现有分类器(例如,来自 SKLearn 的 KNeighbors)。我的代码如下。
#Full Code
#importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
df = pd.read_csv("Iris.csv") #loading the data into Python
df.head()#checking the contents to manipulate
#Preprocessing the data (making the features and the targets)
X = np.asmatrix(np.copy(df))[:,:5] #getting all the columns of the feature data
X = np.delete(X, 0, axis = 1) #dropping column of index 0 because it is "id".
nameOfTargets = df.Species.unique() #Getting the unique values of the target column for one hot encoding
Y_data = [] #Empty list that will eventually become target data
for i in df.iloc[:,5]:
for j in range(nameOfTargets.shape[0]): #for j from 0 to N, where N is the number of items in nameOfTargets
if i == nameOfTargets[j]:
Y_data.append(j)
#The index number of the item in nameOfTargets is how they will be represented
#in the target data. I.E if the value of the target is equal to the FIRST
#item of nameOfTargets, the value is represented by the item's INDEX (0).
N = len(Y_data) #Getting the number of items in the dataset
Y = np.zeros(N*nameOfTargets.shape[0]).reshape(N,nameOfTargets.shape[0])
#Making the target matrix. The number of rows = number of subjects, number of columns = number of unique targets
for i in range(N): #One Hot Encoding. After the loop finishes, Y will be the final target matrix.
t = Y_data[i]
Y[i,t] = 1
#Standardizing values in the feature matrix X
for i in range(X.shape[1]):
X[:,i] = (X[:,i].astype(float) - np.mean(X[:,i].astype(float)))/np.std(X[:,i].astype(float))
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size = 0.2, random_state = 42) #Splitting the data into testing data and training data
#Deep Learning
np.random.seed(1) #making sure the weights are the same every time the cell is rerun (still random)
N,D = X_train.shape #N = num subjects, D = num features
M = 100 #num hidden nodes of the first hidden layer
K = nameOfTargets.shape[0] #number of outputs
iteration_num = 1000 #Number of times gradient descent will be performed
a = 0.02 #learning rate
#creating the weights
W = np.random.randn(D*M).reshape(D,M)
V = np.random.randn(M*K).reshape(M,K)
#creating the biased terms
b = np.random.randn(M).reshape(1,M)
b_ones = np.ones(N).reshape(N,1)
b = np.dot(b_ones,b)
c = np.random.randn(K).reshape(1,K)
c_ones = np.ones(N).reshape(N,1)
c = np.dot(c_ones, c)
for j in range(iteration_num): #Back Propagation
#feed forward
z = np.dot(X_train,W) + b
z = 1/(1 + np.exp(-z.astype(float)))
predictions = np.exp(np.dot(z,V) + c)
#softmax
for i in range(predictions.shape[0]):
predictions[i,:] = predictions[i,:]/np.sum(predictions[i,:])
#gradient descent
dV = np.dot(z.T,(y_train - predictions))
dZ = np.dot(np.dot(np.dot((y_train - predictions), V.T).T, z),(1-z.T)) #m x n matrix
dW = np.dot(X_train.T,dZ.T)
db = np.dot(np.dot(np.dot((y_train - predictions), V.T).T, z), (1-z.T)).T.sum(axis = 0)
dc = (y_train - predictions).sum(axis = 0)
W += a*dW.astype(float)
V += a*dV.astype(float)
b += a*db.astype(float)
c += a*dc.astype(float)
if j%100 == 0: #Every 100 iterations, print out the cost and accuracy
total = -np.dot(y_train.T, np.log(predictions))
cost = total.sum()
Accuracy = np.mean(np.round(predictions) == y_train)
print(cost, Accuracy)
print(" ")
print("Final Cost and Accuracy of training data: ")
print(cost, Accuracy)
#Applying the model to the test data. The X_test data must be put through the softmax function and compared to y_test
#feed forward
z = np.dot(X_test,W) + b[0]
z = 1/(1 + np.exp(-z.astype(float)))
test_predictions = np.exp(np.dot(z,V) + c[0])
#softmax
for i in range(test_predictions.shape[0]):
test_predictions[i,:] = test_predictions[i,:]/np.sum(test_predictions[i,:])
test_Acc = np.mean(np.round(test_predictions) == y_test)
test_total = -np.dot(y_test.T, np.log(test_predictions))
test_cost = total.sum()
print(" ")
print("Cost and Accuracy of testing data: ")
print(test_cost, test_Acc)
def classify(SLen, SWid, PLen, PWid):
X = np.array([SLen, SWid, PLen, PWid]).reshape(1,-1) #Converting to 2D matrix for calculations
z = np.dot(X,W) + b[0]
z = 1/(1 + np.exp(-z.astype(float)))
test_predictions = np.exp(np.dot(z,V) + c[0])
#softmax
for i in range(test_predictions.shape[0]):
test_predictions[i,:] = test_predictions[i,:]/np.sum(test_predictions[i,:])
test_predictions = np.round(test_predictions)[0] #reshaping back to 1D matrix
j = np.where(test_predictions == 1)[0][0]
return nameOfTargets[j]
print(classify(4.7, 3.2, 1.3, 0.2)