数据挖掘 - 使用 Iris Flower 数据集，为什么我的分类器将输入的任何数据分类为“Iris - Virginica”？ - 吾爱随笔录

我是一名高中生，对制作神经网络非常陌生。我一直在使用 Iris Flower 数据集 ( https://www.kaggle.com/arshid/iris-flower-dataset ) 来构建我的神经网络。我的模型在训练和测试数据上的准确率都超过了 90%，但是当我使用模型中的权重和偏差项制作分类器时，分类器总是将数据分类为“Iris - Virginica”。我不确定问题是什么，任何帮助将不胜感激。应该注意的是，我想自己使用前馈、反向传播、梯度下降等来制作神经网络。我不想使用来自知名库的现有分类器（例如，来自 SKLearn 的 KNeighbors）。我的代码如下。

#Full Code

#importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("Iris.csv") #loading the data into Python
df.head()#checking the contents to manipulate

#Preprocessing the data (making the features and the targets)

X = np.asmatrix(np.copy(df))[:,:5] #getting all the columns of the feature data
X = np.delete(X, 0, axis = 1) #dropping column of index 0 because it is "id".

nameOfTargets = df.Species.unique() #Getting the unique values of the target column for one hot encoding

Y_data = [] #Empty list that will eventually become target data
for i in df.iloc[:,5]:
    for j in range(nameOfTargets.shape[0]): #for j from 0 to N, where N is the number of items in nameOfTargets
        if i == nameOfTargets[j]:       
            Y_data.append(j)
    #The index number of the item in nameOfTargets is how they will be represented 
        #in the target data. I.E if the value of the target is equal to the FIRST 
        #item of nameOfTargets, the value is represented by the item's INDEX (0).

N = len(Y_data) #Getting the number of items in the dataset
Y = np.zeros(N*nameOfTargets.shape[0]).reshape(N,nameOfTargets.shape[0]) 
#Making the target matrix. The number of rows = number of subjects, number of columns = number of unique targets


for i in range(N): #One Hot Encoding. After the loop finishes, Y will be the final target matrix.
    t = Y_data[i]
    Y[i,t] = 1


#Standardizing values in the feature matrix X
for i in range(X.shape[1]):
    X[:,i] = (X[:,i].astype(float) - np.mean(X[:,i].astype(float)))/np.std(X[:,i].astype(float))


X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size = 0.2, random_state = 42) #Splitting the data into testing data and training data


#Deep Learning

np.random.seed(1) #making sure the weights are the same every time the cell is rerun (still random)
N,D = X_train.shape #N = num subjects, D = num features
M = 100 #num hidden nodes of the first hidden layer
K = nameOfTargets.shape[0] #number of outputs

iteration_num = 1000 #Number of times gradient descent will be performed
a = 0.02 #learning rate

#creating the weights
W = np.random.randn(D*M).reshape(D,M)
V = np.random.randn(M*K).reshape(M,K)


#creating the biased terms
b = np.random.randn(M).reshape(1,M)
b_ones = np.ones(N).reshape(N,1)
b = np.dot(b_ones,b)

c = np.random.randn(K).reshape(1,K)
c_ones = np.ones(N).reshape(N,1)
c = np.dot(c_ones, c)

for j in range(iteration_num): #Back Propagation

    #feed forward
    z = np.dot(X_train,W) + b
    z = 1/(1 + np.exp(-z.astype(float)))
    predictions = np.exp(np.dot(z,V) + c)

    #softmax
    for i in range(predictions.shape[0]):
        predictions[i,:] = predictions[i,:]/np.sum(predictions[i,:])


    #gradient descent
    dV = np.dot(z.T,(y_train - predictions)) 
    dZ = np.dot(np.dot(np.dot((y_train - predictions), V.T).T, z),(1-z.T)) #m x n matrix
    dW = np.dot(X_train.T,dZ.T) 
    db = np.dot(np.dot(np.dot((y_train - predictions), V.T).T, z), (1-z.T)).T.sum(axis = 0)
    dc = (y_train - predictions).sum(axis = 0)


    W += a*dW.astype(float) 
    V += a*dV.astype(float) 
    b += a*db.astype(float)
    c += a*dc.astype(float)

    if j%100 == 0: #Every 100 iterations, print out the cost and accuracy
        total = -np.dot(y_train.T, np.log(predictions))
        cost = total.sum()
        Accuracy = np.mean(np.round(predictions) == y_train)
        print(cost, Accuracy)


print(" ")
print("Final Cost and Accuracy of training data: ")
print(cost, Accuracy)

#Applying the model to the test data. The X_test data must be put through the softmax function and compared to y_test

#feed forward
z = np.dot(X_test,W) + b[0]
z = 1/(1 + np.exp(-z.astype(float)))
test_predictions = np.exp(np.dot(z,V) + c[0])

#softmax
for i in range(test_predictions.shape[0]):
    test_predictions[i,:] = test_predictions[i,:]/np.sum(test_predictions[i,:])

test_Acc = np.mean(np.round(test_predictions) == y_test)
test_total = -np.dot(y_test.T, np.log(test_predictions))
test_cost = total.sum()

print(" ")
print("Cost and Accuracy of testing data: ")
print(test_cost, test_Acc)

def classify(SLen, SWid, PLen, PWid):
    X = np.array([SLen, SWid, PLen, PWid]).reshape(1,-1) #Converting to 2D matrix for calculations
    z = np.dot(X,W) + b[0]
    z = 1/(1 + np.exp(-z.astype(float)))
    test_predictions = np.exp(np.dot(z,V) + c[0])

    #softmax
    for i in range(test_predictions.shape[0]):
        test_predictions[i,:] = test_predictions[i,:]/np.sum(test_predictions[i,:])

    test_predictions = np.round(test_predictions)[0] #reshaping back to 1D matrix
    j = np.where(test_predictions == 1)[0][0]

    return nameOfTargets[j]

print(classify(4.7, 3.2, 1.3, 0.2)