我正在研究以下代码,它将 cross_val_score 与 .mean() 和 .std() 一起使用。我阅读了许多相关文档,但仍然不明白它们各自的作用。
import pandas as pd
import numpy as np
from sklearn import tree
import graphviz
from sklearn.model_selection import cross_val_score
# Load the student dataset; the file is semicolon-separated.
d = pd.read_csv('student-por.csv', sep=';')

# Binary target: 1 when the three grades G1 + G2 + G3 total at least 35,
# otherwise 0. Computed column-wise rather than with a row-by-row apply().
d['pass'] = ((d['G1'] + d['G2'] + d['G3']) >= 35).astype(int)
# The target is derived from the raw grades, so remove them from the features.
d = d.drop(['G1', 'G2', 'G3'], axis=1)

# One-hot encode the categorical columns.
d = pd.get_dummies(
    d,
    columns=[
        'sex', 'activities', 'school', 'address', 'famsize', 'Pstatus',
        'Mjob', 'Fjob', 'reason', 'guardian', 'schoolsup', 'famsup', 'paid',
        'nursery', 'higher', 'internet', 'romantic',
    ],
)

# Shuffle the rows. The fixed seed makes the shuffle -- and therefore the
# train/test split below -- identical on every run; without it each run
# produces a different split and different scores.
d = d.sample(frac=1, random_state=0)

# Split: the first 500 shuffled rows are for training, the rest for testing.
d_train = d[:500]
d_test = d[500:]

# Separate the features (att) from the target (pass) for each subset.
d_train_att = d_train.drop(['pass'], axis=1)
d_train_pass = d_train['pass']
d_test_att = d_test.drop(['pass'], axis=1)
d_test_pass = d_test['pass']

# Features and target of the full dataset.
d_att = d.drop(['pass'], axis=1)
d_pass = d['pass']
# Entropy-based decision tree limited to depth 5. The fixed random_state keeps
# the fitted tree (and the scores below) the same from run to run.
t = tree.DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=0)
t = t.fit(d_train_att, d_train_pass)

# Export the tree as Graphviz DOT source. export_graphviz emits DOT text, not
# an image, so it is saved with a .dot extension; with out_file=None the DOT
# source is returned and dot_data holds it.
# Render to an image with: dot -Tpng students-tree.dot -o students-tree.png
dot_data = tree.export_graphviz(
    t,
    out_file=None,
    label='all',
    impurity=False,
    proportion=True,
    feature_names=list(d_train_att),
    class_names=['fail', 'pass'],
    filled=True,
    rounded=True,
)
with open('students-tree.dot', 'w', encoding='utf-8') as dot_file:
    dot_file.write(dot_data)

# Accuracy on the held-out test rows; printed so it is visible when this runs
# as a script instead of being silently discarded.
test_accuracy = t.score(d_test_att, d_test_pass)
print('Test accuracy %0.2f' % test_accuracy)

# 5-fold cross-validation over the full dataset: one score per fold.
scores = cross_val_score(t, d_att, d_pass, cv=5)
# Mean of the fold scores, with twice their standard deviation as the spread.
print('Acuracy %0.2f (+/- %0.2f)' % (scores.mean(), scores.std() * 2))
简而言之,这就是我需要知道的:
# Evaluate the classifier t with 5-fold cross-validation on the full feature
# set and target; scores holds one score per fold.
scores = cross_val_score(t, d_att,d_pass, cv=5)
# scores.mean() is the average of the fold scores; scores.std() * 2 is twice
# their standard deviation, printed as the "+/-" spread around that average.
print ('Acuracy %0.2f (+/- %0.2f)' % (scores.mean(), scores.std() *2))
还有一件事:我应该得到与原始代码发布者相同的分数吗?因为我得到的分数并不相同。