# Titanic dataset: separate the target column from the feature columns.
y = df["survived"]
X = df.drop(columns=["survived"])

# Metrics evaluated on every cross-validation fold.
scoring = ["accuracy", "precision", "roc_auc", "f1"]
from sklearn.model_selection import cross_validate
from sklearn.linear_model import (LogisticRegression)
def model_LR():
    """Cross-validate a default LogisticRegression and report the scores.

    Runs 5-fold cross-validation on the module-level ``X`` / ``y`` with the
    metrics listed in ``scoring``, displays one row per fold, then prints the
    mean of every column.  ``cross_validate`` fits a fresh copy of the
    estimator on each training split itself, which is why no explicit
    ``fit`` call appears here.
    """
    # Use the function imported at the top of the file; the bare module name
    # ``model_selection`` is not imported there.
    results = cross_validate(LogisticRegression(), X, y, scoring=scoring, cv=5)

    # Build the row labels from the number of folds actually returned so the
    # index cannot fall out of step with ``cv``.
    n_folds = len(results["fit_time"])
    index = [f"kfold-{i}" for i in range(1, n_folds + 1)]

    s = pd.DataFrame(data=results, index=index)
    display(s)
    print("The mean scores for the above:\n", s.mean())
# Run the cross-validation and show the per-fold table followed by the means.
model_LR()
# OUTPUT :
fit_time score_time test_accuracy test_precision test_roc_auc test_f1
kfold-1 0.003998 0.006969 0.774809 0.711340 0.823673 0.700508
kfold-2 0.003990 0.005005 0.820611 0.778947 0.856481 0.758974
kfold-3 0.003003 0.003989 0.774809 0.715789 0.796667 0.697436
kfold-4 0.002992 0.003992 0.767176 0.709677 0.841852 0.683938
kfold-5 0.001994 0.003989 0.819923 0.819277 0.877081 0.743169
The mean scores for the above:
fit_time 0.003195
score_time 0.004789
test_accuracy 0.791466
test_precision 0.747006
test_roc_auc 0.839151
test_f1 0.716805
dtype: float64
我的问题:在上面的代码中,把 LogisticRegression 传给 cross_validate 之后,我得到了各项分数(例如 roc_auc),就好像模型已经被拟合(训练)过一样。
但我并没有调用任何 fit 或训练函数。这是否意味着 cross_validate 会自动完成模型的拟合?谢谢。