我正在运行 xgboost 并固定了随机种子
，但每次使用相同数据集重新运行 xgboost 时结果都不同。
# Build the XGBoost classifier used by the CV loop below.
# NOTE(review): the original passed bagging_fraction, feature_fraction,
# boosting, num_leaves and max_bin — those are LightGBM parameters, not
# XGBoost ones; XGBClassifier ignores them (newer versions emit a warning).
# They are removed here; subsample / colsample_bytree already provide
# row / column sampling in XGBoost, so behavior is unchanged.
xgb = XGBClassifier(
    colsample_bytree=0.7,
    learning_rate=0.05,
    max_depth=10,
    min_child_weight=11,
    missing=-999,
    n_estimators=400,
    nthread=4,
    predictor='gpu_predictor',
    seed=1000,                 # fixes XGBoost's own RNG only — CV-split randomness is separate
    silent=1,
    subsample=0.8,
    tree_method='gpu_hist',    # NOTE(review): gpu_hist itself can be slightly non-deterministic across runs
    verbose=True
)
这是我的参数和代码：
# Our data is already scaled; split into training and held-out test sets.
from sklearn.model_selection import train_test_split
# This is explicitly used for undersampling.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# BUG FIX (the source of the run-to-run differences): shuffle=True without a
# random_state makes the fold assignment change on every run, so the fitted
# models — and their predictions — differ even though xgb's seed is fixed.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# partially based on https://www.kaggle.com/c0conuts/xgb-k-folds-fastai-pca
predicts = []
for train_index, test_index in kf.split(X_train, y_train):
    print("###")
    # BUG FIX: the indices come from kf.split(X_train, ...), so they must
    # index X_train / y_train — not the full X / y. Also bind to fresh names
    # (X_tr / y_tr) so the loop no longer overwrites X_train / y_train,
    # which corrupted every fold after the first.
    X_tr, X_val = X_train.iloc[train_index], X_train.iloc[test_index]
    y_tr, y_val = y_train.iloc[train_index], y_train.iloc[test_index]
    xgb.fit(X_tr, y_tr, eval_set=[(X_val, y_val)],
            early_stopping_rounds=30)
    predicts.append(xgb.predict(X_test))