我正在使用 scikit-learn 的 Pipeline 进行多步数据转换并拟合模型,但我需要在转换步骤(缺失值填充、编码等)之后立即提取转换后的 X_train 和 X_test,以便将其用于其他分析。我该如何获取它们?
这是我的管道:
# Stand-alone imputers. NOTE(review): the visible code never uses these two
# objects -- the transformer pipelines construct their own SimpleImputer
# instances -- so confirm they are needed elsewhere before relying on them.
imputer_num = SimpleImputer(strategy="median")
imputer_cat = SimpleImputer(strategy="most_frequent")

# Candidate classifiers, each created with its default constructor arguments.
XGB, BBC, BRC = (
    XGBClassifier(),
    BalancedBaggingClassifier(),
    BalancedRandomForestClassifier(),
)
models = [XGB, BBC, BRC]
# Numeric branch: fill missing values with the median, then apply
# StandardScaler.
numeric_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical branch: fill missing values with the most frequent value, then
# apply `encoder` (defined outside this section of the file).
categorical_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encod", encoder),
    ]
)

# Route each column group through its branch. `numericas_all` and
# `categoricas_all` are the column selectors, defined outside this section.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numericas_all),
        ("cat", categorical_transformer, categoricas_all),
    ]
)
# Fit and score every candidate classifier behind the shared preprocessor.
for item in models:
    pipe = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", item)])
    model = pipe.fit(X_train, y_train)

    # Feature matrices as produced by the fitted preprocessing step, i.e. the
    # data after imputation/scaling/encoding and before the classifier. They
    # are kept in variables so they can be reused for other analyses.
    fitted_preprocessor = model.named_steps["preprocessor"]
    X_train_transformed = fitted_preprocessor.transform(X_train)
    X_test_transformed = fitted_preprocessor.transform(X_test)

    y_pred = model.predict(X_test)
    test_probs = model.predict_proba(X_test)

    print(model)
    print(balanced_accuracy_score(y_test, y_pred))
    # ROC AUC is a ranking metric, so score it with the predicted probability
    # of the positive class rather than with hard labels.
    # NOTE(review): column 1 assumes a binary target -- confirm.
    print(roc_auc_score(y_test, test_probs[:, 1]))