# use bayes_opt
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score,KFold
from bayes_opt import BayesianOptimization
import numpy as np
# Generate a random binary classification dataset: 1000 samples, 10 features,
# 2 classes. The seed is fixed so that every run tunes on the same data;
# without it the best hyperparameters found further down change from run to
# run and cannot be compared with previously recorded values.
x, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=2,
)
# Baseline: cross-validate a random forest with default (untuned)
# hyperparameters. random_state=2 matches the seed used for the tuned forests
# in rf_cv, so the comparison is not blurred by forest-to-forest randomness.
rf = RandomForestClassifier(random_state=2)
# cross_val_score yields one F1 score per fold (5 here); keep their mean.
cv_score = cross_val_score(rf, x, y, scoring="f1", cv=5).mean()
# A bare `cv_score` expression displays nothing when this file runs as a
# script, so print the baseline explicitly.
print(cv_score)
# Objective function for the optimizer: its arguments are the hyperparameters
# that are being tuned.
def rf_cv(n_estimators, min_samples_split, max_features, max_depth):
    """Return the mean 5-fold cross-validated F1 score of a random forest.

    The forest is built from the given hyperparameters and evaluated on the
    module-level dataset ``x``, ``y``. Integer-valued hyperparameters are
    truncated with ``int`` because the search ranges are continuous.

    Args:
        n_estimators: Number of trees (truncated to int).
        min_samples_split: Minimum samples needed to split a node
            (truncated to int).
        max_features: Fraction of features considered per split
            (passed as float).
        max_depth: Maximum tree depth (truncated to int).

    Returns:
        The mean of the five per-fold F1 scores.
    """
    model = RandomForestClassifier(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=float(max_features),
        max_depth=int(max_depth),
        random_state=2,
    )
    cv_score = cross_val_score(model, x, y, scoring="f1", cv=5).mean()
    # A single value must be returned. The value is maximized, so for an
    # error-like metric (e.g. a regression loss) return its negative instead.
    return cv_score
# Set up the optimizer: rf_cv is the objective, and each entry of pbounds is
# the (lower, upper) search range of one of its hyperparameters.
rf_bo = BayesianOptimization(
    f=rf_cv,
    pbounds={
        'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999),
        'max_depth': (5, 15),
    },
    # Seed the optimizer so the sequence of probed points is repeatable.
    random_state=2,
)
# Run the optimization; with the optimizer's default verbosity the score of
# each probed parameter combination is reported as it goes.
rf_bo.maximize()
# Show the best score and the parameter combination that achieved it. A bare
# `rf_bo.max` expression displays nothing when this file runs as a script.
print(rf_bo.max)
# 带入最佳参数进行计算
rf = RandomForestClassifier(random_state=2,max_depth=12,max_features=0.2694,min_samples_split=