lightgbm实战-二分类问题（贝叶斯优化下调参方法）

本文链接：https://blog.csdn.net/rong_king_/article/details/108405630

# use bayes_opt
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score,KFold
from bayes_opt import BayesianOptimization
import numpy as np
# Generate a synthetic classification dataset: 1000 samples, 10 features,
# 2 classes. A fixed seed keeps the data identical from run to run, so the
# scores below can be reproduced and compared.
x, y = make_classification(n_samples=1000, n_features=10, n_classes=2,
                           random_state=2)


# Baseline: cross-validate a random forest with default hyperparameters.
# It is seeded with the same random_state that rf_cv uses for the tuned
# models, so score differences come from the hyperparameters, not the seed.
rf = RandomForestClassifier(random_state=2)
# cross_val_score returns one F1 score per fold (5 folds); keep their mean.
cv_score = cross_val_score(rf, x, y, scoring="f1", cv=5).mean()
cv_score

# 定义一个函数，输入一些超参数，这些超参数就是需要进行调整的参数
def rf_cv(n_estimators, min_samples_split, max_features, max_depth):
    """Score one random-forest hyperparameter combination with 5-fold CV.

    This is the objective function handed to the optimizer. Each argument
    is cast to the type passed to the estimator; ``int()`` truncates toward
    zero.

    Args:
        n_estimators: Number of trees (cast to int).
        min_samples_split: Minimum samples needed to split a node
            (cast to int).
        max_features: Fraction of features considered per split
            (cast to float).
        max_depth: Maximum tree depth (cast to int).

    Returns:
        The mean F1 score over the 5 folds, computed on the module-level
        ``x`` and ``y``.
    """
    forest = RandomForestClassifier(
        random_state=2,
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=float(max_features),
        max_depth=int(max_depth),
    )
    fold_scores = cross_val_score(forest, x, y, scoring="f1", cv=5)
    # A single scalar must be returned. For an error-style metric (e.g. a
    # regression loss) it would have to be negated, since the score is
    # maximized.
    return fold_scores.mean()
# Set up the optimizer: rf_cv is the objective, and every hyperparameter
# is searched inside its (lower, upper) range.
rf_bo = BayesianOptimization(
    f=rf_cv,
    pbounds={
        'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999),
        'max_depth': (5, 15),
    },
)
# Run the search; the score of each tried parameter combination is reported.
rf_bo.maximize()
# Best score found, together with the parameter combination that produced it.
rf_bo.max

（原文此处为一张图片，抓取时已丢失；推测为上面贝叶斯优化的运行输出截图。）

# 带入最佳参数进行计算
rf = RandomForestClassifier(random_state=2,max_depth=12,max_features=0.2694,min_samples_split=