使用sklearn实现多元线性回归
多元线性回归
import pandas as pdimport numpy as npimport matplotlib.pyplot as pltfrom sklearn.linear_model import LinearRegressionfrom sklearn.model_selection import train_test_split%config InlineBackend.figure_format = 'svg'import seaborn as snsdata = pd.read_csv('/Users/jingjingdehnaa/Documents/sales.csv')data.corr() TV radio newspaper salesTV 1.000000 0.054809 0.056648 0.782224radio 0.054809 1.000000 0.354104 0.576223newspaper 0.056648 0.354104 1.000000 0.228299sales 0.782224 0.576223 0.228299 1.000000sns.pairplot(data,x_vars=['TV','radio','newspaper'], y_vars='sales', size=4, aspect=0.8,kind = 'reg')plt.show()
# First modelling step: split the data into a training set and a test set.
# The first three columns are the features and `sales` is the label; 80% of
# the rows go to training. No random_state is passed, so the split (and every
# number derived from it) changes from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    data.iloc[:, :3],
    data['sales'],
    train_size=0.8,
)
# Fit LinearRegression on the training features and labels, then read the
# fitted equation from the estimator's intercept_ and coef_ attributes.
model = LinearRegression()
model.fit(X_train, Y_train)

a = model.intercept_
b = model.coef_
print("最佳拟合线:截距",a,",回归系数:",b)

# Label every coefficient with its own feature name (the earlier format
# printed the same `x` for all three regressors) and print an explicit sign
# so a negative coefficient no longer shows up as "+-0.001059".
terms = "".join(
    f" {'-' if coef < 0 else '+'} {abs(coef):f} * {name}"
    for name, coef in zip(X_train.columns, b)
)
print(f"拟合函数: y = {a:f}{terms}")
# Output recorded from one run (values vary with the random split); the last
# line is shown in the format printed above:
# 最佳拟合线:截距 2.8359172600679603 ,回归系数: [ 0.04636284  0.18702274 -0.00105945]
# 拟合函数: y = 2.835917 + 0.046363 * TV + 0.187023 * radio - 0.001059 * newspaper
# Evaluate the fitted model on the held-out test set: score() returns the
# R^2 of the predictions and predict() returns the predicted sales.
# SimHei is selected so the Chinese axis labels can be rendered.
plt.rcParams['font.sans-serif'] = 'SimHei'

score = model.score(X_test, Y_test)
print(score)
# Output recorded from one run: 0.9261973226814358

Y_test_pred = model.predict(X_test)

# Both curves are drawn against the position of each sample in the test set,
# so the x axis is a sample index, not an advertising spend.
sample_index = range(len(Y_test_pred))
plt.plot(sample_index, Y_test, label='test', color='b')
plt.plot(sample_index, Y_test_pred, color='r', label='predict')
plt.xlabel('样本序号')
plt.ylabel('销量')
plt.legend()
plt.show()
使用statsmodels实现多元线性回归
# Same regression with statsmodels, fitted on the full data set (no split).
import statsmodels.api as sm
import numpy as np

data = pd.read_csv('/Users/jingjingdehnaa/Documents/sales.csv')

x = data.iloc[:, :3]
# OLS does not add an intercept by itself; add_constant prepends a `const`
# column so the model estimates one.
X = sm.add_constant(x)
y = data.sales

models = sm.OLS(y, X).fit()
print(models.summary())

# `params` is a label-indexed Series (const, TV, radio, newspaper). Use .iloc
# for positional access: plain `params[1]` on a labelled Series is deprecated
# in pandas 2.x. Each coefficient is printed with its own feature name and an
# explicit sign instead of a shared `x` and "+-".
intercept = models.params.iloc[0]
slopes = models.params.iloc[1:]
terms = "".join(
    f" {'-' if coef < 0 else '+'} {abs(coef):f} * {name}"
    for name, coef in slopes.items()
)
print(f"拟合函数: y = {intercept:f}{terms}")
# Recorded output (the last line is shown in the format printed above):
#                             OLS Regression Results
# ==============================================================================
# Dep. Variable:                  sales   R-squared:                       0.897
# Model:                            OLS   Adj. R-squared:                  0.896
# Method:                 Least Squares   F-statistic:                     570.3
# Date:                Sun, 25 Oct 2020   Prob (F-statistic):           1.58e-96
# Time:                        14:57:23   Log-Likelihood:                -386.18
# No. Observations:                 200   AIC:                             780.4
# Df Residuals:                     196   BIC:                             793.6
# Df Model:                           3
# Covariance Type:            nonrobust
# ==============================================================================
#                  coef    std err          t      P>|t|      [0.025      0.975]
# ------------------------------------------------------------------------------
# const          2.9389      0.312      9.422      0.000       2.324       3.554
# TV             0.0458      0.001     32.809      0.000       0.043       0.049
# radio          0.1885      0.009     21.893      0.000       0.172       0.206
# newspaper     -0.0010      0.006     -0.177      0.860      -0.013       0.011
# ==============================================================================
# Omnibus:                       60.414   Durbin-Watson:                   2.084
# Prob(Omnibus):                  0.000   Jarque-Bera (JB):              151.241
# Skew:                          -1.327   Prob(JB):                     1.44e-33
# Kurtosis:                       6.332   Cond. No.                         454.
# ==============================================================================
# 拟合函数: y = 2.938889 + 0.045765 * TV + 0.188530 * radio - 0.001037 * newspaper