#导入库 任务1——加载数据及进行预处理
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Load the insurance dataset from the local data directory.
df = pd.read_csv('./data/insurance.csv')

# Quick inspection: first rows, (rows, columns) shape, and the column summary
# printed by info().
df.head()
df.shape
df.info()
# 2.数据的清洗和转换
# Encode the two binary categorical columns as integers.
# These statements were previously collapsed onto a single commented-out
# line, so they never ran and both columns stayed as strings.
# The mapped column is assigned back instead of calling
# df[col].replace(..., inplace=True): an in-place call on a selected column
# is not guaranteed to modify df under pandas copy-on-write.
# NOTE: Series.map turns any value missing from the dict into NaN.

# sex: 'female' -> 0, 'male' -> 1
df['sex'].unique()
df['sex'] = df['sex'].map({'female': 0, 'male': 1})
df.head()

# smoker: 'yes' -> 1, 'no' -> 0
df['smoker'].unique()
df['smoker'] = df['smoker'].map({'yes': 1, 'no': 0})
df.head()
# df['region']中'southwest','southeast','northwest','northeast'分别对应1、2、3、4 数据的清洗和转换
# Show the distinct region labels present in the column.
df['region'].unique()

# Number the regions 1..4 in this fixed order:
# southwest=1, southeast=2, northwest=3, northeast=4.
dict_region = {
    label: code
    for code, label in enumerate(
        ['southwest', 'southeast', 'northwest', 'northeast'], start=1
    )
}

# Series.map accepts a dict and looks each label up in it.
df['region'] = df['region'].map(dict_region)
df.head()
# Min-max normalisation of the data.
from sklearn.preprocessing import MinMaxScaler

# fit() learns the per-column min and max and returns the scaler itself,
# so the fitted instance can be bound directly.
scaler = MinMaxScaler().fit(df)

# transform() applies (x - min) / (max - min) to every column.
df1 = scaler.transform(df)
df1