一、数据清洗
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
%config InlineBackend.figure_format = 'svg'
# Load the raw order table; the first CSV column is used as the row index.
orders_csv = 'datas/ecomm/某电商平台2021年订单数据.csv'
ec_df = pd.read_csv(orders_csv, index_col=0)
# Convert both timestamp columns to datetime so that date arithmetic and
# the `.dt` accessor can be used on them afterwards.
for time_col in ('orderTime', 'payTime'):
    ec_df[time_col] = pd.to_datetime(ec_df[time_col])
ec_df
# Step 1: keep only the orders that were placed in 2021.
ec_df = ec_df[ec_df['orderTime'].dt.year == 2021]
ec_df
# Step 2: drop rows that fail any of the remaining validity rules:
#   * payment was made no more than 1800 seconds (30 minutes) after the order;
#   * payment time is not earlier than order time;
#   * neither `payment` nor `orderAmount` is negative.
# Rows with a missing timestamp compare as False and are therefore dropped,
# exactly as they were when the rules were applied one after another.
pay_delay_seconds = (ec_df['payTime'] - ec_df['orderTime']).dt.total_seconds()
is_valid_order = (
    (pay_delay_seconds <= 1800)
    & (ec_df['payTime'] >= ec_df['orderTime'])
    & (ec_df['payment'] >= 0)
    & (ec_df['orderAmount'] >= 0)
)
# Take an explicit copy: the cleaning steps that follow assign to columns of
# `ec_df`, and doing that on a boolean-indexed slice of an earlier frame is
# what triggers pandas' SettingWithCopyWarning. The copy makes the filtered
# frame an independent object.
ec_df = ec_df[is_valid_order].copy()
ec_df
# Fill missing channel ids with the most frequent channel id (the first
# entry of the mode, in case several values tie).
most_common_channel = ec_df['chanelID'].mode()[0]
ec_df['chanelID'] = ec_df['chanelID'].fillna(most_common_channel)
ec_df.info()
# Correct the two misspelled column names that come from the raw file.
column_name_fixes = {
    'chanelID': 'channelID',
    'platfromType': 'platformType',
}
ec_df = ec_df.rename(columns=column_name_fixes)
ec_df
# Normalise the platform labels:
#   1. remove every whitespace character and every '·' character;
#   2. title-case the result so spellings that differ only in letter case
#      collapse to one form;
#   3. map the known aliases onto the two canonical labels.
ser = ec_df['platformType'].replace(r'[\s·]', '', regex=True).str.title()
platform_aliases = {
    '薇信': '微信',
    'Vx': '微信',
    '网页': 'Web',
    '网站': 'Web',
}
ec_df['platformType'] = ser.replace(platform_aliases)
# Repair payments that exceed the order amount.
# Rows where payment <= orderAmount are treated as trustworthy; their mean
# payment/orderAmount ratio serves as the discount rate for the others.
payment_is_plausible = ec_df['payment'] <= ec_df['orderAmount']
temp_df = ec_df[payment_is_plausible]
discount_ratios = temp_df['payment'] / temp_df['orderAmount']
mean_discount = discount_ratios.mean()
# Plausible payments are kept; every other row gets orderAmount scaled by
# the mean discount rate.
estimated_payment = ec_df['orderAmount'] * mean_discount
repaired_payment = ec_df['payment'].where(payment_is_plausible, estimated_payment)
# NOTE(review): np.round without a `decimals` argument rounds to whole
# numbers, and it is applied to every row (not only the estimated ones).
# Confirm that dropping the fractional part of all payments is intended.
ec_df['payment'] = np.round(repaired_payment)
二、数据分析
2.1 计算总体指标
GMV = ec_df['orderAmount'].sum(