利用棒棒糖图探索马里奥赛车 64 世界纪录
import numpy as np
import pandas as pd
import matplotlib.colors as mc
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
from matplotlib.lines import Line2D
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from palettable import cartocolors # 获得好看的颜色
数据探索
以下数据如果有需要的同学可关注公众号HsuHeinrich,回复【数据可视化】自动获取~
# Load the Mario Kart 64 world-record table from the TidyTuesday repository.
# The address previously went through a third-party "workers.dev" proxy that
# merely wrapped this GitHub raw URL; reading from the canonical host removes
# the dependency on that intermediary.
df_records = pd.read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-25/records.csv"
)
# Notebook-style preview of the first rows.
df_records.head()
track:表示赛道的名字
type:告诉我们记录是针对单圈还是完整比赛(三圈)
shortcut:是/否变量,标识该记录是否使用了捷径
date:表示取得记录的日期
time:表示完成赛道所需的秒数
- 生成数据df_rank:保留每个track的最快记录
# Build df_rank: the fastest three-lap record of every track,
# ordered from the longest winning time to the shortest.
three_lap_records = df_records[df_records["type"] == "Three Lap"]
# Row label of the smallest time within each track.
fastest_rows = three_lap_records.groupby("track")["time"].idxmin()
df_rank = three_lap_records.loc[fastest_rows].sort_values(by="time", ascending=False)
# Store track as an ordered categorical whose category order is the row order above.
df_rank["track"] = pd.Categorical(
    df_rank["track"],
    categories=df_rank["track"],
    ordered=True,
)
- 生成数据df_connect:保留每个track使用捷径的最慢记录、不使用捷径的最快记录
# Keep only full races (three laps) and renumber the rows from 0.
# reset_index() without drop=True also keeps the old row labels in an "index" column.
df_records_three = df_records.query("type == 'Three Lap'").reset_index()
# Calendar year in which each record was set.
df_records_three["year"] = pd.DatetimeIndex(df_records_three["date"]).year
# Per (track, shortcut) pair: smallest time stored as "no", largest time stored as "yes".
# The reducers are named by string: handing the Python builtins min/max to agg()
# is deprecated in pandas 2.1+ and raises a FutureWarning.
df_connect = (
    df_records_three.groupby(["track", "shortcut"])
    .agg(
        no=("time", "min"),
        yes=("time", "max"),
    )
    .reset_index()
)
# Long format: one row per (track, shortcut, record) with the matching time.
df_connect = pd.melt(
    df_connect,
    id_vars=["track", "shortcut"],
    value_vars=["no", "yes"],
    var_name="record",
    value_name="time",
)
# Keep the fastest time of the no-shortcut runs and the slowest time of the shortcut runs.
df_connect = df_connect.query(
    "(shortcut == 'No' and record == 'no') or (shortcut == 'Yes' and record == 'yes')"
)
# Wide format: one row per track, one column per "record" value.
df_connect = df_connect.pivot_table(index="track", columns="record", values="time").reset_index()
- 生成数据df_longdist(没有使用捷径的记录)和df_shortcut(使用捷径的记录)
def _slowest_and_fastest_per_track(records):
    """Summarise every track by its slowest and its fastest record.

    Parameters
    ----------
    records : pandas.DataFrame
        Record rows providing "track", "time" and "year" columns. Rows are
        looked up by index label, so labels are assumed to be unique.

    Returns
    -------
    pandas.DataFrame
        Wide table with "track", "year", "max" (largest time), "min"
        (smallest time) and "diff" (= max - min). "year" is the largest
        year among the rows kept for the track.
    """
    times_by_track = records.groupby("track")["time"]
    # Compute the row labels once instead of re-running idxmax/idxmin per use.
    slowest_rows = times_by_track.idxmax()
    fastest_rows = times_by_track.idxmin()
    # Explicit copy so the column assignments below never write into a view.
    extremes = records.loc[pd.concat([slowest_rows, fastest_rows])].copy()
    extremes.loc[slowest_rows, "group"] = "max"
    extremes.loc[fastest_rows, "group"] = "min"
    # Reducer given by name: passing the builtin max is deprecated in pandas 2.1+.
    extremes["year"] = extremes.groupby("track")["year"].transform("max")
    # Wide format: one row per (track, year), one column per group.
    wide = extremes.pivot_table(
        index=["track", "year"], columns="group", values="time"
    ).reset_index()
    wide["diff"] = wide["max"] - wide["min"]
    return wide


# Records set without shortcuts.
df_longdist = _slowest_and_fastest_per_track(df_records_three.query("shortcut == 'No'"))
# Records set with shortcuts, summarised the same way.
df_shortcut = _slowest_and_fastest_per_track(df_records_three.query("shortcut == 'Yes'"))
# Track names in the order fixed by df_rank's categorical.
tracks_sorted = df_rank["track"].dtype.categories.tolist()


def _order_rows_by_track(frame):
    """Recast frame["track"] onto tracks_sorted and return the rows in that order."""
    as_category = frame["track"].astype("category")
    frame["track"] = as_category.cat.set_categories(tracks_sorted)
    return frame.sort_values("track")


# Apply the same ordering to the three plotting tables.
df_connect, df_longdist, df_shortcut = (
    _order_rows_by_track(table) for table in (df_connect, df_longdist, df_shortcut)
)
绘制棒棒糖
# Grey scale, light to dark (the number is the approximate lightness).
GREY94 = "#f0f0f0"
GREY75 = "#bfbfbf"
GREY65 = "#a6a6a6"
GREY55 = "#8c8c8c"
GREY50 = "#7f7f7f"
GREY40 = "#666666"

# Blues and the off-white used for backgrounds and hollow markers.
LIGHT_BLUE = "#b4d1d2"
DARK_BLUE = "#242c3c"
BLUE = "#4a5a7b"
WHITE = "#FFFCFC"

# Orange colormap (a callable) taken from the CARTOColors RedOr palette.
colormap_orange = cartocolors.sequential.RedOr_5.mpl_colormap

# Blue colormap (a callable) interpolated from LIGHT_BLUE to DARK_BLUE in 256 steps.
colormap_blue = mc.LinearSegmentedColormap.from_list(
    "blue",
    [LIGHT_BLUE, DARK_BLUE],
    N=256,
)
# One figure with a single axes.
fig, ax = plt.subplots(figsize=(15, 10))

# Dotted connector between the "yes" and "no" times of each track.
ax.hlines(
    y="track", xmin="yes", xmax="no",
    color=GREY75, ls=":", data=df_connect,
)

# Time differences are mapped onto the orange colormap over the range 0-250.
norm_diff = mc.Normalize(vmin=0, vmax=250)

# Shortcut runs: one thick segment per track, coloured by its diff.
shortcut_gap_colors = colormap_orange(norm_diff(df_shortcut["diff"].values))
ax.hlines(
    y="track", xmin="min", xmax="max",
    color=shortcut_gap_colors, lw=5, data=df_shortcut,
)

# No-shortcut runs: a coloured segment with a thinner off-white one drawn over it.
longdist_gap_colors = colormap_orange(norm_diff(df_longdist["diff"].values))
ax.hlines(
    y="track", xmin="min", xmax="max",
    color=longdist_gap_colors, lw=4, data=df_longdist,
)
ax.hlines(
    y="track", xmin="min", xmax="max",
    color=WHITE, lw=2, data=df_longdist,
)

# Shortcut runs, "max" end: filled grey dot; zorder=2 is set explicitly on every dot.
ax.scatter(
    x="max", y="track", s=200,
    color=GREY65, edgecolors=GREY65, lw=2.5, zorder=2, data=df_shortcut,
)

# Years are mapped onto the blue colormap, scaled to the year range of df_shortcut.
norm_year = mc.Normalize(
    vmin=df_shortcut["year"].min(),
    vmax=df_shortcut["year"].max(),
)

# Shortcut runs, "min" end: filled dot coloured by year.
shortcut_year_colors = colormap_blue(norm_year(df_shortcut["year"].values))
ax.scatter(
    x="min", y="track", s=160,
    color=shortcut_year_colors, edgecolors=shortcut_year_colors,
    lw=2, zorder=2, data=df_shortcut,
)

# No-shortcut runs, "min" end: hollow dot whose edge is coloured by year.
longdist_year_colors = colormap_blue(norm_year(df_longdist["year"].values))
ax.scatter(
    x="min", y="track", s=120,
    color=WHITE, edgecolors=longdist_year_colors,
    lw=2, zorder=2, data=df_longdist,
)

# No-shortcut runs, "max" end: hollow dot with a grey edge.
ax.scatter(
    x="max", y="track", s=120,
    color=WHITE, edgecolors=GREY65, lw=2, zorder=2, data=df_longdist,
)
# Write each track name to the left of its "min" end.
# The rows are iterated directly rather than via range(n) + df[col][row]:
# that form is a label lookup on frames reordered by sort_values, and only
# works while the index labels happen to be exactly 0..n-1.
for track, fastest in zip(df_shortcut["track"], df_shortcut["min"]):
    ax.text(
        fastest - 7,
        track,
        track,
        ha="right",
        va="center",
        size=16,
        color="black",
        fontname="Bell MT",
    )

# Tracks present in df_longdist but absent from df_shortcut still need a label.
# The set is built once so each membership test is O(1).
shortcut_tracks = set(df_shortcut["track"])
for track, fastest in zip(df_longdist["track"], df_longdist["min"]):
    if track in shortcut_tracks:
        continue
    ax.text(
        fastest - 7,
        track,
        track,
        ha="right",
        va="center",
        size=17,
        color="black",
        fontname="Bell MT",
    )
# Captions above the four dots are written for one track only: Wario Stadium.
df_shortcut_wario = df_shortcut.query("track == 'Wario Stadium'")
df_longdist_wario = df_longdist.query("track == 'Wario Stadium'")

# (table, column holding the x position, horizontal shift, caption, text colour)
endpoint_captions = [
    (df_shortcut_wario, "min", 0, "Most recent record\nwith shortcuts\n", BLUE),
    (df_shortcut_wario, "max", 0, "First record\nwith shortcuts\n", GREY50),
    (df_longdist_wario, "min", -10, "Most recent record\nno shortcuts\n", BLUE),
    (df_longdist_wario, "max", 10, "First record\nno shortcuts\n", GREY50),
]

for table, column, shift, caption, tint in endpoint_captions:
    ax.text(
        table[column].iloc[0] + shift,
        table["track"].iloc[0],
        caption,
        color=tint,
        ma="center",
        va="bottom",
        ha="center",
        size=9,
        fontname="Overpass",
    )
# ------------------------------------------- Axes layout -------------------------------------------
# Hide all four spines.
for side in ("left", "right", "top", "bottom"):
    ax.spines[side].set_visible(False)

# Hide the y axis.
ax.yaxis.set_visible(False)

# x axis: labels on both the bottom and the top, zero-length tick marks.
ax.tick_params(
    axis="x",
    bottom=True,
    top=True,
    labelbottom=True,
    labeltop=True,
    length=0,
)
# Ticks every 50 from 0 to 400 inclusive.
xticks = list(range(0, 401, 50))
ax.set_xlim(-60, 400)
ax.set_xticks(xticks)
ax.set_xticklabels(
    ["0 seconds", *xticks[1:]],
    fontname="Corbel",
    color=GREY40,
    size=9,
)

# Axes background colour.
ax.set_facecolor(WHITE)

# Vertical guide line at every tick; zorder=0 keeps them behind the other artists.
for tick_position in xticks:
    ax.axvline(tick_position, color=GREY94, zorder=0)

# Read the current limits and extend the top of the y range by 0.5.
x0, x1, y0, y1 = plt.axis()
plt.axis((x0, x1, y0, y1 + 0.5))
# ------------------------------------------- Custom legends -------------------------------------------
# ------------- Legend for diff (colorbar) -------------
diff_ticks = [0, 50, 100, 150, 200, 250]

# Narrow vertical inset axes; loc=3 is matplotlib's code for "lower left".
cbaxes = inset_axes(
    ax,
    width="0.8%",
    height="44%",
    loc=3,
    bbox_to_anchor=(0.025, 0., 1, 1),
    bbox_transform=ax.transAxes,
)
diff_mappable = ScalarMappable(norm=norm_diff, cmap=colormap_orange)
cb = fig.colorbar(diff_mappable, cax=cbaxes, ticks=diff_ticks)

# Drop the outline drawn around the colorbar.
cb.outline.set_visible(False)

# Colorbar label.
cb.set_label(
    "Time difference between first and most recent record",
    labelpad=-45,
    color=GREY40,
    size=10,
    fontname="Overpass",
)

# Zero-length tick marks, so only the labels remain.
cb.ax.yaxis.set_tick_params(color=GREY40, size=0)

# Tick labels.
cb.ax.yaxis.set_ticklabels(
    diff_ticks,
    fontname="Corbel",
    color=GREY40,
    size=10,
)
# ------------- Legend for year -------------
years = [2016, 2017, 2018, 2019, 2020, 2021]


def legend_dot(year):
    """Return a legend handle: a round marker without a line, coloured for `year`."""
    return Line2D(
        [0],
        [0],
        marker="o",
        markersize=10,
        linestyle="none",
        color=colormap_blue(norm_year(year)),
        label=str(year),
    )


def _apply_legend_font(text, family):
    """Give a legend text the shared grey colour and size 10 in the given font family."""
    text.set_fontfamily(family)
    text.set_color(GREY40)
    text.set_fontsize(10)


# One handle per year; loc=3 is matplotlib's code for "lower left".
year_handles = list(map(legend_dot, years))
years_legend = ax.legend(
    handles=year_handles,
    title="Year of Record",
    loc=3,
    bbox_to_anchor=(0.08, 0, 1, 1),
    frameon=False,
)

# Entry labels use Corbel, the legend title uses Overpass.
for entry in years_legend.get_texts():
    _apply_legend_font(entry, "Corbel")
legend_title = years_legend.get_title()
_apply_legend_font(legend_title, "Overpass")
# Main title, set on the figure.
plt.suptitle(
    "Let's-a-Go! You May Still Have Chances to Grab a New World Record for Mario Kart 64",
    x=0.457,
    y=0.99,
    fontsize=13,
    fontname="Bell MT",
    weight="bold",
)

# Subtitle text, one entry per displayed line.
subtitle = [
    "Most world records for Mario Kart 64 were achieved pretty recently (13 in 2020, 10 in 2021). On several tracks, the players considerably improved the time needed to complete three laps when they used shortcuts (Choco Mountain,",
    "D.K.'s Jungle Parkway, Frappe Snowland, Luigi Raceway, Rainbow Road, Royal Raceway, Toad's Turnpike, Wario Stadium, and Yoshi Valley). Actually, for three out of these tracks the previous records were more than halved since 2020",
    "(Luigi Raceway, Rainbow Road, and Toad's Turnpike). Four other tracks still have no records for races with shortcuts (Moo Moo Farm, Koopa Troopa Beach, Banshee Boardwalk, and Bowser's Castle). Are there none or did nobody find",
    "them yet? Pretty unrealistic given the fact that since more than 24 years the game is played all around the world—but maybe you're able to find one and obtain a new world record?",
]
subtitle_text = "\n".join(subtitle)

# The axes title is used as the subtitle.
ax.set_title(
    subtitle_text,
    loc="center",
    ha="center",
    ma="left",
    color=GREY40,
    fontname="Overpass",
    fontsize=9,
    pad=20,
)

# Credit line in figure coordinates.
fig.text(
    0.8,
    .05,
    "Visualization: Cédric Scherer • Data: mkwrs.com/mk64",
    ha="center",
    color=GREY55,
    fontname="Overpass",
    fontsize=12,
)

# Figure background colour, then render.
fig.patch.set_facecolor(WHITE)
plt.show()
参考:Mario Kart 64 World Records with Python and Matplotlib
共勉~