Visualizing Chipotle's Data
Step 1. Import the necessary libraries
import pandas as pd
import collections
import matplotlib.pyplot as plt
# set this so the graphs open internally
%matplotlib inline
Step 2. Import the dataset from this address.
# Dataset address (assumes the Chipotle orders data is used); this is a
# relative file path, so it is resolved against the working directory.
url = 'chipotle.tsv'
Step 3. Assign it to a variable called chipo.
# The data is tab-separated, so the separator has to be given explicitly.
chipo = pd.read_csv(
    url,
    sep='\t',
)
Step 4. See the first 10 entries
# Show the first ten rows (as the last expression of the cell it is displayed)
chipo.head(n=10)
Step 5. Create a histogram of the top 5 items bought
# Approach 1: value_counts() is the concise way to count how often each
# item name appears; the result is sorted descending, so the first five
# entries are the five most frequent items.
top5_items = chipo['item_name'].value_counts().head(5)

# Draw the counts as a bar chart on an explicitly created 10x6 axes
fig, ax = plt.subplots(figsize=(10, 6))
top5_items.plot.bar(ax=ax, color='skyblue')
ax.set_title('Top 5 Most Frequently Ordered Items')
ax.set_xlabel('Item Name')
ax.set_ylabel('Order Count')
plt.xticks(rotation=45)  # tilt the item names so they stay readable
plt.show()
Step 6. Create a scatterplot with the number of items ordered per order price
# Clean the price data: strip the "$" sign and convert the column to float.
# - regex=False makes "$" a literal character; as a regular expression it is
#   the end-of-string anchor, and the default of `regex` differs between
#   pandas versions.
# - The dtype guard makes this cell safe to re-run: once the column is
#   numeric, the .str accessor would otherwise raise AttributeError.
if not pd.api.types.is_numeric_dtype(chipo['item_price']):
    chipo['item_price'] = (
        chipo['item_price'].str.replace('$', '', regex=False).astype(float)
    )

# Group by order and aggregate both columns by summing them
orders = chipo.groupby('order_id').agg({
    'quantity': 'sum',      # total number of items in the order
    'item_price': 'sum',    # total amount of the order
})

# Scatter plot: order total on the x-axis, item count on the y-axis
plt.figure(figsize=(10, 6))
plt.scatter(orders['item_price'], orders['quantity'], alpha=0.5)
plt.title('Number of Items Ordered vs Order Price')
plt.xlabel('Total Order Price ($)')
plt.ylabel('Total Items Ordered')
plt.grid(True)
plt.show()
Step 7. BONUS: Create a question and a graph to answer your own question.
# Step 7. BONUS: custom question and visualization
# Question: how is the per-unit price distributed for each of the five most
# frequently ordered items?
# NOTE: item_price must already be numeric here (it is converted to float in
# the Step 6 cell), otherwise the division below fails.

# Names of the five most frequent items
top5_names = chipo['item_name'].value_counts().head(5).index.tolist()

# Take an explicit copy of the filtered rows so that the new column is added
# to an independent frame rather than to a slice of chipo
top5_data = chipo[chipo['item_name'].isin(top5_names)].copy()

# Unit price of each row = row price / quantity
top5_data['unit_price'] = top5_data['item_price'] / top5_data['quantity']

# Box plot. DataFrame.boxplot(by=...) opens a figure of its own unless it is
# handed an axes, so the 12x8 axes is created first and passed in; a bare
# plt.figure() call would leave an empty figure behind and the requested
# size would not apply to the plot.
fig, ax = plt.subplots(figsize=(12, 8))
top5_data.boxplot(column='unit_price', by='item_name', vert=False, ax=ax)
ax.set_title('Unit Price Distribution of Top 5 Items')
ax.set_xlabel('Unit Price ($)')
ax.set_ylabel('Item Name')
fig.suptitle('')  # remove the default "Boxplot grouped by ..." title
plt.show()
