效果:有60个结构类似的PPT,每个PPT里面有5页,需抓取特定标题页面下特定位置的PPT内容。
思路:①遍历PPT文件;②定义ppt读取函数;③遍历PPT文件,读取目标值;④# 将值存放进去excel
https://pythonbrief.blog.csdn.net/article/details/109089030
read_ppt.py
import imp
from pptx import Presentation
import os
import shutil
from openpyxl import load_workbook
from datetime import datetime
file_address = r'Total'
talent_excel= r"人員名單 .xlsx"
wrong_excel= r"读取记录.xlsx"
file_address_list = []
for filename in os.listdir(file_address):
if filename.endswith('.ppt'):
file_address_list.append(filename)
elif filename.endswith('.pptx'):
file_address_list.append(filename)
def read_ppt_goal(filename):
prs = Presentation(filename)
person_goal= ''
team_goal= ''
employee_id = filename.split('-')[1]
for slide in prs.slides:
for shape in slide.shapes:
if shape.has_text_frame:
text_frame = shape.text_frame
if 'GraphicFrame' in str(type(shape)) and ('一些字眼' in text_frame.text and '延伸' not in text_frame.text):
table = shape.table
person_goal= table.cell(1, 1).text.replace('\n', '').replace('\x0b', '')
if 'GraphicFrame' in str(type(shape)) and ('一些字眼' in text_frame.text):
table = shape.table
team_goal= table.cell(4, 4).text.replace('\n', '').replace('\x0b', '')
return employee_id,person_goal,team_goal
wb_ppt = load_workbook(wrong_excel)
sheet_ppt = wb_ppt['sheet1']
sheet_wrong = wb_ppt['sheet2']
talent_data = []
print(len(file_address_list))
i = 1
j = 1
for filename in file_address_list:
try:
i = i + 1
employee_id,person_goal,team_goal = read_ppt_goal(filename)
talent_data.append([employee_id,person_goal,team_goal])
print(employee_id,person_goal,team_goal)
sheet_ppt.cell(row=i, column=1).value = employee_id
sheet_ppt.cell(row=i, column=2).value = person_goal
sheet_ppt.cell(row=i, column=3).value = team_goal
wb_ppt.save(wrong_excel)
except Exception:
j = j + 1
print(filename,"读取失败")
sheet_wrong.cell(row=j, column=1).value = filename + "读取失败"
wb_ppt.save(wrong_excel)
pass
print(talent_data)
print('len of talent data',len(talent_data))
talent_wb = load_workbook(talent_excel)
talent_sheet = talent_wb.active
max_row = talent_sheet.max_row
for i in range(2,max_row):
for listdata in talent_data:
print(i,listdata)
if str(talent_sheet.cell(row=i, column=5).value) == str(listdata[0]):
talent_sheet.cell(row=i, column=20).value = str(listdata[1])
if talent_sheet.cell(row=i, column=20).value:
talent_sheet.cell(row=i, column=19).value = 'Y'
else:
talent_sheet.cell(row=i, column=19).value = 'N'
talent_sheet.cell(row=i, column=23).value = str(listdata[2])
if talent_sheet.cell(row=i, column=23).value:
talent_sheet.cell(row=i, column=22).value = 'Y'
else:
talent_sheet.cell(row=i, column=22).value = 'N'
talent_wb.save(talent_excel)