# Based on PyPDF2: helpers for splitting and merging PDF files.
from PyPDF2 import PdfReader, PdfWriter
from pathlib import Path
# Directory layout anchors, derived from the location of this file.
# NOTE(review): none of these constants is referenced in the visible part of
# this file — confirm whether other modules import them.

# Absolute, resolved path of the directory that contains this file.
TESTS_ROOT = Path(__file__).parent.resolve()
# Parent directory of TESTS_ROOT.
PROJECT_ROOT = TESTS_ROOT.parent
# "resources" directory directly under PROJECT_ROOT.
RESOURCE_ROOT = PROJECT_ROOT / "resources"
def split_pdf(file_name, start_page, end_page, output_pdf):
    """Copy the pages ``[start_page, end_page)`` of a PDF into a new file.

    The text of the source document's first page is printed to stdout as a
    quick preview before the pages are copied.

    Args:
        file_name: Path of the source PDF, handed to ``PdfReader``.
        start_page: Zero-based index of the first page to copy.
        end_page: Zero-based index one past the last page to copy
            (exclusive, as in ``range``).
        output_pdf: Path of the PDF to write; opened in binary write mode,
            so an existing file is overwritten.

    Raises:
        IndexError: If the requested range is non-empty and extends past
            the last page of the source document.
    """
    input_file = PdfReader(file_name)
    page_count = len(input_file.pages)

    # Preview of the first page. Guarded so that a document without pages
    # does not fail here before the requested range is even looked at.
    if page_count:
        print(input_file.pages[0].extract_text())

    page_indices = range(start_page, end_page)
    # Fail fast with a readable message instead of an opaque IndexError in
    # the middle of the copy loop. Only the upper bound is checked; negative
    # indices are still passed through to ``pages[...]`` unchanged.
    if page_indices and page_indices[-1] >= page_count:
        raise IndexError(
            f"requested pages [{start_page}, {end_page}) but "
            f"{file_name!r} has only {page_count} page(s)"
        )

    output_file = PdfWriter()
    for i in page_indices:
        output_file.add_page(input_file.pages[i])

    with open(output_pdf, "wb") as f:
        output_file.write(f)
def hebing_pdf(file_name1, start_page1, end_page1,
               file_name2, start_page2, end_page2, output_pdf):
    """Merge a page range from each of two PDFs into one output PDF.

    Pages ``[start_page1, end_page1)`` of the first file are written first,
    followed by pages ``[start_page2, end_page2)`` of the second file.
    Progress is reported on stdout: a text preview of the first file's first
    page, the page count of both inputs, one line per copied page (shown as
    a 1-based page number), and finally the output path.

    Args:
        file_name1: Path of the first source PDF, handed to ``PdfReader``.
        start_page1: Zero-based index of the first page taken from file 1.
        end_page1: Zero-based exclusive end index for file 1.
        file_name2: Path of the second source PDF, handed to ``PdfReader``.
        start_page2: Zero-based index of the first page taken from file 2.
        end_page2: Zero-based exclusive end index for file 2.
        output_pdf: Path of the merged PDF; opened in binary write mode,
            so an existing file is overwritten.

    Raises:
        IndexError: If a requested range is non-empty and extends past the
            last page of its source document.
    """
    input_file1 = PdfReader(file_name1)
    input_file2 = PdfReader(file_name2)
    page_count1 = len(input_file1.pages)
    page_count2 = len(input_file2.pages)

    # Preview of the first page of file 1. Guarded so that a document
    # without pages does not fail here.
    if page_count1:
        print(input_file1.pages[0].extract_text())

    # f-strings instead of ``+`` so that pathlib.Path arguments work too;
    # the output is identical for plain-string paths.
    print(
        f"{file_name1} 文件1的页数:{page_count1} \n"
        f"{file_name2} 文件2的页数:{page_count2}"
    )

    # (progress label, source path, reader, page count, page indices)
    sources = (
        ("文件1取:", file_name1, input_file1, page_count1,
         range(start_page1, end_page1)),
        ("文件2取:", file_name2, input_file2, page_count2,
         range(start_page2, end_page2)),
    )

    # Validate both ranges before copying anything, so a bad range fails
    # with a readable message rather than midway through the merge. Only the
    # upper bound is checked; negative indices are still passed through to
    # ``pages[...]`` unchanged.
    for _, name, _, page_count, page_indices in sources:
        if page_indices and page_indices[-1] >= page_count:
            raise IndexError(
                f"requested pages [{page_indices.start}, {page_indices.stop}) "
                f"but {name!r} has only {page_count} page(s)"
            )

    output_file = PdfWriter()
    for label, _, reader, _, page_indices in sources:
        for i in page_indices:
            output_file.add_page(reader.pages[i])
            print(f"{label}{i + 1}")

    with open(output_pdf, "wb") as f:
        output_file.write(f)

    # The path printed here is the file just written, so it is labelled
    # "output file"; the previous label said "input file".
    print(f"输出文件:{output_pdf}")
'''
test_writer_clone()
reader = PdfReader("期权营销表-新版.pdf")
number_of_pages = len(reader.pages)
page = reader.pages[0]
text = page.extract_text()
print(page)
print(text)
'''
if __name__ == "__main__":
    # Script entry point: merge page index 0 of the first PDF with page
    # indices 3-4 of the second PDF into "1.pdf".
    # Alternative example, kept for reference:
    #   split_pdf("测试通知.pdf", 0, 1, "1.pdf")
    hebing_pdf("测试通知.pdf", 0, 1, "新版.pdf", 3, 5, "1.pdf")