先在 pom.xml 中添加 PDFBox 依赖:
<dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.28</version> </dependency>
private byte[] pdf2word(String filePath, byte[] in) throws IOException { String ext = FileUtil.getExt(filePath); if ("pdf".equals(ext)) { //将pdf加载到对象中去 PDDocument doc = PDDocument.load(in); //得到pdf的页数 int pagenumber = doc.getNumberOfPages(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); Writer writer = new OutputStreamWriter(baos, StandardCharsets.UTF_8); PDFTextStripper stripper = new PDFTextStripper(); stripper.setSortByPosition(true);//排序 stripper.setStartPage(1);//设置转换的开始页 stripper.setEndPage(pagenumber);//设置转换的结束页 stripper.writeText(doc, writer); writer.close(); return baos.toByteArray(); } return null; }
注意:该方法只是提取 PDF 中的文本(返回 UTF-8 编码的纯文本字节),并不会生成真正的 Word 文件,因此会丢失部分格式,但是整体还行,毕竟免费的要啥自行车。