Java实现pdf转excel
最近项目需要解析pdf单据,获取里面的字段数据。通过网上的查阅发现itext比pdfbox的文档要多一点,所以选择了itext(不是说pdfbox不好,只是api和例子太少,难以理解)。因pdf非模板化(某政府发放),所以靠表单域获取数据变得不现实。一开始通过PdfReaderContentParser获取文档内容,但是获取到的是所有内容拼接成的一个字符串,而需求需要将数据精确到字段,靠截取字符串来达到解析的目的是行不通的,因为获取的内容毫无规律。后来查看源代码,发现解析过程是逐个字符进行的,且字符的解析顺序是随机的,所以只能通过字段所在的坐标范围来获取字段内容。
/**
 * Created by luon 2018/3/21.
 *
 * <p>Form-submit handler that turns an uploaded PDF into an Excel download.
 * It answers with a JSON error message when the upload is missing/empty or
 * fails the "pdf" check; otherwise it passes the upload's input stream to
 * {@code exportPdfList}, copies ten named fields of every returned map into
 * one {@code ExcelRow}, writes the rows into the workbook loaded from the
 * template at {@code path}, and returns a {@code DownloadView}.
 *
 * <p>NOTE(review): this listing appears to have lost text during extraction.
 * Several receivers/method names are visibly truncated (for example
 * {@code ate(false)}, {@code File()}, {@code ("orderNum")}, {@code de(...)}),
 * so the code below does not compile as shown. The code is left untouched;
 * the inline notes mark each affected line and must be confirmed against the
 * original project.
 *
 * @param request  the current HTTP request (not read in this method)
 * @param response the current HTTP response (not read in this method)
 * @param command  the bound form object; cast to {@code FileUploadForm}
 * @param errors   binding errors (not read in this method)
 * @return a view wrapping a {@code JsonView} with an error message when
 *         validation fails, otherwise a view wrapping a {@code DownloadView}
 *         of the generated workbook
 * @throws Exception nothing is caught here, so any exception raised by the
 *         calls below propagates to the caller
 */
@Override
protected ModelAndView onSubmit(HttpServletRequest request, HttpServletResponse response, Object command, BindException errors) throws Exception { FileUploadForm form = (FileUploadForm) command;
// NOTE(review): "ate(false)" is truncated — presumably a factory call on SimpleResult; confirm.
SimpleResult result = ate(false);
// Reject a missing or zero-length upload.
// NOTE(review): "File()" is truncated — presumably an accessor on `form`; confirm.
if (File() == null || File().getSize() == 0) {
result.setMessage("请上传pdf⽂件");
return new ModelAndView(new JsonView(result));
}
// Reject uploads that are not PDFs, judged from the lower-cased original file name.
// NOTE(review): the expression is truncated — presumably an equals() check on the file extension; confirm.
if (!"pdf".File().getOriginalFilename().toLowerCase()))) {
result.setMessage("请上传pdf格式的⽂件");
return new ModelAndView(new JsonView(result));
}
// Obtain the PDF file stream.
// NOTE(review): this stream is not closed anywhere in this method.
InputStream inputStream = File().getInputStream();
// NOTE(review): the next two lines look like a single "//" comment ("read the PDF content")
// that was split across lines during extraction.
/
/获取pdf内容
List<List<Map<String, String>>> listAll = exportPdfList(inputStream);
final String path = "/excel模板路径/xls/fillbls.xls";
// NOTE(review): truncated call — presumably loads the template workbook from the classpath resource at `path`; confirm.
Workbook workbook = ExcelLoader.Class().getResourceAsStream(path));
// Walk the parsed PDF content and collect one ExcelRow per map.
List<ExcelRow> sheet = new ArrayList<>();
for (int i = 0; i < listAll.size(); i++) {
// NOTE(review): "(i)" is truncated — presumably the i-th element of listAll; confirm.
List<Map<String, String>> listdata = (i);
for (Map<String, String> map : listdata) {
// NOTE(review): each "(\"key\")" below is truncated — presumably a lookup on `map`; confirm.
String orderNum = ("orderNum");
String trackNum = ("trackNum");
String serviceType = ("serviceType");
String actualWeight = ("actualWeight");
String actualWeightUnits = ("actualWeightUnits");
String ratedWeight = ("ratedWeight");
String ratedWeightUnits = ("ratedWeightUnits");
String amount = ("amount");
String chargeDesion1 = ("chargeDesion1");
String chargeDesionCash1 = ("chargeDesionCash1");
// The order of add() calls fixes the order of the values within the row.
ExcelRow itemRow = new ExcelRow();
itemRow.add(orderNum);
itemRow.add(trackNum);
itemRow.add(serviceType);
itemRow.add(actualWeight);
itemRow.add(actualWeightUnits);
itemRow.add(ratedWeight);
itemRow.add(ratedWeightUnits);
itemRow.add(amount);
itemRow.add(chargeDesion1);
itemRow.add(chargeDesionCash1);
sheet.add(itemRow);
}
}
// NOTE(review): the trailing 0 and 1 are presumably the sheet index and the starting row; confirm against ExcelWriter.
ExcelWriter.write(workbook, sheet, 0, 1);
InputStream outStream = ExcelWriter.close(workbook);
String fileName = "美国境内账单表.xls";
// NOTE(review): "de(...)" is truncated — presumably a UTF-8 URL-encoding of the download file name; confirm.
fileName = de(fileName, "UTF-8");
return new ModelAndView(new DownloadView(outStream, fileName));
}
// Reads the PDF content. Note: the detailed code of this method was not posted
// (original author's remark), so the listing below is incomplete.
// NOTE(review): within the visible lines `listMap` and `map` are never declared, and the
// `try` block and the method body are never closed; several calls are also truncated
// by text extraction. The code is left untouched; confirm against the original project.
public List<List<Map<String, String>>> exportPdfList(InputStream inputStream) {
List<List<Map<String, String>>> listAll = new ArrayList<>();
// NOTE(review): the text fused in front of "try {" on the next line is not Java —
// it looks like unrelated text merged in during extraction.
excel最强教科书完全版pdftry {
// NOTE(review): presumably splits the PDF into one byte[] per page, keyed by page number; confirm against LabelSpliter.
Map<String, byte[]> pdfData = LabelSpliter.byPageNum(inputStream);
// NOTE(review): the next two lines look like a single "//" comment ("take the PDF page by page")
// that was split across lines during extraction.
/
/分页取pdf
// NOTE(review): "Set()" is truncated — presumably the entry set of pdfData; confirm.
List<Map.Entry<String, byte[]>> list = new ArrayList<>(Set());
// Sort so that iteration starts from the first page.
Collections.sort(list, new Comparator<Map.Entry<String, byte[]>>() {
public int compare(Map.Entry<String, byte[]> o1, Map.Entry<String, byte[]> o2) {
// NOTE(review): truncated — presumably compares the two entry keys as numbers; confirm.
return (new Key()))pareTo(new Key()));
}
});
for (Map.Entry<String, byte[]> entry : list) {
// NOTE(review): truncated — presumably a debug print of the entry's key and value; confirm.
System.out.Key() + "-------------------------------------" + Value());
// NOTE(review): "Value()" is truncated — presumably the entry's byte[] value; confirm.
byte[] pdfBypage = Value();
InputStream inputfjsb = new ByteArrayInputStream(pdfBypage);
// NOTE(review): no close() of `document` is visible in this listing.
PDDocument document = PDDocument.load(inputfjsb);
// Encrypted documents are skipped.
if (!document.isEncrypted()) {
// NOTE(review): `stripper` is configured here but not used in the visible lines.
PDFTextStripperByArea stripper = new PDFTextStripperByArea();
stripper.setSortByPosition(true);
PDFTextStripper tStripper = new PDFTextStripper();
// NOTE(review): "Text(document)" is truncated — presumably a text-extraction call on tStripper; confirm.
String pdfFileInText = Text(document);
// NOTE(review): the code that builds `map` and `listMap` from pdfFileInText is not shown.
listMap.add(map);
listAll.add(listMap);
// NOTE(review): as shown, this return sits inside the loop, so the method would return
// after the first non-encrypted entry; confirm whether that is intended.
return listAll;
}
}
通过PdfReaderContentParser获取文档内容时,获取到的是所有内容拼接成的一个字符串,而需求需要将数据精确到字段,靠截取字符串来达到解析的目的是行不通的,因为获取的内容毫无规律。以上代码仅提供思路,谢谢!

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。