| | |
| | | import org.apache.commons.lang3.StringUtils; |
| | | import org.apache.pdfbox.pdmodel.PDDocument; |
| | | import org.apache.pdfbox.rendering.PDFRenderer; |
| | | import org.apache.pdfbox.util.filetypedetector.FileType; |
| | | import org.apache.poi.util.IOUtils; |
| | | |
| | | import javax.swing.*; |
| | |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * 输出文件 |
| | | * @param newFileName 新文件名 |
| | | * @param file 源文件 |
| | | * @param fileSuffix 文件后缀 |
| | | * @param targetPath 目标路径 |
| | | */ |
| | | private void writeFile(String newFileName,File file,String fileSuffix,File targetPath) throws IOException { |
| | | String outputFileName = newFileName + fileSuffix; |
| | | File outputFile = new File(targetPath, outputFileName); |
| | | if (!outputFile.getParentFile().exists()) { |
| | | outputFile.getParentFile().mkdirs(); |
| | | } |
| | | IOUtils.copy(Files.newInputStream(file.toPath()),outputFile); |
| | | } |
| | | |
| | | /** |
| | | * 处理合同编号方法 |
| | |
| | | |
| | | for (File file : selectedFiles) { |
| | | processCount++; |
| | | //获取识别到的第一个内容 |
| | | String text = file.getName().replace(".pdf",""); |
| | | try { |
| | | //截取pdf选区图像 |
| | | String pathStr = capturePdfArea(file, prefs); |
| | | // ToFile.preprocessImage(pathStr); |
| | | //读取图像内容 |
| | | String ocrFullText = FileNameValidator.validateAndCleanFileName(ocrService.ocr(pathStr.replaceFirst("/", ""))); |
| | | //获取识别到的第一个内容 |
| | | String text = file.getName().replace(".pdf",""); |
| | | if(StringUtils.isNotBlank(ocrFullText) && !StringUtils.equals(ocrFullText,text)){ |
| | | text = ocrFullText; |
| | | String finalText = text; |
| | |
| | | fileIndex.getAndIncrement(); |
| | | } |
| | | //将识别的内容设置为文件名,导出到指定目录 |
| | | String outputFileName = text + ".pdf"; |
| | | File outputFile = new File(outputDirectory, outputFileName); |
| | | if (!outputFile.getParentFile().exists()) { |
| | | outputFile.getParentFile().mkdirs(); |
| | | } |
| | | IOUtils.copy(Files.newInputStream(file.toPath()),outputFile); |
| | | writeFile(text,file, ".pdf",outputDirectory); |
| | | } |
| | | successCount++; |
| | | contractNumberList.add(new ContractNumberExcelData(text)); |
| | |
| | | failCount++; |
| | | e.printStackTrace(); |
| | | log("处理失败: " + file.getName() + " - " + e.getMessage()); |
| | | //处理失败的文件也输出 |
| | | try { |
| | | writeFile(text+"_fail", file, ".pdf",outputDirectory); |
| | | } catch (IOException ex) { |
| | | throw new RuntimeException(ex); |
| | | } |
| | | }finally { |
| | | //删除临时目录 |
| | | ToFile.deleteTempFiles(OcrSwingConstants.cacheDir); |