| | |
| | | } |
| | | |
| | | log("开始处理文件..."); |
| | | fileIndex = new AtomicInteger(1); |
| | | //识别到的合同编号列表 |
| | | final List<ContractNumberExcelData> contractNumberList = new CopyOnWriteArrayList<>(); |
| | | |
| | |
| | | try { |
| | | //截取pdf选区图像 |
| | | String pathStr = capturePdfArea(file, prefs); |
| | | // ToFile.preprocessImage(pathStr); |
| | | //读取图像内容 |
| | | String ocrFullText = FileNameValidator.validateAndCleanFileName(ocrService.ocr(pathStr.replaceFirst("/", ""))); |
| | | if(StringUtils.isNotBlank(ocrFullText) && !StringUtils.equals(ocrFullText,text)){ |
| | | text = ocrFullText; |
| | | String finalText = text; |
| | | //如果合同编号重复,则在文件名后加一个序号 |
| | | if(contractNumberList.stream().anyMatch(f -> f.getContractNumber().equals(finalText))){ |
| | | text+="("+ fileIndex.get() +")"; |
| | | String finalOcrFullText = ocrFullText; |
| | | if(contractNumberList.stream().anyMatch(f -> f.getContractNumber().equals(finalOcrFullText))){ |
| | | ocrFullText+="("+ fileIndex.get() +")"; |
| | | fileIndex.getAndIncrement(); |
| | | } |
| | | //将识别的内容设置为文件名,导出到指定目录 |
| | | writeFile(text,file, ".pdf",outputDirectory); |
| | | writeFile(ocrFullText,file, ".pdf",outputDirectory); |
| | | } |
| | | successCount++; |
| | | contractNumberList.add(new ContractNumberExcelData(text)); |
| | | contractNumberList.add(new ContractNumberExcelData(ocrFullText)); |
| | | log("处理成功("+processCount+"/"+selectedFiles.size()+"): " + file.getName()); |
| | | } catch (Exception e) { |
| | | failCount++; |
| | |
| | | try { |
| | | writeFile(text+"_fail", file, ".pdf",outputDirectory); |
| | | } catch (IOException ex) { |
| | | throw new RuntimeException(ex); |
| | | } |
| | | }finally { |
| | | //删除临时目录 |