From eed98e551c817ead7965e08820d4b7adbc4a47f0 Mon Sep 17 00:00:00 2001
From: zouyu <2723363702@qq.com>
Date: 星期四, 27 十一月 2025 10:53:55 +0800
Subject: [PATCH] 合同编号识别调整:文件命名非法字符处理
---
src/main/java/com/xindao/ocr/swingui/swing/jpanel/ContractNumberProcessPanel.java | 43 +++++++++++++++++++++++++++++--------------
1 files changed, 29 insertions(+), 14 deletions(-)
diff --git a/src/main/java/com/xindao/ocr/swingui/swing/jpanel/ContractNumberProcessPanel.java b/src/main/java/com/xindao/ocr/swingui/swing/jpanel/ContractNumberProcessPanel.java
index 112b9bd..8e47707 100644
--- a/src/main/java/com/xindao/ocr/swingui/swing/jpanel/ContractNumberProcessPanel.java
+++ b/src/main/java/com/xindao/ocr/swingui/swing/jpanel/ContractNumberProcessPanel.java
@@ -12,6 +12,7 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
+import org.apache.pdfbox.util.filetypedetector.FileType;
import org.apache.poi.util.IOUtils;
import javax.swing.*;
@@ -281,6 +282,21 @@
}
}
+    /**
+     * Copies {@code file} into {@code targetPath} as {@code newFileName + fileSuffix}.
+     * @param newFileName new file name, without the suffix
+     * @param file source file
+     * @param fileSuffix file suffix appended to the new name
+     * @param targetPath target directory; missing directories are created
+     * @throws IOException if the source cannot be opened or the copy fails
+     */
+    private void writeFile(String newFileName,File file,String fileSuffix,File targetPath) throws IOException {
+        File outputFile = new File(targetPath, newFileName + fileSuffix);
+        outputFile.getParentFile().mkdirs(); // no-op when the directory already exists
+        try (java.io.InputStream in = Files.newInputStream(file.toPath())) { // copy() leaves the source open
+            IOUtils.copy(in, outputFile);
+        }
+    }
/**
* 澶勭悊鍚堝悓缂栧彿鏂规硶
@@ -297,6 +313,7 @@
}
log("寮�濮嬪鐞嗘枃浠�...");
+ fileIndex = new AtomicInteger(1);
//璇嗗埆鍒扮殑鍚堝悓缂栧彿鍒楄〃
final List<ContractNumberExcelData> contractNumberList = new CopyOnWriteArrayList<>();
@@ -307,37 +324,35 @@
for (File file : selectedFiles) {
processCount++;
+ //Base name of the source file (".pdf" removed); also used to name the "_fail" copy
+ String text = file.getName().replace(".pdf","");
try {
//鎴彇pdf閫夊尯鍥惧儚
String pathStr = capturePdfArea(file, prefs);
-// ToFile.preprocessImage(pathStr);
//璇诲彇鍥惧儚鍐呭
String ocrFullText = FileNameValidator.validateAndCleanFileName(ocrService.ocr(pathStr.replaceFirst("/", "")));
- //鑾峰彇璇嗗埆鍒扮殑绗竴涓唴瀹�
- String text = file.getName().replace(".pdf","");
if(StringUtils.isNotBlank(ocrFullText) && !StringUtils.equals(ocrFullText,text)){
- text = ocrFullText;
- String finalText = text;
//濡傛灉鍚堝悓缂栧彿閲嶅锛屽垯鍦ㄦ枃浠跺悕鍚庡姞涓�涓簭鍙�
- if(contractNumberList.stream().anyMatch(f -> f.getContractNumber().equals(finalText))){
- text+="("+ fileIndex.get() +")";
+ String finalOcrFullText = ocrFullText;
+ if(contractNumberList.stream().anyMatch(f -> f.getContractNumber().equals(finalOcrFullText))){
+ ocrFullText+="("+ fileIndex.get() +")";
fileIndex.getAndIncrement();
}
//灏嗚瘑鍒殑鍐呭璁剧疆涓烘枃浠跺悕锛屽鍑哄埌鎸囧畾鐩綍
- String outputFileName = text + ".pdf";
- File outputFile = new File(outputDirectory, outputFileName);
- if (!outputFile.getParentFile().exists()) {
- outputFile.getParentFile().mkdirs();
- }
- IOUtils.copy(Files.newInputStream(file.toPath()),outputFile);
+ writeFile(ocrFullText,file, ".pdf",outputDirectory);
}
successCount++;
- contractNumberList.add(new ContractNumberExcelData(text));
+ contractNumberList.add(new ContractNumberExcelData(ocrFullText));
log("澶勭悊鎴愬姛("+processCount+"/"+selectedFiles.size()+"): " + file.getName());
} catch (Exception e) {
failCount++;
e.printStackTrace();
log("澶勭悊澶辫触: " + file.getName() + " - " + e.getMessage());
+ //Files that failed processing are also written to the output directory
+ try {
+ writeFile(text+"_fail", file, ".pdf",outputDirectory);
+ } catch (IOException ex) {
+ }
}finally {
//鍒犻櫎涓存椂鐩綍
ToFile.deleteTempFiles(OcrSwingConstants.cacheDir);
--
Gitblit v1.9.3