From 866a3e6cbd2df9841dfbbd733e1128938cef3e00 Mon Sep 17 00:00:00 2001
From: zouyu <2723363702@qq.com>
Date: Tue, 09 Dec 2025 17:45:16 +0800
Subject: [PATCH] 采集器调整
---
src/main/java/com/chinaztt/mes/docx/util/TakeWords.java | 156 ++++++++++++++++-----------------------------------
1 file changed, 49 insertions(+), 107 deletions(-)
diff --git a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
index 301b47d..58fb34a 100644
--- a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
+++ b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
@@ -1,6 +1,8 @@
package com.chinaztt.mes.docx.util;
import cn.hutool.core.io.FileUtil;
+import cn.hutool.http.HttpRequest;
+import cn.hutool.json.JSONUtil;
import com.chinaztt.mes.docx.dto.GetFileDto;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
@@ -9,20 +11,19 @@
import net.sourceforge.tess4j.TesseractException;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.usermodel.HSSFPictureData;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.ooxml.POIXMLDocument;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
-import org.apache.poi.xssf.usermodel.XSSFSheet;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import java.io.*;
-import java.nio.file.Files;
import java.sql.*;
import java.util.*;
import java.util.regex.Pattern;
@@ -57,19 +58,22 @@
return result;
}
- public static Object readExcelFile(File file) throws IOException {
+ /**
+ * Reads an Excel file; compatible with both the .xlsx and .xls formats.
+ * @param file
+ * @return
+ */
+ public static Object readExcelFile(File file) {
StringBuilder result = new StringBuilder();
- //鍒涘缓宸ヤ綔绨垮璞�
- try {
- XSSFWorkbook xssfWorkbook = new XSSFWorkbook(Files.newInputStream(file.toPath()));
- //鑾峰彇宸ヤ綔绨夸笅sheet鐨勪釜鏁� 鍙鍙栫涓�涓猻heet
-// int sheetNum = xssfWorkbook.getNumberOfSheets();
+ try (
+ FileInputStream fis = new FileInputStream(file);
+ Workbook workbook = WorkbookFactory.create(fis)
+ ) {
//閬嶅巻宸ヤ綔绨夸腑鐨勬墍鏈夋暟鎹�
- XSSFSheet sheet = xssfWorkbook.getSheetAt(0);
+ Sheet sheet = workbook.getSheetAt(0);
//鑾峰彇鏈�鍚庝竴琛岀殑num锛屽嵆鎬昏鏁般�傛澶勪粠0寮�濮�
int maxRow = sheet.getLastRowNum();
- for (int row = 1; row <= maxRow; row++) {
- //鑾峰彇鏈�鍚庡崟鍏冩牸num锛屽嵆鎬诲崟鍏冩牸鏁� ***娉ㄦ剰锛氭澶勪粠1寮�濮嬭鏁�***
+ for (int row = 0; row <= maxRow; row++) {
int maxRol = sheet.getRow(row).getLastCellNum();
StringBuilder aLine = new StringBuilder();
for (int rol = 0; rol < maxRol; rol++) {
@@ -77,13 +81,19 @@
}
result.append(aLine).append("\n");
}
- }catch (Exception e){
+ } catch (Exception e) {
e.printStackTrace();
}
return result.toString();
}
- public static Object readExcelxlsFile(File file) throws IOException {
+ /**
+ * Reads the content of the pictures embedded in an Excel file.
+ * @param file
+ * @return
+ * @throws IOException
+ */
+ public static Object readPngContextInExcel(File file) throws IOException {
String result = "";
try (FileInputStream fis = new FileInputStream(file);
Workbook workbook = new HSSFWorkbook(fis)) {
@@ -104,100 +114,18 @@
try (FileOutputStream fos = new FileOutputStream(tempFile)) {
fos.write(pictureData);
}
- String ocrResult = "";
+ String ocrResult;
try {
- ocrResult = (String) readPngFile(tempFile);
- } catch (TesseractException e) {
- ocrResult = "OCR璇嗗埆澶辫触: " + e.getMessage();
+ ocrResult = ocrImageContext(tempFile.getAbsolutePath());
} finally {
// 鍒犻櫎涓存椂鏂囦欢
tempFile.delete();
}
result = ocrResult;
-// String ocrText = fixOcrText(ocrResult);
-// result.append("OCR Result:").append(ocrText).append(",");
-
-
-// for (HSSFPictureData picture : pictures) {
-// // 鑾峰彇鍥剧墖绫诲瀷
-// String pictureType = picture.suggestFileExtension();
-// // 鑾峰彇鍥剧墖鏁版嵁
-// byte[] pictureData = picture.getData();
-// // 鍒涘缓涓存椂鏂囦欢
-// File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType);
-// try (FileOutputStream fos = new FileOutputStream(tempFile)) {
-// fos.write(pictureData);
-// }
-// // 鍥剧墖棰勫鐞�
-//// File processedFile = preprocessImage(tempFile, pictureType);
-// // 璋冪敤 readPngFile1 鏂规硶璇诲彇鍥剧墖鏂囧瓧淇℃伅
-// String ocrResult = "";
-// try {
-// ocrResult = (String) readPngFile(tempFile);
-//// ocrResult = (String) readPngFile(tempFile);
-//// ocrResult = (String) readPngFile(processedFile);
-// } catch (TesseractException e) {
-// ocrResult = "OCR璇嗗埆澶辫触: " + e.getMessage();
-// } finally {
-// // 鍒犻櫎涓存椂鏂囦欢
-//// tempFile.delete();
-//// processedFile.delete();
-// }
-//
-// // 灏嗗浘鐗囦俊鎭坊鍔犲埌缁撴灉涓�
-//// result.append("Picture Type: ").append(pictureType)
-//// .append(", Picture Size: ").append(pictureData.length)
-//// .append(" bytes")
-//// .append(", OCR Result: ").append(ocrResult)
-//// .append(",");
-// String ocrText = fixOcrText(ocrResult);
-// result.append("OCR Result:").append(ocrText).append(",");
-// }
}
-//
-// // 閬嶅巻姣忎竴琛�
-// for (Row row : sheet) {
-// // 閬嶅巻姣忎竴鍒�
-// for (Cell cell : row) {
-// CellType cellType = CellType.forInt(cell.getCellType());
-// switch (cellType) {
-// case STRING:
-// result.append(cell.getStringCellValue());
-// break;
-// case NUMERIC:
-// if (DateUtil.isCellDateFormatted(cell)) {
-// result.append(cell.getDateCellValue());
-// } else {
-// result.append(cell.getNumericCellValue());
-// }
-// break;
-// case BOOLEAN:
-// result.append(cell.getBooleanCellValue());
-// break;
-// case FORMULA:
-// result.append(cell.getCellFormula());
-// break;
-// default:
-// result.append("");
-// }
-// result.append("\t");
-// }
-// result.append("\n");
-// }
}
return result;
- }
-
- // 淇 OCR 璇嗗埆鏂囨湰涓殑閿欒鍏抽敭璇�
- public static String fixOcrText(String ocrText) {
- // 瀹氫箟閿欒鍏抽敭璇嶅拰姝g‘鍐呭鐨勬槧灏勶紝杩欓噷澶勭悊鈥滃嚮 瀹� 寮� 搴炩�濅慨姝d负鈥滃嚮绌垮己搴︹��
- // 鑰冭檻鍒板彲鑳芥湁绌烘牸鍒嗛殧锛岀敤姝e垯鍖归厤鍖呭惈杩欎簺瀛楃殑鍐呭
- ocrText = ocrText.replaceAll("鍑籠\s*瀹嘰\s*寮篭\s*搴�", "鍑荤┛寮哄害");
- // 杩樺彲浠ョ户缁坊鍔犲叾浠栭敊璇慨姝o紝姣斿涓嬮潰鍋囪鈥滅數 鍘� \\(HV\\)鈥濋噷鐨勭┖鏍煎奖鍝嶏紝涔熶慨姝d笅
- ocrText = ocrText.replaceAll("鐢礬\s*鍘媆\s*\\(HV\\)", "鐢靛帇(KV)");
- ocrText = ocrText.replaceAll("鐢礬\s*娴乗\s*\\(nt\\)", "鐢垫祦(mA)");
- return ocrText;
}
public static Object readPngFile1(File file) throws IOException, TesseractException {
@@ -284,8 +212,8 @@
Properties prop = new Properties();
//璁剧疆缂栫爜
prop.put("charSet", "UTF-8");
- prop.put("user", "");
- prop.put("password", "");
+ prop.put("user", StringUtils.isNotBlank(getFileDto.getDbUserName())?getFileDto.getDbUserName():"");
+ prop.put("password", StringUtils.isNotBlank(getFileDto.getDbPassword())?getFileDto.getDbPassword():"");
//鏁版嵁鍦板潃
String dbUrl = "jdbc:ucanaccess://" + file.getPath();
//寮曞叆椹卞姩
@@ -299,8 +227,6 @@
try {
List<Object> list = new ArrayList<>();
//閬嶅巻鑾峰彇澶氬紶琛ㄦ暟鎹�
-// String s = "select * from " + getFileDto.getDbFileName() + " where 1=1" + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode() +
-// "' and " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'";
String s = "select * from " + getFileDto.getDbFileName() + " where 1=1";
if(StringUtils.isNotBlank(getFileDto.getMdbEntrustCode())){
s+=" and " + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode()+ "'";
@@ -308,6 +234,10 @@
if(StringUtils.isNotBlank(getFileDto.getMdbSampleCode())){
s+=" and " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'";
}
+ if(StringUtils.isNotBlank(getFileDto.getBushingColorField())){
+ s+=" and " + getFileDto.getBushingColorField() + " = '" + getFileDto.getBushingColor() + "'";
+ }
+
preparedStatement = conn.prepareStatement(s);
rs = preparedStatement.executeQuery();
ResultSetMetaData data = rs.getMetaData();
@@ -370,9 +300,9 @@
String sql = "SELECT * FROM "+table+" WHERE 1=1";
if(StringUtils.isNotBlank(getFileDto.getMdbEntrustCode()) ){
- sql+=" AND (" + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode()+ "'";
+ sql+=" AND (" + getFileDto.getMdbEntrustCode() + " = TRIM('" + getFileDto.getEntrustCode()+ "')";
if(StringUtils.isNotBlank(getFileDto.getLotBatchNo())){
- sql+=" OR "+ getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getLotBatchNo()+ "'";
+ sql+=" OR "+ getFileDto.getMdbEntrustCode() + " = TRIM('" + getFileDto.getLotBatchNo()+ "')";
}
sql+=")";
}
@@ -382,7 +312,6 @@
if(StringUtils.isNotBlank(getFileDto.getCableTag())){
sql+=" AND Color = '" + getFileDto.getCableTag() + "'";
}
-
// 鍒涘缓 PreparedStatement 瀵硅薄鎵ц SQL
preparedStatement = connection.prepareStatement(sql);
resultSet = preparedStatement.executeQuery();
@@ -468,6 +397,19 @@
return tableMap;
}
+ /**
+ * Posts an image path to the OCR recognition service and returns the service's reply.
+ * @param imagePath path of the image to recognize; sent as the JSON field {@code imagePath}
+ * @return the raw response body of the POST to {@code localhost:8080/ocr/recognize}
+ */
+ public static String ocrImageContext(String imagePath){
+ // Call the OCR recognition service. NOTE(review): only the path is sent, so the service presumably reads the file itself - confirm.
+ Map<String,Object> jsonMap = new HashMap<>();
+ jsonMap.put("imagePath",imagePath);
+ String requestBody = JSONUtil.toJsonStr(jsonMap);
+ return HttpRequest.post("localhost:8080/ocr/recognize").body(requestBody).execute().body();
+ }
+
public static Object readPngFile(File file) throws IOException, TesseractException {
String canonicalPath32 = FileUtil.file(".", "/jre_32/tessdata").getCanonicalPath();
String canonicalPath64 = FileUtil.file(".", "/jre_64/tessdata").getCanonicalPath();
--
Gitblit v1.9.3