From 866a3e6cbd2df9841dfbbd733e1128938cef3e00 Mon Sep 17 00:00:00 2001
From: zouyu <2723363702@qq.com>
Date: 星期二, 09 十二月 2025 17:45:16 +0800
Subject: [PATCH] 采集器调整
---
src/main/java/com/chinaztt/mes/docx/util/TakeWords.java | 224 ++++++++++++++++++++++++++++++++++++++++++++++++--------
1 files changed, 192 insertions(+), 32 deletions(-)
diff --git a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
index 9d791dd..58fb34a 100644
--- a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
+++ b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
@@ -1,6 +1,8 @@
package com.chinaztt.mes.docx.util;
import cn.hutool.core.io.FileUtil;
+import cn.hutool.http.HttpRequest;
+import cn.hutool.json.JSONUtil;
import com.chinaztt.mes.docx.dto.GetFileDto;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
@@ -9,12 +11,16 @@
import net.sourceforge.tess4j.TesseractException;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hssf.usermodel.HSSFPictureData;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.ooxml.POIXMLDocument;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.xssf.usermodel.XSSFSheet;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import java.io.*;
@@ -52,19 +58,22 @@
return result;
}
- public static Object readExcelFile(File file) throws FileNotFoundException, IOException {
+ /**
+ * Reads an Excel file; both .xlsx and .xls formats are supported.
+ * @param file
+ * @return
+ */
+ public static Object readExcelFile(File file) {
StringBuilder result = new StringBuilder();
- //鍒涘缓宸ヤ綔绨垮璞�
- XSSFWorkbook xssfWorkbook = new XSSFWorkbook(new FileInputStream(file));
- //鑾峰彇宸ヤ綔绨夸笅sheet鐨勪釜鏁� 鍙鍙栫涓�涓猻heet
-// int sheetNum = xssfWorkbook.getNumberOfSheets();
- //閬嶅巻宸ヤ綔绨夸腑鐨勬墍鏈夋暟鎹�
- for (int i = 0; i < 1; i++) {
- XSSFSheet sheet = xssfWorkbook.getSheetAt(i);
+ try (
+ FileInputStream fis = new FileInputStream(file);
+ Workbook workbook = WorkbookFactory.create(fis)
+ ) {
+ // Only the first sheet of the workbook is read
+ Sheet sheet = workbook.getSheetAt(0);
//鑾峰彇鏈�鍚庝竴琛岀殑num锛屽嵆鎬昏鏁般�傛澶勪粠0寮�濮�
int maxRow = sheet.getLastRowNum();
for (int row = 0; row <= maxRow; row++) {
- //鑾峰彇鏈�鍚庡崟鍏冩牸num锛屽嵆鎬诲崟鍏冩牸鏁� ***娉ㄦ剰锛氭澶勪粠1寮�濮嬭鏁�***
int maxRol = sheet.getRow(row).getLastCellNum();
StringBuilder aLine = new StringBuilder();
for (int rol = 0; rol < maxRol; rol++) {
@@ -72,8 +81,79 @@
}
result.append(aLine).append("\n");
}
+ } catch (Exception e) {
+ e.printStackTrace();
}
return result.toString();
+ }
+
+    /**
+     * Runs OCR on the last picture embedded in a workbook opened with {@link HSSFWorkbook}.
+     * <p>
+     * The picture bytes are written to a temporary file, the file's absolute path is handed
+     * to {@link #ocrImageContext(String)}, and the temporary file is deleted afterwards.
+     *
+     * @param file workbook file to read
+     * @return the string returned by {@code ocrImageContext}, or an empty string when the
+     *         workbook contains no pictures
+     * @throws IOException if the workbook cannot be read or the temporary file cannot be written
+     */
+    public static Object readPngContextInExcel(File file) throws IOException {
+        // The stream and the workbook are both closed by try-with-resources.
+        try (FileInputStream fis = new FileInputStream(file);
+             HSSFWorkbook workbook = new HSSFWorkbook(fis)) {
+            List<HSSFPictureData> pictures = workbook.getAllPictures();
+            // Guard against a workbook without pictures: get(size() - 1) would otherwise
+            // throw IndexOutOfBoundsException.
+            if (pictures == null || pictures.isEmpty()) {
+                return "";
+            }
+            // Only the last picture is processed.
+            HSSFPictureData lastPicture = pictures.get(pictures.size() - 1);
+            // The extension suggested for the picture becomes the temporary file's suffix.
+            String pictureType = lastPicture.suggestFileExtension();
+            byte[] pictureData = lastPicture.getData();
+            File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType);
+            // The write sits inside try/finally so the temporary file is removed even when
+            // writing fails, not only when the OCR call fails.
+            try {
+                try (FileOutputStream fos = new FileOutputStream(tempFile)) {
+                    fos.write(pictureData);
+                }
+                return ocrImageContext(tempFile.getAbsolutePath());
+            } finally {
+                // Best-effort cleanup; the result of delete() is intentionally ignored.
+                tempFile.delete();
+            }
+        }
+    }
+
+    /**
+     * Recognises the text in an image with a local Tesseract engine (chi_sim+eng).
+     * @param file image file to recognise
+     * @return the text returned by {@code Tesseract.doOCR}
+     * @throws IOException if the tessdata path cannot be resolved or chi_sim.traineddata is missing
+     * @throws TesseractException if thrown by the Tesseract engine during recognition
+     */
+    public static Object readPngFile1(File file) throws IOException, TesseractException {
+        // The property may be absent on some JVMs; null selects the 64-bit directory instead of an NPE.
+        String arch = System.getProperty("sun.arch.data.model");
+        boolean is32Bit = arch != null && arch.contains("32");
+        File tessDataDir = FileUtil.file(".", is32Bit ? "/jre_32/tessdata" : "/jre_64/tessdata");
+        String path = tessDataDir.getCanonicalPath();
+        File chiSimFile = new File(path, "chi_sim.traineddata");
+        if (!chiSimFile.exists()) {
+            throw new FileNotFoundException("chi_sim.traineddata 鏂囦欢鏈壘鍒帮紝璇锋鏌ヨ矾寰�: " + chiSimFile.getAbsolutePath());
+        }
+        Tesseract tesseract = new Tesseract();
+        tesseract.setDatapath(path);
+        // Simplified Chinese plus English; use "eng" for English only.
+        tesseract.setLanguage("chi_sim+eng");
+        // Page segmentation mode 1: automatic page segmentation with OSD.
+        tesseract.setPageSegMode(1);
+        // OCR engine mode 1: neural-network LSTM engine.
+        tesseract.setOcrEngineMode(1);
+        return tesseract.doOCR(file);
     }
public static Object readTxtFile(File file) throws IOException {
@@ -83,6 +163,7 @@
StringBuilder stringBuilder = new StringBuilder();
String strTmp = "";
while ((strTmp = buffReader.readLine()) != null) {
+ strTmp = strTmp.replaceAll("\t",",");
stringBuilder.append(strTmp).append("\n");
}
buffReader.close();
@@ -90,23 +171,9 @@
}
public static Object readCsvFile(File file) {
+
StringBuilder stringBuilder = new StringBuilder();
// 鍒涘缓 reader
-// try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
-// // CSV鏂囦欢鐨勫垎闅旂
-// String DELIMITER = ",";
-// // 鎸夎璇诲彇
-// String line;
-// System.out.println(br.readLine());
-// while ((line = br.readLine()) != null) {
-// // 鍒嗗壊
-// String[] columns = line.split(DELIMITER);
-// // 鎵撳嵃琛�
-// stringBuilder.append(String.join(splitIdentifier, columns)).append("\n");
-// }
-// } catch (IOException ex) {
-// ex.printStackTrace();
-// }
try (FileReader fileReader = new FileReader(file);
CSVReader csvReader = new CSVReaderBuilder(fileReader).build()) {
@@ -145,8 +212,8 @@
Properties prop = new Properties();
//璁剧疆缂栫爜
prop.put("charSet", "UTF-8");
- prop.put("user", "");
- prop.put("password", "");
+ prop.put("user", StringUtils.isNotBlank(getFileDto.getDbUserName())?getFileDto.getDbUserName():"");
+ prop.put("password", StringUtils.isNotBlank(getFileDto.getDbPassword())?getFileDto.getDbPassword():"");
//鏁版嵁鍦板潃
String dbUrl = "jdbc:ucanaccess://" + file.getPath();
//寮曞叆椹卞姩
@@ -160,8 +227,6 @@
try {
List<Object> list = new ArrayList<>();
//閬嶅巻鑾峰彇澶氬紶琛ㄦ暟鎹�
-// String s = "select * from " + getFileDto.getDbFileName() + " where 1=1" + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode() +
-// "' and " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'";
String s = "select * from " + getFileDto.getDbFileName() + " where 1=1";
if(StringUtils.isNotBlank(getFileDto.getMdbEntrustCode())){
s+=" and " + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode()+ "'";
@@ -169,6 +234,10 @@
if(StringUtils.isNotBlank(getFileDto.getMdbSampleCode())){
s+=" and " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'";
}
+ if(StringUtils.isNotBlank(getFileDto.getBushingColorField())){
+ s+=" and " + getFileDto.getBushingColorField() + " = '" + getFileDto.getBushingColor() + "'";
+ }
+
preparedStatement = conn.prepareStatement(s);
rs = preparedStatement.executeQuery();
ResultSetMetaData data = rs.getMetaData();
@@ -183,6 +252,7 @@
}
tableMap.put("data", list);
} catch (Exception e) {
+ e.printStackTrace();
} finally {
closeA1l(conn, preparedStatement, rs);
}
@@ -202,6 +272,81 @@
}
} catch (Exception ignore) {
}
+ }
+
+    /**
+     * Queries one table of a MySQL database on localhost:3306 and returns the matching rows.
+     * <p>
+     * The database name, table name, filter column names and credentials are read from
+     * {@code getFileDto}. Filter values are bound as JDBC parameters instead of being
+     * concatenated into the SQL text, so quotes inside a value can no longer alter the query.
+     * Table and column names cannot be bound and are still concatenated into the statement.
+     *
+     * @param getFileDto connection settings, filter column names and filter values
+     * @return a map whose {@code "data"} entry holds one column-name-to-value map per row, or
+     *         the result of {@code R.failed(...)} when a name is empty or the query fails
+     */
+    public static Object getMysqlFile(GetFileDto getFileDto) {
+        // Database name and table name are both required.
+        String dbName = getFileDto.getDbFileName();
+        String table = getFileDto.getDbTable();
+        if (dbName == null || dbName.isEmpty() || table == null || table.isEmpty()) {
+            return R.failed("鏁版嵁搴撳悕鎴栬〃鍚嶄笉鑳戒负绌�");
+        }
+        String url = "jdbc:mysql://localhost:3306/" + dbName + "?useSSL=false&serverTimezone=GMT%2B8&allowPublicKeyRetrieval=true&characterEncoding=utf8";
+
+        // NOTE(review): identifiers (table and column names) cannot be bound as parameters and
+        // are still concatenated; they must come from trusted configuration only.
+        StringBuilder sql = new StringBuilder("SELECT * FROM ").append(table).append(" WHERE 1=1");
+        // Values for the '?' placeholders, in the order the placeholders are appended.
+        List<String> params = new ArrayList<>();
+        if (StringUtils.isNotBlank(getFileDto.getMdbEntrustCode())) {
+            sql.append(" AND (").append(getFileDto.getMdbEntrustCode()).append(" = TRIM(?)");
+            params.add(String.valueOf(getFileDto.getEntrustCode()));
+            if (StringUtils.isNotBlank(getFileDto.getLotBatchNo())) {
+                sql.append(" OR ").append(getFileDto.getMdbEntrustCode()).append(" = TRIM(?)");
+                params.add(String.valueOf(getFileDto.getLotBatchNo()));
+            }
+            sql.append(")");
+        }
+        if (StringUtils.isNotBlank(getFileDto.getMdbSampleCode())) {
+            sql.append(" AND ").append(getFileDto.getMdbSampleCode()).append(" = ?");
+            params.add(String.valueOf(getFileDto.getSampleCode()));
+        }
+        if (StringUtils.isNotBlank(getFileDto.getCableTag())) {
+            sql.append(" AND Color = ?");
+            params.add(String.valueOf(getFileDto.getCableTag()));
+        }
+
+        Map<String, Object> tableMap = new HashMap<>(16);
+        List<Map<String, Object>> dataList = new ArrayList<>();
+        // try-with-resources closes the result set, statement and connection in reverse order.
+        try (Connection connection = DriverManager.getConnection(
+                     url, getFileDto.getDbUserName(), getFileDto.getDbPassword());
+             PreparedStatement preparedStatement = connection.prepareStatement(sql.toString())) {
+            for (int i = 0; i < params.size(); i++) {
+                // JDBC parameter indexes start at 1.
+                preparedStatement.setString(i + 1, params.get(i));
+            }
+            try (ResultSet resultSet = preparedStatement.executeQuery()) {
+                ResultSetMetaData metaData = resultSet.getMetaData();
+                int columnCount = metaData.getColumnCount();
+                // Copy every row into a column-name-to-value map.
+                while (resultSet.next()) {
+                    Map<String, Object> rowData = new HashMap<>();
+                    for (int i = 1; i <= columnCount; i++) {
+                        rowData.put(metaData.getColumnName(i), resultSet.getObject(i));
+                    }
+                    dataList.add(rowData);
+                }
+            }
+            tableMap.put("data", dataList);
+        } catch (Exception e) {
+            // Report the failure to the caller instead of propagating the exception.
+            e.printStackTrace();
+            return R.failed("鏁版嵁搴撴煡璇㈠嚭閿�: " + e.getMessage());
+        }
+        return tableMap;
     }
public static Object readDbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
@@ -252,6 +397,19 @@
return tableMap;
}
+    /**
+     * Sends an image path to the OCR recognition service and returns the raw response body.
+     *
+     * @param imagePath path of the image to recognise
+     * @return the body of the response to the POST request
+     */
+    public static String ocrImageContext(String imagePath) {
+        Map<String, Object> payload = new HashMap<>();
+        payload.put("imagePath", imagePath);
+        // POST the JSON-encoded payload and hand back the response body unchanged.
+        return HttpRequest.post("localhost:8080/ocr/recognize").body(JSONUtil.toJsonStr(payload)).execute().body();
+    }
+
public static Object readPngFile(File file) throws IOException, TesseractException {
String canonicalPath32 = FileUtil.file(".", "/jre_32/tessdata").getCanonicalPath();
String canonicalPath64 = FileUtil.file(".", "/jre_64/tessdata").getCanonicalPath();
@@ -263,6 +421,8 @@
} else {
path = canonicalPath64.replaceAll("/chi_sim.traineddata", "").replaceAll("\\\\", "/");
}
+ // Set TESSDATA_PREFIX as a JVM system property (currently disabled)
+// System.setProperty("TESSDATA_PREFIX", path);
//璁剧疆閰嶇疆鏂囦欢澶瑰井瑙嗐�佽瘑鍒瑷�銆佽瘑鍒ā寮�
Tesseract tesseract = new Tesseract();
tesseract.setDatapath(path);
--
Gitblit v1.9.3