From b1ff3475c44738107398f4e502d9b54aac97fc48 Mon Sep 17 00:00:00 2001
From: zouyu <2723363702@qq.com>
Date: 星期二, 21 十月 2025 17:14:47 +0800
Subject: [PATCH] 数采调整2
---
src/main/java/com/chinaztt/mes/docx/util/TakeWords.java | 296 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 files changed, 270 insertions(+), 26 deletions(-)
diff --git a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
index 3d77d75..70f5c15 100644
--- a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
+++ b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
@@ -2,14 +2,21 @@
import cn.hutool.core.io.FileUtil;
import com.chinaztt.mes.docx.dto.GetFileDto;
+import com.opencsv.CSVReader;
+import com.opencsv.CSVReaderBuilder;
+import com.opencsv.exceptions.CsvValidationException;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hssf.usermodel.HSSFPictureData;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
@@ -18,10 +25,16 @@
import java.nio.file.Files;
import java.sql.*;
import java.util.*;
+import java.util.regex.Pattern;
public class TakeWords {
private static final String splitIdentifier = "@-@"; // 鑷畾涔夊敮涓�鏍囪瘑绗�
+
+ // Regular expression matching scientific notation such as "1.5e-3" or "-2E10"
+ private static final Pattern SCIENTIFIC_PATTERN = Pattern.compile(
+ "^[+-]?\\d+(\\.\\d+)?[eE][+-]?\\d+$"
+ );
public static Object readWordFile(File file) {
String result = "";
@@ -44,28 +57,171 @@
return result;
}
- public static Object readExcelFile(File file) throws FileNotFoundException, IOException {
+ public static Object readExcelFile(File file) throws IOException {
StringBuilder result = new StringBuilder();
//鍒涘缓宸ヤ綔绨垮璞�
- XSSFWorkbook xssfWorkbook = new XSSFWorkbook(new FileInputStream(file));
+ XSSFWorkbook xssfWorkbook = new XSSFWorkbook(Files.newInputStream(file.toPath()));
//鑾峰彇宸ヤ綔绨夸笅sheet鐨勪釜鏁� 鍙鍙栫涓�涓猻heet
// int sheetNum = xssfWorkbook.getNumberOfSheets();
//閬嶅巻宸ヤ綔绨夸腑鐨勬墍鏈夋暟鎹�
- for (int i = 0; i < 1; i++) {
- XSSFSheet sheet = xssfWorkbook.getSheetAt(i);
- //鑾峰彇鏈�鍚庝竴琛岀殑num锛屽嵆鎬昏鏁般�傛澶勪粠0寮�濮�
- int maxRow = sheet.getLastRowNum();
- for (int row = 0; row <= maxRow; row++) {
- //鑾峰彇鏈�鍚庡崟鍏冩牸num锛屽嵆鎬诲崟鍏冩牸鏁� ***娉ㄦ剰锛氭澶勪粠1寮�濮嬭鏁�***
- int maxRol = sheet.getRow(row).getLastCellNum();
- StringBuilder aLine = new StringBuilder();
- for (int rol = 0; rol < maxRol; rol++) {
- aLine.append(sheet.getRow(row).getCell(rol)).append(splitIdentifier);
- }
- result.append(aLine).append("\n");
+ XSSFSheet sheet = xssfWorkbook.getSheetAt(0);
+ //Index of the last row; the original note says it starts from 0. NOTE(review): the loop below starts at row 1, so row 0 is never emitted - confirm it is a header row.
+ int maxRow = sheet.getLastRowNum();
+ for (int row = 1; row <= maxRow; row++) {
+ //Cell count of the row; the original note says this count starts from 1. NOTE(review): getRow(row) is dereferenced without a null check - confirm rows are never missing.
+ int maxRol = sheet.getRow(row).getLastCellNum();
+ StringBuilder aLine = new StringBuilder();
+ for (int rol = 0; rol < maxRol; rol++) {
+ aLine.append(sheet.getRow(row).getCell(rol)).append(splitIdentifier);
}
+ result.append(aLine).append("\n");
}
return result.toString();
+ }
+
+    /**
+     * Runs OCR on the last picture embedded in a legacy .xls workbook; cell values are not read.
+     *
+     * @param file the .xls file to open
+     * @return the OCR text, a failure message if OCR throws, or "" when the workbook has no pictures
+     * @throws IOException if the workbook or the temporary image file cannot be read or written
+     */
+    public static Object readExcelxlsFile(File file) throws IOException {
+        String result = "";
+        try (FileInputStream fis = new FileInputStream(file);
+             HSSFWorkbook hssfWorkbook = new HSSFWorkbook(fis)) {
+            List<HSSFPictureData> pictures = hssfWorkbook.getAllPictures();
+            // A workbook without pictures has nothing to OCR; pictures.get(-1) would throw otherwise
+            if (pictures.isEmpty()) {
+                return result;
+            }
+            // Only the last picture is processed
+            HSSFPictureData lastPicture = pictures.get(pictures.size() - 1);
+            String pictureType = lastPicture.suggestFileExtension();
+            byte[] pictureData = lastPicture.getData();
+            // Write the picture to a temporary file so that readPngFile can read it
+            File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType);
+            try {
+                try (FileOutputStream fos = new FileOutputStream(tempFile)) {
+                    fos.write(pictureData);
+                }
+                result = (String) readPngFile(tempFile);
+            } catch (TesseractException e) {
+                result = "OCR璇嗗埆澶辫触: " + e.getMessage();
+            } finally {
+                // Always remove the temporary file, even when writing or OCR fails
+                tempFile.delete();
+            }
+            // Earlier variants, left commented out by the author: post-processing with fixOcrText,
+            // OCR of every picture, and a cell-by-cell text dump
+// String ocrText = fixOcrText(ocrResult);
+// result.append("OCR Result:").append(ocrText).append(",");
+// for (HSSFPictureData picture : pictures) {
+// // Get the picture type
+// String pictureType = picture.suggestFileExtension();
+// // Get the picture data
+// byte[] pictureData = picture.getData();
+// // Create a temporary file
+// File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType);
+// try (FileOutputStream fos = new FileOutputStream(tempFile)) {
+// fos.write(pictureData);
+// }
+// // Image preprocessing
+//// File processedFile = preprocessImage(tempFile, pictureType);
+// // Call readPngFile1 to read the text in the picture
+// String ocrResult = "";
+// try {
+// ocrResult = (String) readPngFile(tempFile);
+//// ocrResult = (String) readPngFile(tempFile);
+//// ocrResult = (String) readPngFile(processedFile);
+// } catch (TesseractException e) {
+// ocrResult = "OCR璇嗗埆澶辫触: " + e.getMessage();
+// } finally {
+// // Delete the temporary file
+//// tempFile.delete();
+//// processedFile.delete();
+// }
+//
+// // Append the picture information to the result
+//// result.append("Picture Type: ").append(pictureType)
+//// .append(", Picture Size: ").append(pictureData.length)
+//// .append(" bytes")
+//// .append(", OCR Result: ").append(ocrResult)
+//// .append(",");
+// String ocrText = fixOcrText(ocrResult);
+// result.append("OCR Result:").append(ocrText).append(",");
+// }
+//
+// // Iterate over every row
+// for (Row row : sheet) {
+// // Iterate over every column
+// for (Cell cell : row) {
+// CellType cellType = CellType.forInt(cell.getCellType());
+// switch (cellType) {
+// case STRING:
+// result.append(cell.getStringCellValue());
+// break;
+// case NUMERIC:
+// if (DateUtil.isCellDateFormatted(cell)) {
+// result.append(cell.getDateCellValue());
+// } else {
+// result.append(cell.getNumericCellValue());
+// }
+// break;
+// case BOOLEAN:
+// result.append(cell.getBooleanCellValue());
+// break;
+// case FORMULA:
+// result.append(cell.getCellFormula());
+// break;
+// default:
+// result.append("");
+// }
+// result.append("\t");
+// }
+// result.append("\n");
+// }
+        }
+        return result;
+    }
+
+ // Fixes known wrong keywords in OCR-recognised text by regex replacement
+ public static String fixOcrText(String ocrText) {
+ // Maps a misrecognised keyword to the intended text. NOTE(review): the pattern and replacement literals are mis-encoded in this view - verify them against the real source file.
+ // The recognised characters may be separated by whitespace, so the pattern allows optional whitespace (\s*) between them
+ ocrText = ocrText.replaceAll("鍑籠\s*瀹嘰\s*寮篭\s*搴�", "鍑荤┛寮哄害");
+ // Further corrections follow the same approach: a label read with "(HV)" is rewritten to the "(KV)" form, and one read with "(nt)" to the "(mA)" form
+ ocrText = ocrText.replaceAll("鐢礬\s*鍘媆\s*\\(HV\\)", "鐢靛帇(KV)");
+ ocrText = ocrText.replaceAll("鐢礬\s*娴乗\s*\\(nt\\)", "鐢垫祦(mA)");
+ return ocrText;
+ }
+
+    /**
+     * Runs Tesseract OCR with the Simplified Chinese and English models on an image file.
+     *
+     * @param file image to recognise
+     * @return the text returned by {@code Tesseract.doOCR}
+     * @throws IOException if the tessdata path cannot be resolved or chi_sim.traineddata is missing
+     * @throws TesseractException if recognition fails
+     */
+    public static Object readPngFile1(File file) throws IOException, TesseractException {
+        // tessdata lives under jre_32 or jre_64, chosen by the JVM data model
+        boolean is32Bit = System.getProperty("sun.arch.data.model").contains("32");
+        String tessDataSubPath = is32Bit ? "/jre_32/tessdata" : "/jre_64/tessdata";
+        String dataPath = FileUtil.file(".", tessDataSubPath).getCanonicalPath();
+        // Fail early when the Simplified Chinese model file is absent
+        File trainedData = new File(dataPath, "chi_sim.traineddata");
+        if (!trainedData.exists()) {
+            throw new FileNotFoundException("chi_sim.traineddata 鏂囦欢鏈壘鍒帮紝璇锋鏌ヨ矾寰�: " + trainedData.getAbsolutePath());
+        }
+        Tesseract ocr = new Tesseract();
+        ocr.setDatapath(dataPath);
+        ocr.setLanguage("chi_sim+eng");
+        // Page segmentation mode 1: described by the original author as automatic segmentation with OSD
+        ocr.setPageSegMode(1);
+        // Engine mode 1: described by the original author as the LSTM neural-network engine
+        ocr.setOcrEngineMode(1);
+        return ocr.doOCR(file);
}
public static Object readTxtFile(File file) throws IOException {
@@ -82,23 +238,40 @@
}
public static Object readCsvFile(File file) {
+ // Parses the CSV with opencsv and emits one output line per record, each kept cell followed by splitIdentifier
StringBuilder stringBuilder = new StringBuilder();
// 鍒涘缓 reader
- try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
- // CSV鏂囦欢鐨勫垎闅旂
- String DELIMITER = ",";
- // 鎸夎璇诲彇
- String line;
- while ((line = br.readLine()) != null) {
- // 鍒嗗壊
- String[] columns = line.split(DELIMITER);
- // 鎵撳嵃琛�
- stringBuilder.append(String.join(splitIdentifier, columns)).append("\n");
+ try (FileReader fileReader = new FileReader(file);
+ CSVReader csvReader = new CSVReaderBuilder(fileReader).build()) {
+ // NOTE(review): FileReader decodes with the JVM default charset, unlike the Files.newBufferedReader call it replaces - confirm the encoding of the input files
+ String[] nextLine;
+ while ((nextLine = csvReader.readNext()) != null) {
+ // Process each row: blank cells are skipped (so later cells close up) and every kept cell passes through scientificToNumber
+ for (String cell : nextLine) {
+ if(StringUtils.isNotBlank(cell)){
+ stringBuilder.append(scientificToNumber(cell)).append(splitIdentifier);
+ }
+ }
+ stringBuilder.append("\n");
}
- } catch (IOException ex) {
- ex.printStackTrace();
+ } catch (IOException e) {
+ e.printStackTrace();
+ } catch (CsvValidationException e) {
+ throw new RuntimeException(e);
}
return stringBuilder.toString();
+ }
+
+    /**
+     * Converts a value written in scientific notation (e.g. "1.23e-5") to plain decimal text ("0.0000123").
+     * @param cell raw cell text; may be null
+     * @return plain decimal text, or {@code cell} unchanged when it is null or not in scientific notation
+     */
+    public static String scientificToNumber(String cell){
+        if(cell == null || !SCIENTIFIC_PATTERN.matcher(cell).matches()){
+            return cell;
+        }
+        return new java.math.BigDecimal(cell).toPlainString();
}
public static Object readMdbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
@@ -144,6 +317,7 @@
}
tableMap.put("data", list);
} catch (Exception e) {
+ e.printStackTrace();
} finally {
closeA1l(conn, preparedStatement, rs);
}
@@ -163,6 +337,74 @@
}
} catch (Exception ignore) {
}
+ }
+
+    /**
+     * Queries a MySQL table on localhost:3306 and returns the matching rows under the "data" key.
+     * <p>
+     * Filter values are bound as statement parameters instead of being concatenated into the SQL
+     * text, and the JDBC resources are closed by try-with-resources.
+     *
+     * @param getFileDto supplies the database name, credentials, table name, the optional filter
+     *                   column names and the values those columns must equal
+     * @return a map whose "data" entry lists the rows (column name to value), or the result of
+     *         {@code R.failed(...)} when the database/table name is empty or the query fails
+     */
+    public static Object getMysqlFile(GetFileDto getFileDto){
+        Map<String, Object> tableMap = new HashMap<>(16);
+        // Database name, credentials and table name all come from the DTO
+        String dbName = getFileDto.getDbFileName();
+        String user = getFileDto.getDbUserName();
+        String password = getFileDto.getDbPassword();
+        String table = getFileDto.getDbTable();
+        // The database name and the table name are both mandatory
+        if (dbName == null || dbName.isEmpty() || table == null || table.isEmpty()) {
+            return R.failed("鏁版嵁搴撳悕鎴栬〃鍚嶄笉鑳戒负绌�");
+        }
+        // JDBC URL of the local MySQL server
+        String url = "jdbc:mysql://localhost:3306/"+dbName+"?useSSL=false&serverTimezone=GMT%2B8&allowPublicKeyRetrieval=true&characterEncoding=utf8";
+
+        // NOTE(review): identifiers (table and column names) cannot be bound as parameters and are
+        // still concatenated, so they must come from trusted configuration only.
+        StringBuilder sql = new StringBuilder("SELECT * FROM ").append(table).append(" WHERE 1=1");
+        List<String> filterValues = new ArrayList<>();
+        if (StringUtils.isNotBlank(getFileDto.getMdbEntrustCode())) {
+            sql.append(" AND ").append(getFileDto.getMdbEntrustCode()).append(" = ?");
+            // String.valueOf keeps the text conversion the concatenated SQL used (null -> "null")
+            filterValues.add(String.valueOf(getFileDto.getEntrustCode()));
+        }
+        if (StringUtils.isNotBlank(getFileDto.getMdbSampleCode())) {
+            sql.append(" AND ").append(getFileDto.getMdbSampleCode()).append(" = ?");
+            filterValues.add(String.valueOf(getFileDto.getSampleCode()));
+        }
+
+        List<Map<String, Object>> dataList = new ArrayList<>();
+        try (Connection connection = DriverManager.getConnection(url, user, password);
+             PreparedStatement preparedStatement = connection.prepareStatement(sql.toString())) {
+            // JDBC parameter indexes start at 1
+            for (int i = 0; i < filterValues.size(); i++) {
+                preparedStatement.setString(i + 1, filterValues.get(i));
+            }
+            try (ResultSet resultSet = preparedStatement.executeQuery()) {
+                ResultSetMetaData metaData = resultSet.getMetaData();
+                int columnCount = metaData.getColumnCount();
+                // Copy every row into a column-name -> value map
+                while (resultSet.next()) {
+                    Map<String, Object> rowData = new HashMap<>();
+                    for (int i = 1; i <= columnCount; i++) {
+                        rowData.put(metaData.getColumnName(i), resultSet.getObject(i));
+                    }
+                    dataList.add(rowData);
+                }
+            }
+            tableMap.put("data", dataList);
+        } catch (Exception e) {
+            // Failures are reported to the caller as a failed result rather than thrown
+            e.printStackTrace();
+            return R.failed("鏁版嵁搴撴煡璇㈠嚭閿�: " + e.getMessage());
+        }
+
+        return tableMap;
}
public static Object readDbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
@@ -224,6 +466,8 @@
} else {
path = canonicalPath64.replaceAll("/chi_sim.traineddata", "").replaceAll("\\\\", "/");
}
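+ // Would set TESSDATA_PREFIX (disabled below); note that System.setProperty sets a JVM system property, not an environment variable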
+// System.setProperty("TESSDATA_PREFIX", path);
//璁剧疆閰嶇疆鏂囦欢澶瑰井瑙嗐�佽瘑鍒瑷�銆佽瘑鍒ā寮�
Tesseract tesseract = new Tesseract();
tesseract.setDatapath(path);
--
Gitblit v1.9.3