From 2d1e909070e2baaa49d3a021364df595ff1d2e72 Mon Sep 17 00:00:00 2001
From: yaowanxin <3588231647@qq.com>
Date: 星期六, 19 七月 2025 11:01:32 +0800
Subject: [PATCH] 读取
---
src/main/java/com/chinaztt/mes/docx/dto/ThicknessData.java | 22 +++
src/main/java/com/chinaztt/mes/docx/service/impl/DocxServiceImpl.java | 9 +
src/main/resources/META-INF/MANIFEST.MF | 3
pom.xml | 28 ++++
src/main/java/com/chinaztt/mes/docx/service/DocxService.java | 2
src/main/java/com/chinaztt/mes/docx/dto/GetFileDto.java | 6
src/main/java/com/chinaztt/mes/docx/util/TakeWords.java | 242 ++++++++++++++++++++++++++++++++++++++++
7 files changed, 309 insertions(+), 3 deletions(-)
diff --git a/pom.xml b/pom.xml
index 0ea035f..542ce7c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -14,6 +14,16 @@
<spring-boot.version>2.6.3</spring-boot.version>
</properties>
<dependencies>
+ <dependency>
+ <groupId>com.alibaba</groupId>
+ <artifactId>fastjson</artifactId>
+ <version>2.0.23</version>
+ </dependency>
+
+ <dependency>
+ <groupId>mysql</groupId>
+ <artifactId>mysql-connector-java</artifactId>
+ </dependency>
<!--lombok-->
<dependency>
<groupId>org.projectlombok</groupId>
@@ -25,6 +35,24 @@
<artifactId>tess4j</artifactId>
<version>5.12.0</version>
</dependency>
+ <dependency>
+ <groupId>org.bytedeco</groupId>
+ <artifactId>javacv-platform</artifactId>
+ <!-- Pin an explicit version -->
+ <version>1.5.10</version>
+ </dependency>
+ <!-- Handles the .xls (OLE2) format -->
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi</artifactId>
+ <version>5.2.3</version>
+ </dependency>
+ <!-- Handles the .xlsx (OOXML) format -->
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-ooxml</artifactId>
+ <version>5.2.3</version>
+ </dependency>
<dependency>
<groupId>org.xerial</groupId>
diff --git a/src/main/java/com/chinaztt/mes/docx/dto/GetFileDto.java b/src/main/java/com/chinaztt/mes/docx/dto/GetFileDto.java
index d219560..93c054e 100644
--- a/src/main/java/com/chinaztt/mes/docx/dto/GetFileDto.java
+++ b/src/main/java/com/chinaztt/mes/docx/dto/GetFileDto.java
@@ -16,10 +16,14 @@
private String mdbEntrustCode;
// 鏍峰搧缂栧彿瀛楁
private String mdbSampleCode;
- // db鏂囦欢鍚嶇О
+ // db file name / database name
private String dbFileName;
// 鍏夌氦甯﹀瓧娈�
private String fiberOpticRibbonCode;
// 鍏夌氦甯︽暟鎹�
private String fiberOpticRibbon;
+ // database user name
+ private String dbUserName;
+ // database password
+ private String dbPassword;
}
diff --git a/src/main/java/com/chinaztt/mes/docx/dto/ThicknessData.java b/src/main/java/com/chinaztt/mes/docx/dto/ThicknessData.java
new file mode 100644
index 0000000..b0c6125
--- /dev/null
+++ b/src/main/java/com/chinaztt/mes/docx/dto/ThicknessData.java
@@ -0,0 +1,22 @@
+package com.chinaztt.mes.docx.dto;
+
+import lombok.Data;
+
+/**
+ * Value holder for one thickness measurement: the thinnest point and the
+ * average thickness.
+ *
+ * <p>Lombok's {@code @Data} contributes the setters, {@code equals},
+ * {@code hashCode} and {@code toString}; the two getters are additionally
+ * written out by hand below.
+ */
+@Data
+public class ThicknessData {
+
+    /** Thinnest measured point. */
+    private double thinnestPoint;
+
+    /** Average thickness. */
+    private double averageThickness;
+
+    /**
+     * Builds a measurement from its two values.
+     *
+     * @param thinnestPoint    value stored as the thinnest point
+     * @param averageThickness value stored as the average thickness
+     */
+    public ThicknessData(double thinnestPoint, double averageThickness) {
+        this.averageThickness = averageThickness;
+        this.thinnestPoint = thinnestPoint;
+    }
+
+    /** @return the stored average thickness */
+    public double getAverageThickness() {
+        return this.averageThickness;
+    }
+
+    /** @return the stored thinnest point */
+    public double getThinnestPoint() {
+        return this.thinnestPoint;
+    }
+}
diff --git a/src/main/java/com/chinaztt/mes/docx/service/DocxService.java b/src/main/java/com/chinaztt/mes/docx/service/DocxService.java
index 48a0814..1c51019 100644
--- a/src/main/java/com/chinaztt/mes/docx/service/DocxService.java
+++ b/src/main/java/com/chinaztt/mes/docx/service/DocxService.java
@@ -11,4 +11,6 @@
R<?> getFile(GetFileDto getFileDto) throws IOException, SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException, TesseractException;
R<?> moveFile(String startFilePath, String endFilePath, String fileType);
+
+
}
diff --git a/src/main/java/com/chinaztt/mes/docx/service/impl/DocxServiceImpl.java b/src/main/java/com/chinaztt/mes/docx/service/impl/DocxServiceImpl.java
index 1843c1a..4a45243 100644
--- a/src/main/java/com/chinaztt/mes/docx/service/impl/DocxServiceImpl.java
+++ b/src/main/java/com/chinaztt/mes/docx/service/impl/DocxServiceImpl.java
@@ -3,6 +3,7 @@
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.ObjectUtil;
import com.chinaztt.mes.docx.dto.GetFileDto;
+import com.chinaztt.mes.docx.dto.ThicknessData;
import com.chinaztt.mes.docx.service.DocxService;
import com.chinaztt.mes.docx.util.R;
import com.chinaztt.mes.docx.util.TakeWords;
@@ -11,7 +12,7 @@
import org.springframework.stereotype.Service;
import java.io.*;
-import java.sql.SQLException;
+import java.sql.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
@@ -28,6 +29,9 @@
switch (getFileDto.getFileExtension()) {
case ".docx":
return R.ok(TakeWords.readWordFile(file));
+ // files with the .xls extension
+ case ".xls":
+ return R.ok(TakeWords.readExcelxlsFile(file));
case ".xlsx":
try {
return R.ok(TakeWords.readExcelFile(file));
@@ -47,7 +51,7 @@
if (ObjectUtil.isEmpty(getFileDto.getDbFileName()) || Objects.equals(getFileDto.getDbFileName(), "null")) {
return R.failed("鏈厤缃�.db閲囬泦鏂囦欢鍚嶇О锛�");
}
- return R.ok(TakeWords.readDbFile(file, getFileDto));
+ return R.ok(TakeWords.getmysqlFile(getFileDto));
case ".png":
return R.ok(TakeWords.readPngFile(file));
default:
@@ -80,6 +84,7 @@
}
}
+
public static File getLatestFile(List<File> files) {
File latestFile = null;
long latestTime = 0;
diff --git a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
index 3d77d75..39ba107 100644
--- a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
+++ b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
@@ -2,22 +2,34 @@
import cn.hutool.core.io.FileUtil;
import com.chinaztt.mes.docx.dto.GetFileDto;
+import com.chinaztt.mes.docx.dto.ThicknessData;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hssf.usermodel.HSSFPicture;
+import org.apache.poi.hssf.usermodel.HSSFPictureData;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import java.awt.Color;
+import java.awt.image.BufferedImage;
+import javax.imageio.ImageIO;
import java.io.*;
import java.nio.file.Files;
import java.sql.*;
import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
public class TakeWords {
@@ -68,6 +80,205 @@
return result.toString();
}
+ /**
+  * Reads a legacy {@code .xls} workbook and OCRs every picture embedded in it.
+  *
+  * <p>Only pictures are processed; cell contents are not read. For each
+  * picture the recognised text is passed through {@link #fixOcrText(String)}
+  * and appended as {@code "OCR Result:<text>,"}.
+  *
+  * @param file the {@code .xls} file to read
+  * @return the concatenated OCR results as a {@code String} (empty when the
+  *         workbook holds no pictures)
+  * @throws IOException if the workbook or a temporary picture file cannot be
+  *                     read or written, or the OCR language data is missing
+  */
+ public static Object readExcelxlsFile(File file) throws IOException {
+     StringBuilder result = new StringBuilder();
+     try (FileInputStream fis = new FileInputStream(file);
+          HSSFWorkbook workbook = new HSSFWorkbook(fis)) {
+         for (HSSFPictureData picture : workbook.getAllPictures()) {
+             String ocrText = fixOcrText(ocrEmbeddedPicture(picture));
+             result.append("OCR Result:").append(ocrText).append(",");
+         }
+     }
+     // Return a String, like the other readers in this class, not the builder.
+     return result.toString();
+ }
+
+ /**
+  * Writes one embedded picture to a temporary file, runs OCR on it and always
+  * removes the temporary file afterwards.
+  *
+  * @param picture the embedded picture to recognise
+  * @return the recognised text, or a failure message when Tesseract rejects the image
+  * @throws IOException if the temporary file cannot be created or written
+  */
+ private static String ocrEmbeddedPicture(HSSFPictureData picture) throws IOException {
+     File tempFile = File.createTempFile(
+             UUID.randomUUID().toString(), "." + picture.suggestFileExtension());
+     try {
+         try (FileOutputStream fos = new FileOutputStream(tempFile)) {
+             fos.write(picture.getData());
+         }
+         // OCR runs on the raw picture; the preprocessed copy was never used for
+         // recognition, and producing it could fail on formats ImageIO cannot decode.
+         return (String) readPngFile1(tempFile);
+     } catch (TesseractException e) {
+         // Message text is "OCR recognition failed: " in Chinese, written with
+         // Unicode escapes so it survives any source-file encoding.
+         return "OCR\u8bc6\u522b\u5931\u8d25: " + e.getMessage();
+     } finally {
+         // The try spans file creation to OCR, so no path leaves the file behind.
+         if (!tempFile.delete()) {
+             tempFile.deleteOnExit();
+         }
+     }
+ }
+
+
+ // 淇 OCR 璇嗗埆鏂囨湰涓殑閿欒鍏抽敭璇�
+ public static String fixOcrText(String ocrText) {
+ // 瀹氫箟閿欒鍏抽敭璇嶅拰姝g‘鍐呭鐨勬槧灏勶紝杩欓噷澶勭悊鈥滃嚮 瀹� 寮� 搴炩�濅慨姝d负鈥滃嚮绌垮己搴︹��
+ // 鑰冭檻鍒板彲鑳芥湁绌烘牸鍒嗛殧锛岀敤姝e垯鍖归厤鍖呭惈杩欎簺瀛楃殑鍐呭
+ ocrText = ocrText.replaceAll("鍑籠\s*瀹嘰\s*寮篭\s*搴�", "鍑荤┛寮哄害");
+ // 杩樺彲浠ョ户缁坊鍔犲叾浠栭敊璇慨姝o紝姣斿涓嬮潰鍋囪鈥滅數 鍘� \\(HV\\)鈥濋噷鐨勭┖鏍煎奖鍝嶏紝涔熶慨姝d笅
+ ocrText = ocrText.replaceAll("鐢礬\s*鍘媆\s*\\(HV\\)", "鐢靛帇(KV)");
+ ocrText = ocrText.replaceAll("鐢礬\s*娴乗\s*\\(nt\\)", "鐢垫祦(mA)");
+ return ocrText;
+ }
+
+ /**
+  * Preprocesses an image for OCR: grayscale conversion, binarisation with a
+  * threshold of 128, then sharpening. The result is written to a new
+  * temporary file that the caller is responsible for deleting.
+  *
+  * @param inputFile  the image file to read
+  * @param formatName image format name, used both as the temp-file suffix and
+  *                   as the ImageIO writer format
+  * @return the temporary file holding the processed image
+  * @throws IOException if the image cannot be decoded, no writer exists for
+  *                     {@code formatName}, or reading/writing fails
+  */
+ private static File preprocessImage(File inputFile, String formatName) throws IOException {
+     BufferedImage image = ImageIO.read(inputFile);
+     if (image == null) {
+         // ImageIO.read returns null when no registered reader understands the data.
+         throw new IOException("Unsupported or unreadable image: " + inputFile);
+     }
+
+     image = convertToGrayscale(image);
+     image = applyThreshold(image, 128);
+     image = applySharpening(image);
+
+     File outputFile = File.createTempFile(UUID.randomUUID().toString(), "." + formatName);
+     try {
+         // ImageIO.write reports a missing writer by returning false, not by throwing.
+         if (!ImageIO.write(image, formatName, outputFile)) {
+             throw new IOException("No ImageIO writer for format: " + formatName);
+         }
+         return outputFile;
+     } catch (IOException | RuntimeException e) {
+         // Do not leave a half-written temp file behind on failure.
+         outputFile.delete();
+         throw e;
+     }
+ }
+
+ /**
+  * Converts an image to grayscale by drawing it onto a new
+  * {@code TYPE_BYTE_GRAY} image of the same size.
+  *
+  * @param image the source image
+  * @return a new grayscale image; the source is not modified
+  */
+ private static BufferedImage convertToGrayscale(BufferedImage image) {
+     BufferedImage grayImage = new BufferedImage(
+             image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
+     java.awt.Graphics graphics = grayImage.getGraphics();
+     try {
+         graphics.drawImage(image, 0, 0, null);
+     } finally {
+         // Release the graphics context instead of waiting for finalisation.
+         graphics.dispose();
+     }
+     return grayImage;
+ }
+
+ /**
+ * 瀵瑰浘鐗囪繘琛屼簩鍊煎寲澶勭悊
+ * @param image 杈撳叆鐨勫浘鐗�
+ * @param threshold 浜屽�煎寲闃堝��
+ * @return 浜屽�煎寲鍚庣殑鍥剧墖
+ */
+ private static BufferedImage applyThreshold(BufferedImage image, int threshold) {
+ BufferedImage binaryImage = new BufferedImage(
+ image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY);
+ for (int y = 0; y < image.getHeight(); y++) {
+ for (int x = 0; x < image.getWidth(); x++) {
+ int rgb = image.getRGB(x, y);
+ int gray = (rgb >> 16) & 0xff;
+ if (gray < threshold) {
+ binaryImage.setRGB(x, y, Color.BLACK.getRGB());
+ } else {
+ binaryImage.setRGB(x, y, Color.WHITE.getRGB());
+ }
+ }
+ }
+ return binaryImage;
+ }
+
+ /**
+ * 瀵瑰浘鐗囪繘琛岄攼鍖栧鐞�
+ * @param image 杈撳叆鐨勫浘鐗�
+ * @return 閿愬寲鍚庣殑鍥剧墖
+ */
+ private static BufferedImage applySharpening(BufferedImage image) {
+ float[] sharpenMatrix = {
+ 0f, -1f, 0f,
+ -1f, 5f, -1f,
+ 0f, -1f, 0f
+ };
+ java.awt.image.Kernel kernel = new java.awt.image.Kernel(3, 3, sharpenMatrix);
+ java.awt.image.ConvolveOp op = new java.awt.image.ConvolveOp(kernel, java.awt.image.ConvolveOp.EDGE_NO_OP, null);
+ return op.filter(image, null);
+ }
+
+ public static Object readPngFile1(File file) throws IOException, TesseractException {
+ // 鑾峰彇 tessdata 鐩綍鐨勭粷瀵硅矾寰�
+ String arch = System.getProperty("sun.arch.data.model");
+ File tessDataDir;
+ if (arch.contains("32")) {
+ tessDataDir = FileUtil.file(".", "/jre_32/tessdata");
+ } else {
+ tessDataDir = FileUtil.file(".", "/jre_64/tessdata");
+ }
+ String path = tessDataDir.getCanonicalPath();
+ // 妫�鏌� chi_sim.traineddata 鏂囦欢鏄惁瀛樺湪
+ File chiSimFile = new File(path, "chi_sim.traineddata");
+ if (!chiSimFile.exists()) {
+ throw new FileNotFoundException("chi_sim.traineddata 鏂囦欢鏈壘鍒帮紝璇锋鏌ヨ矾寰�: " + chiSimFile.getAbsolutePath());
+ }
+ // 璁剧疆閰嶇疆鏂囦欢澶广�佽瘑鍒瑷�銆佽瘑鍒ā寮�
+ Tesseract tesseract = new Tesseract();
+ tesseract.setDatapath(path);
+ // 璁剧疆璇嗗埆璇█涓轰腑鏂囩畝浣撳拰鑻辨枃锛堝鏋滆璁剧疆涓鸿嫳鏂囧彲鏀逛负 "eng"锛�
+ tesseract.setLanguage("chi_sim+eng");
+ // 浣跨敤 OSD 杩涜鑷姩椤甸潰鍒嗗壊浠ヨ繘琛屽浘鍍忓鐞�
+ tesseract.setPageSegMode(1);
+ // 璁剧疆寮曟搸妯″紡鏄缁忕綉缁� LSTM 寮曟搸
+ tesseract.setOcrEngineMode(1);
+ // 寮�濮嬭瘑鍒暣寮犲浘鐗囦腑鐨勬枃瀛�
+ return tesseract.doOCR(file);
+ }
+
+
+
+
public static Object readTxtFile(File file) throws IOException {
FileInputStream fin = new FileInputStream(file);
InputStreamReader reader = new InputStreamReader(fin);
@@ -82,6 +293,7 @@
}
public static Object readCsvFile(File file) {
+
StringBuilder stringBuilder = new StringBuilder();
// 鍒涘缓 reader
try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
@@ -164,6 +376,36 @@
} catch (Exception ignore) {
}
}
+ /**
+  * Loads all thickness records from the {@code model1records} table of a
+  * MySQL database on {@code localhost:3306}.
+  *
+  * <p>The database name, user and password come from {@code getFileDto}
+  * ({@code dbFileName}, {@code dbUserName}, {@code dbPassword}).
+  *
+  * @param getFileDto request carrying the database name and credentials
+  * @return a map with the single key {@code "data"} holding a
+  *         {@code List<ThicknessData>} (empty when the table has no rows)
+  * @throws SQLException if the database name is unusable, or connecting or
+  *                      querying fails; failures are thrown rather than
+  *                      returned so the caller cannot wrap them in a success response
+  */
+ public static Object getmysqlFile(GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+     String dbName = getFileDto.getDbFileName();
+     // The name is spliced into the JDBC URL, so reject characters that would
+     // change the URL structure or smuggle in extra connection parameters.
+     if (dbName == null || !dbName.matches("[^/?&#;\\s]+")) {
+         throw new SQLException("Invalid database name: " + dbName);
+     }
+     String url = "jdbc:mysql://localhost:3306/" + dbName
+             + "?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true";
+     String sql = "SELECT ThinnestPoint, AverageThickness FROM model1records";
+
+     List<ThicknessData> dataList = new ArrayList<>();
+     try (Connection connection = DriverManager.getConnection(
+                  url, getFileDto.getDbUserName(), getFileDto.getDbPassword());
+          Statement statement = connection.createStatement();
+          ResultSet resultSet = statement.executeQuery(sql)) {
+         while (resultSet.next()) {
+             // getDouble yields 0 for SQL NULL columns.
+             dataList.add(new ThicknessData(
+                     resultSet.getDouble("ThinnestPoint"),
+                     resultSet.getDouble("AverageThickness")));
+         }
+     } catch (SQLException e) {
+         // Message prefix is "database query failed: " in Chinese (Unicode
+         // escapes keep it encoding-safe); the original exception is kept as the cause.
+         throw new SQLException(
+                 "\u6570\u636e\u5e93\u67e5\u8be2\u51fa\u9519: " + e.getMessage(), e);
+     }
+
+     Map<String, Object> tableMap = new HashMap<>(16);
+     tableMap.put("data", dataList);
+     return tableMap;
+ }
public static Object readDbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
Map<String, Object> tableMap = new HashMap<>(16);
diff --git a/src/main/resources/META-INF/MANIFEST.MF b/src/main/resources/META-INF/MANIFEST.MF
new file mode 100644
index 0000000..66f3248
--- /dev/null
+++ b/src/main/resources/META-INF/MANIFEST.MF
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+Main-Class: com.chinaztt.mes.docx.DataAcquisitionApplication
+
--
Gitblit v1.9.3