2e994c878f976e4e8a1c92fea9b28cdf8048ac73..d1df2c7db4331a760303294cfdc5bae1d810867f
2025-07-21 zouyu
Merge branch 'master' into ywx
d1df2c 对比 | 目录
2025-07-19 yaowanxin
修改读取图片
c60a4e 对比 | 目录
2025-07-19 yaowanxin
读取
2d1e90 对比 | 目录
已修改5个文件
已添加2个文件
238 ■■■■■ 文件已修改
pom.xml 28 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/chinaztt/mes/docx/dto/GetFileDto.java 6 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/chinaztt/mes/docx/dto/ThicknessData.java 22 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/chinaztt/mes/docx/service/DocxService.java 2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/chinaztt/mes/docx/service/impl/DocxServiceImpl.java 9 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/chinaztt/mes/docx/util/TakeWords.java 168 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/resources/META-INF/MANIFEST.MF 3 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
pom.xml
@@ -14,6 +14,16 @@
        <spring-boot.version>2.6.3</spring-boot.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>2.0.23</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
        </dependency>
        <!--lombok-->
        <dependency>
            <groupId>org.projectlombok</groupId>
@@ -25,6 +35,24 @@
            <artifactId>tess4j</artifactId>
            <version>5.12.0</version>
        </dependency>
        <dependency>
            <groupId>org.bytedeco</groupId>
            <artifactId>javacv-platform</artifactId>
            <!-- 指定具体版本号 -->
            <version>1.5.10</version>
        </dependency>
        <!-- 处理 .xls (OLE2) 格式 -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>5.2.3</version>
        </dependency>
        <!-- 处理 .xlsx (OOXML) 格式 -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>5.2.3</version>
        </dependency>
        <dependency>
            <groupId>org.xerial</groupId>
src/main/java/com/chinaztt/mes/docx/dto/GetFileDto.java
@@ -22,8 +22,8 @@
    private String fiberOpticRibbonCode;
    // 光纤带数据
    private String fiberOpticRibbon;
    //数据库连接账号
    private String dbUser;
    //数据库连接密码
    //数据库用户名
    private String dbUserName;
    //数据库密码
    private String dbPassword;
}
src/main/java/com/chinaztt/mes/docx/dto/ThicknessData.java
对比新文件
@@ -0,0 +1,22 @@
package com.chinaztt.mes.docx.dto;
import lombok.Data;
@Data
/*
 * Holds one thickness measurement as a pair of values:
 * the thinnest point and the average thickness.
 */
public class ThicknessData {

    /* Smallest thickness value of the measurement. */
    private double thinnestPoint;

    /* Mean thickness value of the measurement. */
    private double averageThickness;

    /**
     * Creates a measurement from its two values.
     *
     * @param thinnestPoint    the thinnest-point value
     * @param averageThickness the average-thickness value
     */
    public ThicknessData(double thinnestPoint, double averageThickness) {
        this.averageThickness = averageThickness;
        this.thinnestPoint = thinnestPoint;
    }

    /**
     * @return the average-thickness value
     */
    public double getAverageThickness() {
        return this.averageThickness;
    }

    /**
     * @return the thinnest-point value
     */
    public double getThinnestPoint() {
        return this.thinnestPoint;
    }
}
src/main/java/com/chinaztt/mes/docx/service/DocxService.java
@@ -11,4 +11,6 @@
    R<?> getFile(GetFileDto getFileDto) throws IOException, SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException, TesseractException;
    R<?> moveFile(String startFilePath, String endFilePath, String fileType);
}
src/main/java/com/chinaztt/mes/docx/service/impl/DocxServiceImpl.java
@@ -3,6 +3,7 @@
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.ObjectUtil;
import com.chinaztt.mes.docx.dto.GetFileDto;
import com.chinaztt.mes.docx.dto.ThicknessData;
import com.chinaztt.mes.docx.service.DocxService;
import com.chinaztt.mes.docx.util.R;
import com.chinaztt.mes.docx.util.TakeWords;
@@ -11,7 +12,7 @@
import org.springframework.stereotype.Service;
import java.io.*;
import java.sql.SQLException;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
@@ -28,6 +29,9 @@
        switch (getFileDto.getFileExtension()) {
            case ".docx":
                return R.ok(TakeWords.readWordFile(file));
            //后缀为.xls的文件
            case ".xls":
                return R.ok(TakeWords.readExcelxlsFile(file));
            case ".xlsx":
                try {
                    return R.ok(TakeWords.readExcelFile(file));
@@ -47,7 +51,7 @@
                if (ObjectUtil.isEmpty(getFileDto.getDbFileName()) || Objects.equals(getFileDto.getDbFileName(), "null")) {
                    return R.failed("未配置.db采集文件名称!");
                }
                return R.ok(TakeWords.readDbFile(file, getFileDto));
                return R.ok(TakeWords.getmysqlFile(getFileDto));
            case ".png":
                return R.ok(TakeWords.readPngFile(file));
            default:
@@ -80,6 +84,7 @@
        }
    }
    public static File getLatestFile(List<File> files) {
        File latestFile = null;
        long latestTime = 0;
src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
@@ -5,21 +5,33 @@
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.exceptions.CsvValidationException;
import com.chinaztt.mes.docx.dto.ThicknessData;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.usermodel.HSSFPicture;
import org.apache.poi.hssf.usermodel.HSSFPictureData;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import java.awt.Color;
import java.awt.image.BufferedImage;
import javax.imageio.ImageIO;
import java.io.*;
import java.sql.*;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class TakeWords {
@@ -76,6 +88,129 @@
        return result.toString();
    }
    public static Object readExcelxlsFile(File file) throws IOException {
        StringBuilder result = new StringBuilder();
        try (FileInputStream fis = new FileInputStream(file);
             Workbook workbook = new HSSFWorkbook(fis)) {
            // èŽ·å–ç¬¬ä¸€ä¸ªå·¥ä½œè¡¨
            Sheet sheet = workbook.getSheetAt(0);
            // è¯»å–图片信息
            if (workbook instanceof HSSFWorkbook) {
                HSSFWorkbook hssfWorkbook = (HSSFWorkbook) workbook;
                List<HSSFPictureData> pictures = hssfWorkbook.getAllPictures();
                for (HSSFPictureData picture : pictures) {
                    // èŽ·å–å›¾ç‰‡ç±»åž‹
                    String pictureType = picture.suggestFileExtension();
                    // èŽ·å–å›¾ç‰‡æ•°æ®
                    byte[] pictureData = picture.getData();
                    // åˆ›å»ºä¸´æ—¶æ–‡ä»¶
                    File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType);
                    try (FileOutputStream fos = new FileOutputStream(tempFile)) {
                        fos.write(pictureData);
                    }
                    // å›¾ç‰‡é¢„处理
//                    File processedFile = preprocessImage(tempFile, pictureType);
                    // è°ƒç”¨ readPngFile1 æ–¹æ³•读取图片文字信息
                    String ocrResult = "";
                    try {
                        ocrResult = (String) readPngFile1(tempFile);
//                        ocrResult = (String) readPngFile(tempFile);
//                        ocrResult = (String) readPngFile(processedFile);
                    } catch (TesseractException e) {
                        ocrResult = "OCR识别失败: " + e.getMessage();
                    } finally {
                        // åˆ é™¤ä¸´æ—¶æ–‡ä»¶
                        tempFile.delete();
//                        processedFile.delete();
                    }
                    // å°†å›¾ç‰‡ä¿¡æ¯æ·»åŠ åˆ°ç»“æžœä¸­
//                    result.append("Picture Type: ").append(pictureType)
//                            .append(", Picture Size: ").append(pictureData.length)
//                            .append(" bytes")
//                            .append(", OCR Result: ").append(ocrResult)
//                            .append(",");
                    String ocrText = fixOcrText(ocrResult);
                    result.append("OCR Result:").append(ocrText).append(",");
                }
            }
//
//            // éåŽ†æ¯ä¸€è¡Œ
//            for (Row row : sheet) {
//                // éåŽ†æ¯ä¸€åˆ—
//                for (Cell cell : row) {
//                    CellType cellType = CellType.forInt(cell.getCellType());
//                    switch (cellType) {
//                        case STRING:
//                            result.append(cell.getStringCellValue());
//                            break;
//                        case NUMERIC:
//                            if (DateUtil.isCellDateFormatted(cell)) {
//                                result.append(cell.getDateCellValue());
//                            } else {
//                                result.append(cell.getNumericCellValue());
//                            }
//                            break;
//                        case BOOLEAN:
//                            result.append(cell.getBooleanCellValue());
//                            break;
//                        case FORMULA:
//                            result.append(cell.getCellFormula());
//                            break;
//                        default:
//                            result.append("");
//                    }
//                    result.append("\t");
//                }
//                result.append("\n");
//            }
        }
        return result;
    }
    // ä¿®æ­£ OCR è¯†åˆ«æ–‡æœ¬ä¸­çš„错误关键词
    public static String fixOcrText(String ocrText) {
        // å®šä¹‰é”™è¯¯å…³é”®è¯å’Œæ­£ç¡®å†…容的映射,这里处理“击 å®‡ å¼º åºžâ€ä¿®æ­£ä¸ºâ€œå‡»ç©¿å¼ºåº¦â€
        // è€ƒè™‘到可能有空格分隔,用正则匹配包含这些字的内容
        ocrText = ocrText.replaceAll("击\\s*宇\\s*强\\s*庞", "击穿强度");
        // è¿˜å¯ä»¥ç»§ç»­æ·»åŠ å…¶ä»–é”™è¯¯ä¿®æ­£ï¼Œæ¯”å¦‚ä¸‹é¢å‡è®¾â€œç”µ åŽ‹ \\(HV\\)”里的空格影响,也修正下
        ocrText = ocrText.replaceAll("电\\s*压\\s*\\(HV\\)", "电压(KV)");
        ocrText = ocrText.replaceAll("电\\s*流\\s*\\(nt\\)", "电流(mA)");
        return ocrText;
    }
    public static Object readPngFile1(File file) throws IOException, TesseractException {
        // èŽ·å– tessdata ç›®å½•的绝对路径
        String arch = System.getProperty("sun.arch.data.model");
        File tessDataDir;
        if (arch.contains("32")) {
            tessDataDir = FileUtil.file(".", "/jre_32/tessdata");
        } else {
            tessDataDir = FileUtil.file(".", "/jre_64/tessdata");
        }
        String path = tessDataDir.getCanonicalPath();
        // æ£€æŸ¥ chi_sim.traineddata æ–‡ä»¶æ˜¯å¦å­˜åœ¨
        File chiSimFile = new File(path, "chi_sim.traineddata");
        if (!chiSimFile.exists()) {
            throw new FileNotFoundException("chi_sim.traineddata æ–‡ä»¶æœªæ‰¾åˆ°ï¼Œè¯·æ£€æŸ¥è·¯å¾„: " + chiSimFile.getAbsolutePath());
        }
        // è®¾ç½®é…ç½®æ–‡ä»¶å¤¹ã€è¯†åˆ«è¯­è¨€ã€è¯†åˆ«æ¨¡å¼
        Tesseract tesseract = new Tesseract();
        tesseract.setDatapath(path);
        // è®¾ç½®è¯†åˆ«è¯­è¨€ä¸ºä¸­æ–‡ç®€ä½“和英文(如果要设置为英文可改为 "eng")
        tesseract.setLanguage("chi_sim+eng");
        // ä½¿ç”¨ OSD è¿›è¡Œè‡ªåŠ¨é¡µé¢åˆ†å‰²ä»¥è¿›è¡Œå›¾åƒå¤„ç†
        tesseract.setPageSegMode(1);
        // è®¾ç½®å¼•擎模式是神经网络 LSTM å¼•擎
        tesseract.setOcrEngineMode(1);
        // å¼€å§‹è¯†åˆ«æ•´å¼ å›¾ç‰‡ä¸­çš„æ–‡å­—
        return tesseract.doOCR(file);
    }
    public static Object readTxtFile(File file) throws IOException {
        FileInputStream fin = new FileInputStream(file);
        InputStreamReader reader = new InputStreamReader(fin);
@@ -90,6 +225,7 @@
    }
    public static Object readCsvFile(File file) {
        StringBuilder stringBuilder = new StringBuilder();
        // 创建 reader
//        try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
@@ -203,6 +339,36 @@
        } catch (Exception ignore) {
        }
    }
    public static Object getmysqlFile(GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
        Map<String, Object> tableMap = new HashMap<>(16);
        // æ•°æ®åº“连接信息
        String url = "jdbc:mysql://localhost:3306/"+getFileDto.getDbFileName()+"?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true";
        String user = getFileDto.getDbUserName();
        String password = getFileDto.getDbPassword();
        List<ThicknessData> dataList = new ArrayList<>();
        try (
                // å»ºç«‹è¿žæŽ¥
                Connection connection = DriverManager.getConnection(url, user, password);
                // åˆ›å»º Statement å¯¹è±¡æ‰§è¡Œ SQL
                Statement statement = connection.createStatement()
        ) {
            String sql = "SELECT ThinnestPoint, AverageThickness FROM model1records";
            ResultSet resultSet = statement.executeQuery(sql);
            // éåŽ†ç»“æžœé›†èŽ·å–æ•°æ®
            while (resultSet.next()) {
                double thinnestPoint = resultSet.getDouble("ThinnestPoint");
                double averageThickness = resultSet.getDouble("AverageThickness");
                dataList.add(new ThicknessData(thinnestPoint, averageThickness));
            }
            tableMap.put("data", dataList);
        } catch (Exception e) {
            e.printStackTrace();
            return R.failed("数据库查询出错: " + e.getMessage());
        }
        return tableMap;
    }
    public static Object readDbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
        Map<String, Object> tableMap = new HashMap<>(16);
@@ -263,6 +429,8 @@
        } else {
            path = canonicalPath64.replaceAll("/chi_sim.traineddata", "").replaceAll("\\\\", "/");
        }
        // 设置 TESSDATA_PREFIX 环境变量
//        System.setProperty("TESSDATA_PREFIX", path);
        //设置配置文件夹微视、识别语言、识别模式
        Tesseract tesseract = new Tesseract();
        tesseract.setDatapath(path);
src/main/resources/META-INF/MANIFEST.MF
对比新文件
@@ -0,0 +1,3 @@
Manifest-Version: 1.0
Main-Class: com.chinaztt.mes.docx.DataAcquisitionApplication