yaowanxin
2025-07-24 9ff6b2fa38167d93de00e6d730d2b038d7731776
src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
@@ -2,6 +2,9 @@
import cn.hutool.core.io.FileUtil;
import com.chinaztt.mes.docx.dto.GetFileDto;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.exceptions.CsvValidationException;
import com.chinaztt.mes.docx.dto.ThicknessData;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
@@ -25,15 +28,20 @@
import javax.imageio.ImageIO;
import java.io.*;
import java.nio.file.Files;
import java.sql.*;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class TakeWords {
    // Custom marker string used as the column separator in the text built from CSV rows.
    private static final String splitIdentifier = "@-@";
    // Matches a whole string written in scientific notation: optional sign, integer digits,
    // optional fractional part, 'e' or 'E', optional exponent sign, exponent digits.
    private static final Pattern SCIENTIFIC_PATTERN = Pattern.compile(
            "^[+-]?\\d+(\\.\\d+)?[eE][+-]?\\d+$"
    );
    public static Object readWordFile(File file) {
        String result = "";
@@ -101,20 +109,19 @@
                        fos.write(pictureData);
                    }
                    // 图片预处理
                    File processedFile = preprocessImage(tempFile, pictureType);
//                        ocrResult = (String) readPngFile(tempFile);
//                    File processedFile = preprocessImage(tempFile, pictureType);
                    // 调用 readPngFile1 方法读取图片文字信息
                    String ocrResult = "";
                    try {
                        ocrResult = (String) readPngFile1(tempFile);
//                        ocrResult = (String) readPngFile1(processedFile);
//                        ocrResult = (String) readPngFile(tempFile);
//                        ocrResult = (String) readPngFile(processedFile);
                    } catch (TesseractException e) {
                        ocrResult = "OCR识别失败: " + e.getMessage();
                    } finally {
                        // 删除临时文件
                        tempFile.delete();
                        processedFile.delete();
//                        processedFile.delete();
                    }
                    // 将图片信息添加到结果中
@@ -173,81 +180,6 @@
        return ocrText;
    }
    /**
     * 对图片进行预处理,包括灰度化、二值化和锐化
     * @param inputFile 输入的图片文件
     * @param formatName 图片格式名称
     * @return 处理后的图片文件
     * @throws IOException 读取或写入图片时可能抛出的异常
     */
    private static File preprocessImage(File inputFile, String formatName) throws IOException {
        // 读取图片
        BufferedImage image = ImageIO.read(inputFile);
        // 灰度化
        image = convertToGrayscale(image);
        // 二值化
        image = applyThreshold(image, 128);
        // 锐化
        image = applySharpening(image);
        // 创建处理后的临时文件
        File outputFile = File.createTempFile(UUID.randomUUID().toString(), "." + formatName);
        ImageIO.write(image, formatName, outputFile);
        return outputFile;
    }
    /**
     * 将图片转换为灰度图
     * @param image 输入的图片
     * @return 灰度化后的图片
     */
    private static BufferedImage convertToGrayscale(BufferedImage image) {
        BufferedImage grayImage = new BufferedImage(
                image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
        grayImage.getGraphics().drawImage(image, 0, 0, null);
        return grayImage;
    }
    /**
     * 对图片进行二值化处理
     * @param image 输入的图片
     * @param threshold 二值化阈值
     * @return 二值化后的图片
     */
    private static BufferedImage applyThreshold(BufferedImage image, int threshold) {
        BufferedImage binaryImage = new BufferedImage(
                image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY);
        for (int y = 0; y < image.getHeight(); y++) {
            for (int x = 0; x < image.getWidth(); x++) {
                int rgb = image.getRGB(x, y);
                int gray = (rgb >> 16) & 0xff;
                if (gray < threshold) {
                    binaryImage.setRGB(x, y, Color.BLACK.getRGB());
                } else {
                    binaryImage.setRGB(x, y, Color.WHITE.getRGB());
                }
            }
        }
        return binaryImage;
    }
    /**
     * 对图片进行锐化处理
     * @param image 输入的图片
     * @return 锐化后的图片
     */
    private static BufferedImage applySharpening(BufferedImage image) {
        float[] sharpenMatrix = {
                0f, -1f, 0f,
                -1f, 5f, -1f,
                0f, -1f, 0f
        };
        java.awt.image.Kernel kernel = new java.awt.image.Kernel(3, 3, sharpenMatrix);
        java.awt.image.ConvolveOp op = new java.awt.image.ConvolveOp(kernel, java.awt.image.ConvolveOp.EDGE_NO_OP, null);
        return op.filter(image, null);
    }
    public static Object readPngFile1(File file) throws IOException, TesseractException {
        // 获取 tessdata 目录的绝对路径
        String arch = System.getProperty("sun.arch.data.model");
@@ -296,21 +228,52 @@
        StringBuilder stringBuilder = new StringBuilder();
        // 创建 reader
        try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
            // CSV文件的分隔符
            String DELIMITER = ",";
            // 按行读取
            String line;
            while ((line = br.readLine()) != null) {
                // 分割
                String[] columns = line.split(DELIMITER);
                // 打印行
                stringBuilder.append(String.join(splitIdentifier, columns)).append("\n");
//        try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
//            // CSV文件的分隔符
//            String DELIMITER = ",";
//            // 按行读取
//            String line;
//            System.out.println(br.readLine());
//            while ((line = br.readLine()) != null) {
//                // 分割
//                String[] columns = line.split(DELIMITER);
//                // 打印行
//                stringBuilder.append(String.join(splitIdentifier, columns)).append("\n");
//            }
//        } catch (IOException ex) {
//            ex.printStackTrace();
//        }
        try (FileReader fileReader = new FileReader(file);
             CSVReader csvReader = new CSVReaderBuilder(fileReader).build()) {
            String[] nextLine;
            while ((nextLine = csvReader.readNext()) != null) {
                // 处理每一行数据
                for (String cell : nextLine) {
                    if(StringUtils.isNotBlank(cell)){
                        stringBuilder.append(scientificToNumber(cell)).append(splitIdentifier);
                    }
                }
                stringBuilder.append("\n");
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (CsvValidationException e) {
            throw new RuntimeException(e);
        }
        return stringBuilder.toString();
    }
    /**
     * 将科学计数法转换为数字
     * @param cell
     * @return
     */
    public static String scientificToNumber(String cell){
        if(SCIENTIFIC_PATTERN.matcher(cell).matches()){
            return String.valueOf(Double.parseDouble(cell));
        }
        return cell;
    }
    public static Object readMdbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
@@ -376,33 +339,69 @@
        } catch (Exception ignore) {
        }
    }
    public static Object getmysqlFile(GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    public static Object getMysqlFile(GetFileDto getFileDto){
        Map<String, Object> tableMap = new HashMap<>(16);
        // 数据库连接信息
        String url = "jdbc:mysql://localhost:3306/"+getFileDto.getDbFileName()+"?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true";
        // 从 GetFileDto 获取数据库名,对应【文件名称】字段
        String dbName = getFileDto.getDbFileName();
        String user = getFileDto.getDbUserName();
        String password = getFileDto.getDbPassword();
        List<ThicknessData> dataList = new ArrayList<>();
        // 从 GetFileDto 获取数据表名,对应【数据库表名】字段
        String table = getFileDto.getDbTable();
        // 检查数据库名和表名是否为空
        if (dbName == null || dbName.isEmpty() || table == null || table.isEmpty()) {
            return R.failed("数据库名或表名不能为空");
        }
        // 数据库连接信息
        String url = "jdbc:mysql://localhost:3306/"+dbName+"?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true";
        Connection connection = null;
        PreparedStatement preparedStatement = null;
        ResultSet resultSet = null;
        List<Map<String, Object>> dataList = new ArrayList<>();
        try (
                // 建立连接
                Connection connection = DriverManager.getConnection(url, user, password);
                // 创建 Statement 对象执行 SQL
                Statement statement = connection.createStatement()
        ) {
            String sql = "SELECT ThinnestPoint, AverageThickness FROM model1records";
            ResultSet resultSet = statement.executeQuery(sql);
        try {
            // 建立连接
            connection = DriverManager.getConnection(url, user, password);
            // 构建基础 SQL
            StringBuilder sql = new StringBuilder("SELECT * FROM ").append(table);
            // 创建 PreparedStatement 对象执行 SQL
            preparedStatement = connection.prepareStatement(sql.toString());
            resultSet = preparedStatement.executeQuery();
            ResultSetMetaData metaData = resultSet.getMetaData();
            int columnCount = metaData.getColumnCount();
            // 遍历结果集获取数据
            while (resultSet.next()) {
                double thinnestPoint = resultSet.getDouble("ThinnestPoint");
                double averageThickness = resultSet.getDouble("AverageThickness");
                dataList.add(new ThicknessData(thinnestPoint, averageThickness));
                Map<String, Object> rowData = new HashMap<>();
                for (int i = 1; i <= columnCount; i++) {
                    String columnName = metaData.getColumnName(i);
                    rowData.put(columnName, resultSet.getObject(i));
                }
                dataList.add(rowData);
            }
//            while (resultSet.next()) {
//                double thinnestPoint = resultSet.getDouble("ThinnestPoint");
//                double averageThickness = resultSet.getDouble("AverageThickness");
//                dataList.add(new ThicknessData(thinnestPoint, averageThickness));
//            }
            tableMap.put("data", dataList);
        } catch (Exception e) {
            e.printStackTrace();
            // 假设 R 类有 failed 方法,若没有需补充实现
            return R.failed("数据库查询出错: " + e.getMessage());
        } finally {
            try {
                if (resultSet != null) {
                    resultSet.close();
                }
                if (preparedStatement != null) {
                    preparedStatement.close();
                }
                if (connection != null) {
                    connection.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        return tableMap;
    }
@@ -466,6 +465,8 @@
        } else {
            path = canonicalPath64.replaceAll("/chi_sim.traineddata", "").replaceAll("\\\\", "/");
        }
        // 设置 TESSDATA_PREFIX 环境变量
//        System.setProperty("TESSDATA_PREFIX", path);
        //设置配置文件夹微视、识别语言、识别模式
        Tesseract tesseract = new Tesseract();
        tesseract.setDatapath(path);