zouyu
2025-10-21 3e08f23e92e24734e937b99a128da9b217ffef46
src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
@@ -2,38 +2,39 @@
import cn.hutool.core.io.FileUtil;
import com.chinaztt.mes.docx.dto.GetFileDto;
import com.chinaztt.mes.docx.dto.ThicknessData;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.exceptions.CsvValidationException;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.usermodel.HSSFPicture;
import org.apache.poi.hssf.usermodel.HSSFPictureData;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import java.awt.Color;
import java.awt.image.BufferedImage;
import javax.imageio.ImageIO;
import java.io.*;
import java.nio.file.Files;
import java.sql.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class TakeWords {
    private static final String splitIdentifier = "@-@"; // Custom unique marker written after each cell value to separate columns in the extracted text
    // Matches a whole string in scientific notation: optional sign, integer digits,
    // optional fractional part, 'e' or 'E', then an optionally signed integer exponent.
    private static final Pattern SCIENTIFIC_PATTERN = Pattern.compile(
            "^[+-]?\\d+(\\.\\d+)?[eE][+-]?\\d+$"
    );
    public static Object readWordFile(File file) {
        String result = "";
@@ -56,32 +57,30 @@
        return result;
    }
    public static Object readExcelFile(File file) throws FileNotFoundException, IOException {
    public static Object readExcelFile(File file) throws IOException {
        StringBuilder result = new StringBuilder();
        //创建工作簿对象
        XSSFWorkbook xssfWorkbook = new XSSFWorkbook(new FileInputStream(file));
        XSSFWorkbook xssfWorkbook = new XSSFWorkbook(Files.newInputStream(file.toPath()));
        //获取工作簿下sheet的个数 只读取第一个sheet
//            int sheetNum = xssfWorkbook.getNumberOfSheets();
        //遍历工作簿中的所有数据
        for (int i = 0; i < 1; i++) {
            XSSFSheet sheet = xssfWorkbook.getSheetAt(i);
            //获取最后一行的num,即总行数。此处从0开始
            int maxRow = sheet.getLastRowNum();
            for (int row = 0; row <= maxRow; row++) {
                //获取最后单元格num,即总单元格数 ***注意:此处从1开始计数***
                int maxRol = sheet.getRow(row).getLastCellNum();
                StringBuilder aLine = new StringBuilder();
                for (int rol = 0; rol < maxRol; rol++) {
                    aLine.append(sheet.getRow(row).getCell(rol)).append(splitIdentifier);
                }
                result.append(aLine).append("\n");
        XSSFSheet sheet = xssfWorkbook.getSheetAt(0);
        //获取最后一行的num,即总行数。此处从0开始
        int maxRow = sheet.getLastRowNum();
        for (int row = 1; row <= maxRow; row++) {
            //获取最后单元格num,即总单元格数 ***注意:此处从1开始计数***
            int maxRol = sheet.getRow(row).getLastCellNum();
            StringBuilder aLine = new StringBuilder();
            for (int rol = 0; rol < maxRol; rol++) {
                aLine.append(sheet.getRow(row).getCell(rol)).append(splitIdentifier);
            }
            result.append(aLine).append("\n");
        }
        return result.toString();
    }
    public static Object readExcelxlsFile(File file) throws IOException {
        StringBuilder result = new StringBuilder();
        String result = "";
        try (FileInputStream fis = new FileInputStream(file);
             Workbook workbook = new HSSFWorkbook(fis)) {
            // 获取第一个工作表
@@ -90,41 +89,66 @@
            if (workbook instanceof HSSFWorkbook) {
                HSSFWorkbook hssfWorkbook = (HSSFWorkbook) workbook;
                List<HSSFPictureData> pictures = hssfWorkbook.getAllPictures();
                for (HSSFPictureData picture : pictures) {
                    // 获取图片类型
                    String pictureType = picture.suggestFileExtension();
                    // 获取图片数据
                    byte[] pictureData = picture.getData();
                    // 创建临时文件
                    File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType);
                    try (FileOutputStream fos = new FileOutputStream(tempFile)) {
                        fos.write(pictureData);
                    }
                    // 图片预处理
//                    File processedFile = preprocessImage(tempFile, pictureType);
                    // 调用 readPngFile1 方法读取图片文字信息
                    String ocrResult = "";
                    try {
                        ocrResult = (String) readPngFile1(tempFile);
//                        ocrResult = (String) readPngFile(tempFile);
//                        ocrResult = (String) readPngFile(processedFile);
                    } catch (TesseractException e) {
                        ocrResult = "OCR识别失败: " + e.getMessage();
                    } finally {
                        // 删除临时文件
                        tempFile.delete();
//                        processedFile.delete();
                    }
                    // 将图片信息添加到结果中
//                    result.append("Picture Type: ").append(pictureType)
//                            .append(", Picture Size: ").append(pictureData.length)
//                            .append(" bytes")
//                            .append(", OCR Result: ").append(ocrResult)
//                            .append(",");
                    String ocrText = fixOcrText(ocrResult);
                    result.append("OCR Result:").append(ocrText).append(",");
                //处理最后一张图片数据
                HSSFPictureData lastPicture = pictures.get(pictures.size()-1);
                // 获取图片类型
                String pictureType = lastPicture.suggestFileExtension();
                // 获取图片数据
                byte[] pictureData = lastPicture.getData();
                // 创建临时文件
                File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType);
                try (FileOutputStream fos = new FileOutputStream(tempFile)) {
                    fos.write(pictureData);
                }
                String ocrResult = "";
                try {
                    ocrResult = (String) readPngFile(tempFile);
                } catch (TesseractException e) {
                    ocrResult = "OCR识别失败: " + e.getMessage();
                } finally {
                    // 删除临时文件
                    tempFile.delete();
                }
                result = ocrResult;
//                String ocrText = fixOcrText(ocrResult);
//                result.append("OCR Result:").append(ocrText).append(",");
//                for (HSSFPictureData picture : pictures) {
//                    // 获取图片类型
//                    String pictureType = picture.suggestFileExtension();
//                    // 获取图片数据
//                    byte[] pictureData = picture.getData();
//                    // 创建临时文件
//                    File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType);
//                    try (FileOutputStream fos = new FileOutputStream(tempFile)) {
//                        fos.write(pictureData);
//                    }
//                    // 图片预处理
////                    File processedFile = preprocessImage(tempFile, pictureType);
//                    // 调用 readPngFile1 方法读取图片文字信息
//                    String ocrResult = "";
//                    try {
//                        ocrResult = (String) readPngFile(tempFile);
////                        ocrResult = (String) readPngFile(tempFile);
////                        ocrResult = (String) readPngFile(processedFile);
//                    } catch (TesseractException e) {
//                        ocrResult = "OCR识别失败: " + e.getMessage();
//                    } finally {
//                        // 删除临时文件
////                        tempFile.delete();
////                        processedFile.delete();
//                    }
//
//                    // 将图片信息添加到结果中
////                    result.append("Picture Type: ").append(pictureType)
////                            .append(", Picture Size: ").append(pictureData.length)
////                            .append(" bytes")
////                            .append(", OCR Result: ").append(ocrResult)
////                            .append(",");
//                    String ocrText = fixOcrText(ocrResult);
//                    result.append("OCR Result:").append(ocrText).append(",");
//                }
            }
//
//            // 遍历每一行
@@ -200,9 +224,6 @@
        return tesseract.doOCR(file);
    }
    public static Object readTxtFile(File file) throws IOException {
        FileInputStream fin = new FileInputStream(file);
        InputStreamReader reader = new InputStreamReader(fin);
@@ -220,21 +241,37 @@
        StringBuilder stringBuilder = new StringBuilder();
        // 创建 reader
        try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
            // CSV文件的分隔符
            String DELIMITER = ",";
            // 按行读取
            String line;
            while ((line = br.readLine()) != null) {
                // 分割
                String[] columns = line.split(DELIMITER);
                // 打印行
                stringBuilder.append(String.join(splitIdentifier, columns)).append("\n");
        try (FileReader fileReader = new FileReader(file);
             CSVReader csvReader = new CSVReaderBuilder(fileReader).build()) {
            String[] nextLine;
            while ((nextLine = csvReader.readNext()) != null) {
                // 处理每一行数据
                for (String cell : nextLine) {
                    if(StringUtils.isNotBlank(cell)){
                        stringBuilder.append(scientificToNumber(cell)).append(splitIdentifier);
                    }
                }
                stringBuilder.append("\n");
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (CsvValidationException e) {
            throw new RuntimeException(e);
        }
        return stringBuilder.toString();
    }
    /**
     * 将科学计数法转换为数字
     * @param cell
     * @return
     */
    public static String scientificToNumber(String cell){
        if(SCIENTIFIC_PATTERN.matcher(cell).matches()){
            return String.valueOf(Double.parseDouble(cell));
        }
        return cell;
    }
    public static Object readMdbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
@@ -280,6 +317,7 @@
            }
            tableMap.put("data", list);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeA1l(conn, preparedStatement, rs);
        }
@@ -300,6 +338,7 @@
        } catch (Exception ignore) {
        }
    }
    public static Object getMysqlFile(GetFileDto getFileDto){
        Map<String, Object> tableMap = new HashMap<>(16);
        // 从 GetFileDto 获取数据库名,对应【文件名称】字段
@@ -313,7 +352,7 @@
            return R.failed("数据库名或表名不能为空");
        }
        // 数据库连接信息
        String url = "jdbc:mysql://localhost:3306/"+dbName+"?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true";
        String url = "jdbc:mysql://localhost:3306/"+dbName+"?useSSL=false&serverTimezone=GMT%2B8&allowPublicKeyRetrieval=true&characterEncoding=utf8";
        Connection connection = null;
        PreparedStatement preparedStatement = null;
        ResultSet resultSet = null;
@@ -324,9 +363,15 @@
            connection = DriverManager.getConnection(url, user, password);
            // 构建基础 SQL
            StringBuilder sql = new StringBuilder("SELECT * FROM ").append(table);
            String sql = "SELECT * FROM "+table+" WHERE 1=1";
            if(StringUtils.isNotBlank(getFileDto.getMdbEntrustCode())){
                sql+=" AND " + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode()+ "'";
            }
            if(StringUtils.isNotBlank(getFileDto.getMdbSampleCode())){
                sql+=" AND " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'";
            }
            // 创建 PreparedStatement 对象执行 SQL
            preparedStatement = connection.prepareStatement(sql.toString());
            preparedStatement = connection.prepareStatement(sql);
            resultSet = preparedStatement.executeQuery();
            ResultSetMetaData metaData = resultSet.getMetaData();
            int columnCount = metaData.getColumnCount();
@@ -339,11 +384,6 @@
                }
                dataList.add(rowData);
            }
//            while (resultSet.next()) {
//                double thinnestPoint = resultSet.getDouble("ThinnestPoint");
//                double averageThickness = resultSet.getDouble("AverageThickness");
//                dataList.add(new ThicknessData(thinnestPoint, averageThickness));
//            }
            tableMap.put("data", dataList);
        } catch (Exception e) {
            e.printStackTrace();