zouyu
2025-12-09 866a3e6cbd2df9841dfbbd733e1128938cef3e00
src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
@@ -1,27 +1,41 @@
package com.chinaztt.mes.docx.util;
import cn.hutool.core.io.FileUtil;
import cn.hutool.http.HttpRequest;
import cn.hutool.json.JSONUtil;
import com.chinaztt.mes.docx.dto.GetFileDto;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.exceptions.CsvValidationException;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.usermodel.HSSFPictureData;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import java.io.*;
import java.nio.file.Files;
import java.sql.*;
import java.util.*;
import java.util.regex.Pattern;
public class TakeWords {
    // Separator token appended after each cell when CSV rows are flattened into text.
    private static final String splitIdentifier = "@-@"; // custom unique identifier
    // Matches a whole string written in scientific notation: optional sign, digits,
    // optional fractional part, 'e' or 'E', optional exponent sign, exponent digits.
    private static final Pattern SCIENTIFIC_PATTERN = Pattern.compile(
            "^[+-]?\\d+(\\.\\d+)?[eE][+-]?\\d+$"
    );
    public static Object readWordFile(File file) {
        String result = "";
@@ -44,19 +58,22 @@
        return result;
    }
    public static Object readExcelFile(File file) throws FileNotFoundException, IOException {
    /**
     * 读取excel文件,兼容.xlsx,.xls格式
     * @param file
     * @return
     */
    public static Object readExcelFile(File file) {
        StringBuilder result = new StringBuilder();
        //创建工作簿对象
        XSSFWorkbook xssfWorkbook = new XSSFWorkbook(new FileInputStream(file));
        //获取工作簿下sheet的个数 只读取第一个sheet
//            int sheetNum = xssfWorkbook.getNumberOfSheets();
        //遍历工作簿中的所有数据
        for (int i = 0; i < 1; i++) {
            XSSFSheet sheet = xssfWorkbook.getSheetAt(i);
        try (
                FileInputStream fis = new FileInputStream(file);
                Workbook workbook = WorkbookFactory.create(fis)
        ) {
            //遍历工作簿中的所有数据
            Sheet sheet = workbook.getSheetAt(0);
            //获取最后一行的num,即总行数。此处从0开始
            int maxRow = sheet.getLastRowNum();
            for (int row = 0; row <= maxRow; row++) {
                //获取最后单元格num,即总单元格数 ***注意:此处从1开始计数***
                int maxRol = sheet.getRow(row).getLastCellNum();
                StringBuilder aLine = new StringBuilder();
                for (int rol = 0; rol < maxRol; rol++) {
@@ -64,8 +81,79 @@
                }
                result.append(aLine).append("\n");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result.toString();
    }
    /**
     * 读取excel文件中的图片内容
     * @param file
     * @return
     * @throws IOException
     */
    public static Object readPngContextInExcel(File file) throws IOException {
        String result = "";
        try (FileInputStream fis = new FileInputStream(file);
             Workbook workbook = new HSSFWorkbook(fis)) {
            // 获取第一个工作表
            Sheet sheet = workbook.getSheetAt(0);
            // 读取图片信息
            if (workbook instanceof HSSFWorkbook) {
                HSSFWorkbook hssfWorkbook = (HSSFWorkbook) workbook;
                List<HSSFPictureData> pictures = hssfWorkbook.getAllPictures();
                //处理最后一张图片数据
                HSSFPictureData lastPicture = pictures.get(pictures.size()-1);
                // 获取图片类型
                String pictureType = lastPicture.suggestFileExtension();
                // 获取图片数据
                byte[] pictureData = lastPicture.getData();
                // 创建临时文件
                File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType);
                try (FileOutputStream fos = new FileOutputStream(tempFile)) {
                    fos.write(pictureData);
                }
                String ocrResult;
                try {
                    ocrResult = ocrImageContext(tempFile.getAbsolutePath());
                } finally {
                    // 删除临时文件
                    tempFile.delete();
                }
                result = ocrResult;
            }
        }
        return result;
    }
    public static Object readPngFile1(File file) throws IOException, TesseractException {
        // 获取 tessdata 目录的绝对路径
        String arch = System.getProperty("sun.arch.data.model");
        File tessDataDir;
        if (arch.contains("32")) {
            tessDataDir = FileUtil.file(".", "/jre_32/tessdata");
        } else {
            tessDataDir = FileUtil.file(".", "/jre_64/tessdata");
        }
        String path = tessDataDir.getCanonicalPath();
        // 检查 chi_sim.traineddata 文件是否存在
        File chiSimFile = new File(path, "chi_sim.traineddata");
        if (!chiSimFile.exists()) {
            throw new FileNotFoundException("chi_sim.traineddata 文件未找到,请检查路径: " + chiSimFile.getAbsolutePath());
        }
        // 设置配置文件夹、识别语言、识别模式
        Tesseract tesseract = new Tesseract();
        tesseract.setDatapath(path);
        // 设置识别语言为中文简体和英文(如果要设置为英文可改为 "eng")
        tesseract.setLanguage("chi_sim+eng");
        // 使用 OSD 进行自动页面分割以进行图像处理
        tesseract.setPageSegMode(1);
        // 设置引擎模式是神经网络 LSTM 引擎
        tesseract.setOcrEngineMode(1);
        // 开始识别整张图片中的文字
        return tesseract.doOCR(file);
    }
    public static Object readTxtFile(File file) throws IOException {
@@ -75,6 +163,7 @@
        StringBuilder stringBuilder = new StringBuilder();
        String strTmp = "";
        while ((strTmp = buffReader.readLine()) != null) {
            strTmp = strTmp.replaceAll("\t",",");
            stringBuilder.append(strTmp).append("\n");
        }
        buffReader.close();
@@ -82,23 +171,40 @@
    }
    public static Object readCsvFile(File file) {
        StringBuilder stringBuilder = new StringBuilder();
        // 创建 reader
        try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
            // CSV文件的分隔符
            String DELIMITER = ",";
            // 按行读取
            String line;
            while ((line = br.readLine()) != null) {
                // 分割
                String[] columns = line.split(DELIMITER);
                // 打印行
                stringBuilder.append(String.join(splitIdentifier, columns)).append("\n");
        try (FileReader fileReader = new FileReader(file);
             CSVReader csvReader = new CSVReaderBuilder(fileReader).build()) {
            String[] nextLine;
            while ((nextLine = csvReader.readNext()) != null) {
                // 处理每一行数据
                for (String cell : nextLine) {
                    if(StringUtils.isNotBlank(cell)){
                        stringBuilder.append(scientificToNumber(cell)).append(splitIdentifier);
                    }
                }
                stringBuilder.append("\n");
            }
        } catch (IOException ex) {
            ex.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (CsvValidationException e) {
            throw new RuntimeException(e);
        }
        return stringBuilder.toString();
    }
    /**
     * 将科学计数法转换为数字
     * @param cell
     * @return
     */
    public static String scientificToNumber(String cell){
        if(SCIENTIFIC_PATTERN.matcher(cell).matches()){
            return String.valueOf(Double.parseDouble(cell));
        }
        return cell;
    }
    public static Object readMdbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
@@ -106,8 +212,8 @@
        Properties prop = new Properties();
        //设置编码
        prop.put("charSet", "UTF-8");
        prop.put("user", "");
        prop.put("password", "");
        prop.put("user",  StringUtils.isNotBlank(getFileDto.getDbUserName())?getFileDto.getDbUserName():"");
        prop.put("password", StringUtils.isNotBlank(getFileDto.getDbPassword())?getFileDto.getDbPassword():"");
        //数据地址
        String dbUrl = "jdbc:ucanaccess://" + file.getPath();
        //引入驱动
@@ -121,8 +227,6 @@
        try {
            List<Object> list = new ArrayList<>();
            //遍历获取多张表数据
//            String s = "select * from " + getFileDto.getDbFileName() + " where 1=1" + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode() +
//                    "' and " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'";
            String s = "select * from " + getFileDto.getDbFileName() + " where 1=1";
            if(StringUtils.isNotBlank(getFileDto.getMdbEntrustCode())){
                s+=" and " + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode()+ "'";
@@ -130,6 +234,10 @@
            if(StringUtils.isNotBlank(getFileDto.getMdbSampleCode())){
                s+=" and " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'";
            }
            if(StringUtils.isNotBlank(getFileDto.getBushingColorField())){
                s+=" and " + getFileDto.getBushingColorField() + " = '" + getFileDto.getBushingColor() + "'";
            }
            preparedStatement = conn.prepareStatement(s);
            rs = preparedStatement.executeQuery();
            ResultSetMetaData data = rs.getMetaData();
@@ -144,6 +252,7 @@
            }
            tableMap.put("data", list);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeA1l(conn, preparedStatement, rs);
        }
@@ -163,6 +272,81 @@
            }
        } catch (Exception ignore) {
        }
    }
    public static Object getMysqlFile(GetFileDto getFileDto){
        Map<String, Object> tableMap = new HashMap<>(16);
        // 从 GetFileDto 获取数据库名,对应【文件名称】字段
        String dbName = getFileDto.getDbFileName();
        String user = getFileDto.getDbUserName();
        String password = getFileDto.getDbPassword();
        // 从 GetFileDto 获取数据表名,对应【数据库表名】字段
        String table = getFileDto.getDbTable();
        // 检查数据库名和表名是否为空
        if (dbName == null || dbName.isEmpty() || table == null || table.isEmpty()) {
            return R.failed("数据库名或表名不能为空");
        }
        // 数据库连接信息
        String url = "jdbc:mysql://localhost:3306/"+dbName+"?useSSL=false&serverTimezone=GMT%2B8&allowPublicKeyRetrieval=true&characterEncoding=utf8";
        Connection connection = null;
        PreparedStatement preparedStatement = null;
        ResultSet resultSet = null;
        List<Map<String, Object>> dataList = new ArrayList<>();
        try {
            // 建立连接
            connection = DriverManager.getConnection(url, user, password);
            // 构建基础 SQL
            String sql = "SELECT * FROM "+table+" WHERE 1=1";
            if(StringUtils.isNotBlank(getFileDto.getMdbEntrustCode()) ){
                sql+=" AND (" + getFileDto.getMdbEntrustCode() + " = TRIM('" + getFileDto.getEntrustCode()+ "')";
                if(StringUtils.isNotBlank(getFileDto.getLotBatchNo())){
                    sql+=" OR "+ getFileDto.getMdbEntrustCode() + " = TRIM('" + getFileDto.getLotBatchNo()+ "')";
                }
                sql+=")";
            }
            if(StringUtils.isNotBlank(getFileDto.getMdbSampleCode())){
                sql+=" AND " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'";
            }
            if(StringUtils.isNotBlank(getFileDto.getCableTag())){
                sql+=" AND Color = '" + getFileDto.getCableTag() + "'";
            }
            // 创建 PreparedStatement 对象执行 SQL
            preparedStatement = connection.prepareStatement(sql);
            resultSet = preparedStatement.executeQuery();
            ResultSetMetaData metaData = resultSet.getMetaData();
            int columnCount = metaData.getColumnCount();
            // 遍历结果集获取数据
            while (resultSet.next()) {
                Map<String, Object> rowData = new HashMap<>();
                for (int i = 1; i <= columnCount; i++) {
                    String columnName = metaData.getColumnName(i);
                    rowData.put(columnName, resultSet.getObject(i));
                }
                dataList.add(rowData);
            }
            tableMap.put("data", dataList);
        } catch (Exception e) {
            e.printStackTrace();
            // 假设 R 类有 failed 方法,若没有需补充实现
            return R.failed("数据库查询出错: " + e.getMessage());
        } finally {
            try {
                if (resultSet != null) {
                    resultSet.close();
                }
                if (preparedStatement != null) {
                    preparedStatement.close();
                }
                if (connection != null) {
                    connection.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        return tableMap;
    }
    public static Object readDbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
@@ -213,6 +397,19 @@
        return tableMap;
    }
    /**
     * ocr识别图片内容
     * @param imagePath 图片路径
     * @return
     */
    public static String ocrImageContext(String imagePath){
        //调用ocr识别服务
        Map<String,Object> jsonMap = new HashMap<>();
        jsonMap.put("imagePath",imagePath);
        String requestBody = JSONUtil.toJsonStr(jsonMap);
        return HttpRequest.post("localhost:8080/ocr/recognize").body(requestBody).execute().body();
    }
    public static Object readPngFile(File file) throws IOException, TesseractException {
        String canonicalPath32 = FileUtil.file(".", "/jre_32/tessdata").getCanonicalPath();
        String canonicalPath64 = FileUtil.file(".", "/jre_64/tessdata").getCanonicalPath();
@@ -224,6 +421,8 @@
        } else {
            path = canonicalPath64.replaceAll("/chi_sim.traineddata", "").replaceAll("\\\\", "/");
        }
        // 设置 TESSDATA_PREFIX 环境变量
//        System.setProperty("TESSDATA_PREFIX", path);
        //设置配置文件夹微视、识别语言、识别模式
        Tesseract tesseract = new Tesseract();
        tesseract.setDatapath(path);