From 2d1e909070e2baaa49d3a021364df595ff1d2e72 Mon Sep 17 00:00:00 2001
From: yaowanxin <3588231647@qq.com>
Date: 星期六, 19 七月 2025 11:01:32 +0800
Subject: [PATCH] 读取

---
 src/main/java/com/chinaztt/mes/docx/util/TakeWords.java |  242 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 242 insertions(+), 0 deletions(-)

diff --git a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
index 3d77d75..39ba107 100644
--- a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
+++ b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
@@ -2,22 +2,34 @@
 
 import cn.hutool.core.io.FileUtil;
 import com.chinaztt.mes.docx.dto.GetFileDto;
+import com.chinaztt.mes.docx.dto.ThicknessData;
 import net.sourceforge.tess4j.Tesseract;
 import net.sourceforge.tess4j.TesseractException;
 import org.apache.commons.lang3.ObjectUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hssf.usermodel.HSSFPicture;
+import org.apache.poi.hssf.usermodel.HSSFPictureData;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.*;
 import org.apache.poi.xssf.usermodel.XSSFSheet;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import java.awt.Color;
+import java.awt.image.BufferedImage;
+import javax.imageio.ImageIO;
 
 import java.io.*;
 import java.nio.file.Files;
 import java.sql.*;
 import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 public class TakeWords {
 
@@ -68,6 +80,205 @@
         return result.toString();
     }
 
+    /**
+     * Reads a legacy {@code .xls} workbook and runs OCR on every picture embedded in it.
+     *
+     * <p>Each picture is written to a temporary file, pre-processed, and passed to
+     * {@link #readPngFile1(File)}; the recognized text is cleaned by
+     * {@link #fixOcrText(String)} and appended as {@code "OCR Result:<text>,"}.
+     * Cell contents are not read: that loop is commented out at the end of the method.
+     *
+     * @param file the {@code .xls} file to read
+     * @return a {@link StringBuilder} holding the concatenated OCR results
+     * @throws IOException if the workbook, a temporary file or the tessdata directory cannot be accessed
+     */
+    public static Object readExcelxlsFile(File file) throws IOException {
+        StringBuilder result = new StringBuilder();
+        try (FileInputStream fis = new FileInputStream(file);
+             HSSFWorkbook workbook = new HSSFWorkbook(fis)) {
+            // First worksheet; only the commented-out cell loop below would use it.
+            Sheet sheet = workbook.getSheetAt(0);
+            // Walk over every picture stored in the workbook.
+            for (HSSFPictureData picture : workbook.getAllPictures()) {
+                // File extension POI suggests for this picture; reused as the temp-file suffix.
+                String pictureType = picture.suggestFileExtension();
+                File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType);
+                File processedFile = null;
+                String ocrResult;
+                // All work on the temp files happens inside try/finally so they are removed
+                // even when writing the picture or pre-processing it throws.
+                try {
+                    try (FileOutputStream fos = new FileOutputStream(tempFile)) {
+                        fos.write(picture.getData());
+                    }
+                    // The pre-processed copy is produced, but OCR still runs on the raw picture.
+                    processedFile = preprocessImage(tempFile, pictureType);
+                    ocrResult = (String) readPngFile1(tempFile);
+                } catch (TesseractException e) {
+                    ocrResult = "OCR识别失败: " + e.getMessage();
+                } finally {
+                    // Best-effort removal of the temporary files.
+                    tempFile.delete();
+                    if (processedFile != null) {
+                        processedFile.delete();
+                    }
+                }
+                result.append("OCR Result:").append(fixOcrText(ocrResult)).append(",");
+            }
+            // Cell-text extraction is disabled; the loop is kept here for reference.
+//
+//            // Iterate over every row
+//            for (Row row : sheet) {
+//                // Iterate over every column
+//                for (Cell cell : row) {
+//                    CellType cellType = CellType.forInt(cell.getCellType());
+//                    switch (cellType) {
+//                        case STRING:
+//                            result.append(cell.getStringCellValue());
+//                            break;
+//                        case NUMERIC:
+//                            if (DateUtil.isCellDateFormatted(cell)) {
+//                                result.append(cell.getDateCellValue());
+//                            } else {
+//                                result.append(cell.getNumericCellValue());
+//                            }
+//                            break;
+//                        case BOOLEAN:
+//                            result.append(cell.getBooleanCellValue());
+//                            break;
+//                        case FORMULA:
+//                            result.append(cell.getCellFormula());
+//                            break;
+//                        default:
+//                            result.append("");
+//                    }
+//                    result.append("\t");
+//                }
+//                result.append("\n");
+//            }
+        }
+        // NOTE(review): the StringBuilder itself (not a String) is returned; left unchanged for existing callers.
+        return result;
+
+    }
+
+    /** Repairs known OCR misreadings; CJK text is written as Unicode escapes so a charset mix-up cannot corrupt it. */
+    public static String fixOcrText(String ocrText) {
+        // Misread label "击宇强庞" (characters possibly separated by spaces) is corrected to "击穿强度" (breakdown strength).
+        // The \\s* between the characters tolerates the whitespace that OCR tends to insert.
+        ocrText = ocrText.replaceAll("\u51FB\\s*\u5B87\\s*\u5F3A\\s*\u5E9E", "\u51FB\u7A7F\u5F3A\u5EA6");
+        // Voltage label read as "电压(HV)" becomes "电压(KV)"; current label read as "电流(nt)" becomes "电流(mA)".
+        ocrText = ocrText.replaceAll("\u7535\\s*\u538B\\s*\\(HV\\)", "\u7535\u538B(KV)");
+        ocrText = ocrText.replaceAll("\u7535\\s*\u6D41\\s*\\(nt\\)", "\u7535\u6D41(mA)");
+        return ocrText;
+    }
+
+    /**
+     * Pre-processes an image for OCR: grayscale conversion, binarization at threshold 128, then sharpening.
+     * @param inputFile the image file to read
+     * @param formatName ImageIO format name used for writing; also used as the temp-file suffix
+     * @return a new temporary file for the processed image (it may hold no image data if ImageIO has no writer for {@code formatName})
+     * @throws IOException if the input cannot be decoded as an image, or reading/writing fails
+     */
+    private static File preprocessImage(File inputFile, String formatName) throws IOException {
+        BufferedImage image = ImageIO.read(inputFile);
+        // ImageIO.read signals "no reader understands this data" by returning null, not by throwing.
+        if (image == null) {
+            throw new IOException("Unsupported or unreadable image: " + inputFile);
+        }
+        image = applySharpening(applyThreshold(convertToGrayscale(image), 128));
+        File outputFile = File.createTempFile(UUID.randomUUID().toString(), "." + formatName);
+        try {
+            ImageIO.write(image, formatName, outputFile);
+        } catch (IOException | RuntimeException e) {
+            outputFile.delete(); // do not leave the temp file behind when encoding fails
+            throw e;
+        }
+        return outputFile;
+    }
+
+    /** Converts an image to 8-bit grayscale by drawing it onto a {@code TYPE_BYTE_GRAY} canvas.
+     * @param image the source image (left unmodified)
+     * @return a new grayscale image with the same dimensions
+     */
+    private static BufferedImage convertToGrayscale(BufferedImage image) {
+        BufferedImage grayImage = new BufferedImage(image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
+        java.awt.Graphics graphics = grayImage.getGraphics();
+        graphics.drawImage(image, 0, 0, null);
+        graphics.dispose(); // release the graphics context now instead of leaving it to garbage collection
+        return grayImage;
+    }
+
+    /**
+     * Binarizes an image: every pixel becomes pure black or pure white.
+     * Only the red channel (bits 16-23 of the packed RGB value) is compared with the threshold.
+     * @param image the image to binarize (its red channel is treated as the gray level)
+     * @param threshold pixels whose red channel is below this value turn black, all others white
+     * @return a new {@code TYPE_BYTE_BINARY} image with the same dimensions
+     */
+    private static BufferedImage applyThreshold(BufferedImage image, int threshold) {
+        final int black = Color.BLACK.getRGB();
+        final int white = Color.WHITE.getRGB();
+        BufferedImage binaryImage = new BufferedImage(
+                image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY);
+        for (int row = 0; row < image.getHeight(); row++) {
+            for (int col = 0; col < image.getWidth(); col++) {
+                // Red channel of the packed pixel value.
+                int level = (image.getRGB(col, row) >> 16) & 0xff;
+                boolean dark = level < threshold;
+                binaryImage.setRGB(col, row, dark ? black : white);
+            }
+        }
+        return binaryImage;
+    }
+
+    /**
+     * Sharpens an image by convolving it with a 3x3 kernel (center weight 5, direct neighbours -1, corners 0).
+     * @param image the image to sharpen
+     * @return the image produced by {@code ConvolveOp.filter} with a null destination; edge pixels are copied unchanged (EDGE_NO_OP)
+     */
+    private static BufferedImage applySharpening(BufferedImage image) {
+        final float center = 5f;
+        final float edge = -1f;
+        final float corner = 0f;
+        float[] weights = {corner, edge, corner, edge, center, edge, corner, edge, corner};
+        java.awt.image.Kernel sharpenKernel = new java.awt.image.Kernel(3, 3, weights);
+        java.awt.image.ConvolveOp convolution = new java.awt.image.ConvolveOp(
+                sharpenKernel, java.awt.image.ConvolveOp.EDGE_NO_OP, null);
+        return convolution.filter(image, null);
+    }
+
+    /**
+     * Runs Tesseract OCR (Simplified Chinese + English) on an image file.
+     * @param file the image to recognize
+     * @return the recognized text (a {@code String})
+     * @throws IOException if the tessdata path cannot be resolved or chi_sim.traineddata is missing
+     * @throws TesseractException if recognition fails
+     */
+    public static Object readPngFile1(File file) throws IOException, TesseractException {
+        // "sun.arch.data.model" is JVM-specific and may be absent; anything but an explicit 32 selects the 64-bit directory.
+        String arch = System.getProperty("sun.arch.data.model", "");
+        String tessDataDirName = arch.contains("32") ? "/jre_32/tessdata" : "/jre_64/tessdata";
+        String path = FileUtil.file(".", tessDataDirName).getCanonicalPath();
+        // Fail early with a clear message when the Chinese model file is not present.
+        File chiSimFile = new File(path, "chi_sim.traineddata");
+        if (!chiSimFile.exists()) {
+            throw new FileNotFoundException("chi_sim.traineddata 文件未找到，请检查路径: " + chiSimFile.getAbsolutePath());
+        }
+        Tesseract tesseract = new Tesseract();
+        tesseract.setDatapath(path);
+        // Recognize Simplified Chinese and English together.
+        tesseract.setLanguage("chi_sim+eng");
+        // Page segmentation mode 1: automatic page segmentation with OSD.
+        tesseract.setPageSegMode(1);
+        // OCR engine mode 1: neural-net LSTM engine only.
+        tesseract.setOcrEngineMode(1);
+        return tesseract.doOCR(file);
+    }
+
+
+
+
     public static Object readTxtFile(File file) throws IOException {
         FileInputStream fin = new FileInputStream(file);
         InputStreamReader reader = new InputStreamReader(fin);
@@ -82,6 +293,7 @@
     }
 
     public static Object readCsvFile(File file) {
+
         StringBuilder stringBuilder = new StringBuilder();
         // 鍒涘缓 reader
         try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
@@ -164,6 +376,36 @@
         } catch (Exception ignore) {
         }
     }
+    /**
+     * Loads every (ThinnestPoint, AverageThickness) row of table {@code model1records} from a local MySQL database.
+     * @param getFileDto supplies the database name, user name and password
+     * @return a map whose {@code "data"} entry is the list of {@link ThicknessData}, or {@code R.failed(...)} on error
+     */
+    public static Object getmysqlFile(GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
+        Map<String, Object> tableMap = new HashMap<>(16);
+        // Connection settings: the server is fixed to localhost:3306, only the schema name comes from the DTO.
+        String url = "jdbc:mysql://localhost:3306/"+getFileDto.getDbFileName()+"?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true";
+        String user = getFileDto.getDbUserName();
+        String password = getFileDto.getDbPassword();
+        List<ThicknessData> dataList = new ArrayList<>();
+        String sql = "SELECT ThinnestPoint, AverageThickness FROM model1records";
+        // Connection, statement and result set are all closed by try-with-resources, in reverse order.
+        try (Connection connection = DriverManager.getConnection(url, user, password);
+             Statement statement = connection.createStatement();
+             ResultSet resultSet = statement.executeQuery(sql)) {
+            // Copy each row into a ThicknessData value object.
+            while (resultSet.next()) {
+                double thinnestPoint = resultSet.getDouble("ThinnestPoint");
+                double averageThickness = resultSet.getDouble("AverageThickness");
+                dataList.add(new ThicknessData(thinnestPoint, averageThickness));
+            }
+            tableMap.put("data", dataList);
+        } catch (Exception e) {
+            e.printStackTrace();
+            return R.failed("数据库查询出错: " + e.getMessage());
+        }
+        return tableMap;
+    }
 
     public static Object readDbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
         Map<String, Object> tableMap = new HashMap<>(16);

--
Gitblit v1.9.3