From 3b258a2fd69dd2c4fcf291773672cc154514569a Mon Sep 17 00:00:00 2001
From: zouyu <2723363702@qq.com>
Date: Tue, 22 Jul 2025 17:10:53 +0800
Subject: [PATCH] 采集器.db与.mysql方法调用错误问题修复

---
 src/main/java/com/chinaztt/mes/docx/util/TakeWords.java |  160 +++++++++++++++++++---------------------------------
 1 files changed, 59 insertions(+), 101 deletions(-)

diff --git a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
index 39ba107..2814371 100644
--- a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
+++ b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
@@ -3,37 +3,38 @@
 import cn.hutool.core.io.FileUtil;
 import com.chinaztt.mes.docx.dto.GetFileDto;
 import com.chinaztt.mes.docx.dto.ThicknessData;
+import com.opencsv.CSVReader;
+import com.opencsv.CSVReaderBuilder;
+import com.opencsv.exceptions.CsvValidationException;
 import net.sourceforge.tess4j.Tesseract;
 import net.sourceforge.tess4j.TesseractException;
 import org.apache.commons.lang3.ObjectUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.hssf.usermodel.HSSFPicture;
 import org.apache.poi.hssf.usermodel.HSSFPictureData;
-import org.apache.poi.hssf.usermodel.HSSFSheet;
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.hwpf.extractor.WordExtractor;
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.ss.usermodel.*;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
 import org.apache.poi.xssf.usermodel.XSSFSheet;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import java.awt.Color;
-import java.awt.image.BufferedImage;
-import javax.imageio.ImageIO;
 
 import java.io.*;
-import java.nio.file.Files;
 import java.sql.*;
 import java.util.*;
-import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 public class TakeWords {
 
     private static final String splitIdentifier = "@-@"; // 鑷畾涔夊敮涓�鏍囪瘑绗�
+
+    // Regex pattern matching a number written in scientific notation, e.g. 1.23E-5
+    private static final Pattern SCIENTIFIC_PATTERN = Pattern.compile(
+            "^[+-]?\\d+(\\.\\d+)?[eE][+-]?\\d+$"
+    );
 
     public static Object readWordFile(File file) {
         String result = "";
@@ -101,20 +102,19 @@
                         fos.write(pictureData);
                     }
                     // 鍥剧墖棰勫鐞�
-                    File processedFile = preprocessImage(tempFile, pictureType);
-//                        ocrResult = (String) readPngFile(tempFile);
-
+//                    File processedFile = preprocessImage(tempFile, pictureType);
                     // 璋冪敤 readPngFile1 鏂规硶璇诲彇鍥剧墖鏂囧瓧淇℃伅
                     String ocrResult = "";
                     try {
                         ocrResult = (String) readPngFile1(tempFile);
-//                        ocrResult = (String) readPngFile1(processedFile);
+//                        ocrResult = (String) readPngFile(tempFile);
+//                        ocrResult = (String) readPngFile(processedFile);
                     } catch (TesseractException e) {
                         ocrResult = "OCR璇嗗埆澶辫触: " + e.getMessage();
                     } finally {
                         // 鍒犻櫎涓存椂鏂囦欢
                         tempFile.delete();
-                        processedFile.delete();
+//                        processedFile.delete();
                     }
 
                     // 灏嗗浘鐗囦俊鎭坊鍔犲埌缁撴灉涓�
@@ -173,81 +173,6 @@
         return ocrText;
     }
 
-    /**
-     * 瀵瑰浘鐗囪繘琛岄澶勭悊锛屽寘鎷伆搴﹀寲銆佷簩鍊煎寲鍜岄攼鍖�
-     * @param inputFile 杈撳叆鐨勫浘鐗囨枃浠�
-     * @param formatName 鍥剧墖鏍煎紡鍚嶇О
-     * @return 澶勭悊鍚庣殑鍥剧墖鏂囦欢
-     * @throws IOException 璇诲彇鎴栧啓鍏ュ浘鐗囨椂鍙兘鎶涘嚭鐨勫紓甯�
-     */
-    private static File preprocessImage(File inputFile, String formatName) throws IOException {
-        // 璇诲彇鍥剧墖
-        BufferedImage image = ImageIO.read(inputFile);
-
-        // 鐏板害鍖�
-        image = convertToGrayscale(image);
-        // 浜屽�煎寲
-        image = applyThreshold(image, 128);
-        // 閿愬寲
-        image = applySharpening(image);
-
-        // 鍒涘缓澶勭悊鍚庣殑涓存椂鏂囦欢
-        File outputFile = File.createTempFile(UUID.randomUUID().toString(), "." + formatName);
-        ImageIO.write(image, formatName, outputFile);
-        return outputFile;
-    }
-
-    /**
-     * 灏嗗浘鐗囪浆鎹负鐏板害鍥�
-     * @param image 杈撳叆鐨勫浘鐗�
-     * @return 鐏板害鍖栧悗鐨勫浘鐗�
-     */
-    private static BufferedImage convertToGrayscale(BufferedImage image) {
-        BufferedImage grayImage = new BufferedImage(
-                image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
-        grayImage.getGraphics().drawImage(image, 0, 0, null);
-        return grayImage;
-    }
-
-    /**
-     * 瀵瑰浘鐗囪繘琛屼簩鍊煎寲澶勭悊
-     * @param image 杈撳叆鐨勫浘鐗�
-     * @param threshold 浜屽�煎寲闃堝��
-     * @return 浜屽�煎寲鍚庣殑鍥剧墖
-     */
-    private static BufferedImage applyThreshold(BufferedImage image, int threshold) {
-        BufferedImage binaryImage = new BufferedImage(
-                image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY);
-        for (int y = 0; y < image.getHeight(); y++) {
-            for (int x = 0; x < image.getWidth(); x++) {
-                int rgb = image.getRGB(x, y);
-                int gray = (rgb >> 16) & 0xff;
-                if (gray < threshold) {
-                    binaryImage.setRGB(x, y, Color.BLACK.getRGB());
-                } else {
-                    binaryImage.setRGB(x, y, Color.WHITE.getRGB());
-                }
-            }
-        }
-        return binaryImage;
-    }
-
-    /**
-     * 瀵瑰浘鐗囪繘琛岄攼鍖栧鐞�
-     * @param image 杈撳叆鐨勫浘鐗�
-     * @return 閿愬寲鍚庣殑鍥剧墖
-     */
-    private static BufferedImage applySharpening(BufferedImage image) {
-        float[] sharpenMatrix = {
-                0f, -1f, 0f,
-                -1f, 5f, -1f,
-                0f, -1f, 0f
-        };
-        java.awt.image.Kernel kernel = new java.awt.image.Kernel(3, 3, sharpenMatrix);
-        java.awt.image.ConvolveOp op = new java.awt.image.ConvolveOp(kernel, java.awt.image.ConvolveOp.EDGE_NO_OP, null);
-        return op.filter(image, null);
-    }
-
     public static Object readPngFile1(File file) throws IOException, TesseractException {
         // 鑾峰彇 tessdata 鐩綍鐨勭粷瀵硅矾寰�
         String arch = System.getProperty("sun.arch.data.model");
@@ -296,21 +221,52 @@
 
         StringBuilder stringBuilder = new StringBuilder();
         // 鍒涘缓 reader
-        try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
-            // CSV鏂囦欢鐨勫垎闅旂
-            String DELIMITER = ",";
-            // 鎸夎璇诲彇
-            String line;
-            while ((line = br.readLine()) != null) {
-                // 鍒嗗壊
-                String[] columns = line.split(DELIMITER);
-                // 鎵撳嵃琛�
-                stringBuilder.append(String.join(splitIdentifier, columns)).append("\n");
+//        try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
+//            // CSV鏂囦欢鐨勫垎闅旂
+//            String DELIMITER = ",";
+//            // 鎸夎璇诲彇
+//            String line;
+//            System.out.println(br.readLine());
+//            while ((line = br.readLine()) != null) {
+//                // 鍒嗗壊
+//                String[] columns = line.split(DELIMITER);
+//                // 鎵撳嵃琛�
+//                stringBuilder.append(String.join(splitIdentifier, columns)).append("\n");
+//            }
+//        } catch (IOException ex) {
+//            ex.printStackTrace();
+//        }
+        try (FileReader fileReader = new FileReader(file);
+             CSVReader csvReader = new CSVReaderBuilder(fileReader).build()) {
+
+            String[] nextLine;
+            while ((nextLine = csvReader.readNext()) != null) {
+                // 澶勭悊姣忎竴琛屾暟鎹�
+                for (String cell : nextLine) {
+                    if(StringUtils.isNotBlank(cell)){
+                        stringBuilder.append(scientificToNumber(cell)).append(splitIdentifier);
+                    }
+                }
+                stringBuilder.append("\n");
             }
-        } catch (IOException ex) {
-            ex.printStackTrace();
+        } catch (IOException e) {
+            e.printStackTrace();
+        } catch (CsvValidationException e) {
+            throw new RuntimeException(e);
         }
         return stringBuilder.toString();
+    }
+
+    /**
+     * Expands a scientific-notation cell into plain decimal text, e.g. "1.23E10" becomes "12300000000".
+     * @param cell raw cell text, may be null
+     * @return expanded text; cell itself if it is null, not scientific notation, or too large for a double
+     */
+    public static String scientificToNumber(String cell){
+        if(cell == null || !SCIENTIFIC_PATTERN.matcher(cell).matches() || Double.isInfinite(Double.parseDouble(cell))){
+            return cell;
+        }
+        return java.math.BigDecimal.valueOf(Double.parseDouble(cell)).toPlainString(); // plain digits, no exponent
+    }
 
     public static Object readMdbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
@@ -466,6 +422,8 @@
         } else {
             path = canonicalPath64.replaceAll("/chi_sim.traineddata", "").replaceAll("\\\\", "/");
         }
+        // 璁剧疆 TESSDATA_PREFIX 鐜鍙橀噺
+//        System.setProperty("TESSDATA_PREFIX", path);
         //璁剧疆閰嶇疆鏂囦欢澶瑰井瑙嗐�佽瘑鍒瑷�銆佽瘑鍒ā寮�
         Tesseract tesseract = new Tesseract();
         tesseract.setDatapath(path);

--
Gitblit v1.9.3