From 866a3e6cbd2df9841dfbbd733e1128938cef3e00 Mon Sep 17 00:00:00 2001
From: zouyu <2723363702@qq.com>
Date: 星期二, 09 十二月 2025 17:45:16 +0800
Subject: [PATCH] 采集器调整

---
 src/main/java/com/chinaztt/mes/docx/util/TakeWords.java |  224 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 192 insertions(+), 32 deletions(-)

diff --git a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
index 9d791dd..58fb34a 100644
--- a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
+++ b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java
@@ -1,6 +1,8 @@
 package com.chinaztt.mes.docx.util;
 
 import cn.hutool.core.io.FileUtil;
+import cn.hutool.http.HttpRequest;
+import cn.hutool.json.JSONUtil;
 import com.chinaztt.mes.docx.dto.GetFileDto;
 import com.opencsv.CSVReader;
 import com.opencsv.CSVReaderBuilder;
@@ -9,12 +11,16 @@
 import net.sourceforge.tess4j.TesseractException;
 import org.apache.commons.lang3.ObjectUtils;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hssf.usermodel.HSSFPictureData;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.ooxml.POIXMLDocument;
+import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
 import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.xssf.usermodel.XSSFSheet;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
 
 import java.io.*;
@@ -52,19 +58,22 @@
         return result;
     }
 
-    public static Object readExcelFile(File file) throws FileNotFoundException, IOException {
+    /**
+     * 璇诲彇excel鏂囦欢,鍏煎.xlsx,.xls鏍煎紡
+     * @param file
+     * @return
+     */
+    public static Object readExcelFile(File file) {
         StringBuilder result = new StringBuilder();
-        //鍒涘缓宸ヤ綔绨垮璞�
-        XSSFWorkbook xssfWorkbook = new XSSFWorkbook(new FileInputStream(file));
-        //鑾峰彇宸ヤ綔绨夸笅sheet鐨勪釜鏁� 鍙鍙栫涓�涓猻heet
-//            int sheetNum = xssfWorkbook.getNumberOfSheets();
-        //閬嶅巻宸ヤ綔绨夸腑鐨勬墍鏈夋暟鎹�
-        for (int i = 0; i < 1; i++) {
-            XSSFSheet sheet = xssfWorkbook.getSheetAt(i);
+        try (
+                FileInputStream fis = new FileInputStream(file);
+                Workbook workbook = WorkbookFactory.create(fis)
+        ) {
+            //閬嶅巻宸ヤ綔绨夸腑鐨勬墍鏈夋暟鎹�
+            Sheet sheet = workbook.getSheetAt(0);
             //鑾峰彇鏈�鍚庝竴琛岀殑num锛屽嵆鎬昏鏁般�傛澶勪粠0寮�濮�
             int maxRow = sheet.getLastRowNum();
             for (int row = 0; row <= maxRow; row++) {
-                //鑾峰彇鏈�鍚庡崟鍏冩牸num锛屽嵆鎬诲崟鍏冩牸鏁� ***娉ㄦ剰锛氭澶勪粠1寮�濮嬭鏁�***
                 int maxRol = sheet.getRow(row).getLastCellNum();
                 StringBuilder aLine = new StringBuilder();
                 for (int rol = 0; rol < maxRol; rol++) {
@@ -72,8 +81,79 @@
                 }
                 result.append(aLine).append("\n");
             }
+        } catch (Exception e) {
+            e.printStackTrace();
         }
         return result.toString();
+    }
+
+    /**
+     * Runs OCR on the last picture embedded in a binary (.xls) Excel workbook.
+     * The picture bytes are written to a temporary file whose path is handed to
+     * ocrImageContext; the temporary file is deleted afterwards.
+     * NOTE(review): the workbook is opened with HSSFWorkbook, so presumably only
+     * .xls input is expected here - confirm that .xlsx files never reach this method.
+     * @param file the workbook to inspect
+     * @return the text returned by ocrImageContext, or an empty string when the
+     *         workbook contains no pictures
+     * @throws IOException if the workbook cannot be read or the temporary file cannot be written
+     */
+    public static Object readPngContextInExcel(File file) throws IOException {
+        try (FileInputStream fis = new FileInputStream(file);
+             HSSFWorkbook workbook = new HSSFWorkbook(fis)) {
+            // Pictures are fetched from the workbook itself, so no sheet lookup is needed
+            List<HSSFPictureData> pictures = workbook.getAllPictures();
+            // Guard: pictures.get(size - 1) would throw IndexOutOfBoundsException on an empty list
+            if (pictures == null || pictures.isEmpty()) {
+                return "";
+            }
+
+            // Only the last embedded picture is processed
+            HSSFPictureData lastPicture = pictures.get(pictures.size() - 1);
+            // The suggested extension becomes the suffix of the temporary file
+            String pictureType = lastPicture.suggestFileExtension();
+            byte[] pictureData = lastPicture.getData();
+
+            File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType);
+            try {
+                // Write inside the outer try so a failed write cannot leave the temporary file behind
+                try (FileOutputStream fos = new FileOutputStream(tempFile)) {
+                    fos.write(pictureData);
+                }
+                return ocrImageContext(tempFile.getAbsolutePath());
+            } finally {
+                // Best-effort cleanup; the result of delete() is not checked
+                tempFile.delete();
+            }
+        }
+    }
+
+    /**
+     * Recognises the text in an image file with Tesseract (Simplified Chinese + English).
+     * @param file the image to recognise
+     * @return the text produced by Tesseract.doOCR
+     * @throws FileNotFoundException if chi_sim.traineddata is missing from the tessdata directory
+     */
+    public static Object readPngFile1(File file) throws IOException, TesseractException {
+        // The JVM data model decides between the jre_32 and jre_64 tessdata directories
+        boolean is32Bit = System.getProperty("sun.arch.data.model").contains("32");
+        String tessDataSubDir = is32Bit ? "/jre_32/tessdata" : "/jre_64/tessdata";
+        String dataPath = FileUtil.file(".", tessDataSubDir).getCanonicalPath();
+        // Fail early when the Simplified Chinese model file is absent
+        File trainedData = new File(dataPath, "chi_sim.traineddata");
+        if (!trainedData.exists()) {
+            throw new FileNotFoundException("chi_sim.traineddata 鏂囦欢鏈壘鍒帮紝璇锋鏌ヨ矾寰�: " + trainedData.getAbsolutePath());
+        }
+        Tesseract ocrEngine = new Tesseract();
+        ocrEngine.setDatapath(dataPath);
+        // Recognise Simplified Chinese and English (use "eng" for English only)
+        ocrEngine.setLanguage("chi_sim+eng");
+        // Page segmentation mode 1: automatic page segmentation with OSD
+        ocrEngine.setPageSegMode(1);
+        // OCR engine mode 1: neural-network LSTM engine
+        ocrEngine.setOcrEngineMode(1);
+        // Recognise the text of the whole image
+        return ocrEngine.doOCR(file);
     }
 
     public static Object readTxtFile(File file) throws IOException {
@@ -83,6 +163,7 @@
         StringBuilder stringBuilder = new StringBuilder();
         String strTmp = "";
         while ((strTmp = buffReader.readLine()) != null) {
+            strTmp = strTmp.replaceAll("\t",",");
             stringBuilder.append(strTmp).append("\n");
         }
         buffReader.close();
@@ -90,23 +171,9 @@
     }
 
     public static Object readCsvFile(File file) {
+
         StringBuilder stringBuilder = new StringBuilder();
         // 鍒涘缓 reader
-//        try (BufferedReader br = Files.newBufferedReader(file.toPath())) {
-//            // CSV鏂囦欢鐨勫垎闅旂
-//            String DELIMITER = ",";
-//            // 鎸夎璇诲彇
-//            String line;
-//            System.out.println(br.readLine());
-//            while ((line = br.readLine()) != null) {
-//                // 鍒嗗壊
-//                String[] columns = line.split(DELIMITER);
-//                // 鎵撳嵃琛�
-//                stringBuilder.append(String.join(splitIdentifier, columns)).append("\n");
-//            }
-//        } catch (IOException ex) {
-//            ex.printStackTrace();
-//        }
         try (FileReader fileReader = new FileReader(file);
              CSVReader csvReader = new CSVReaderBuilder(fileReader).build()) {
 
@@ -145,8 +212,8 @@
         Properties prop = new Properties();
         //璁剧疆缂栫爜
         prop.put("charSet", "UTF-8");
-        prop.put("user", "");
-        prop.put("password", "");
+        prop.put("user",  StringUtils.isNotBlank(getFileDto.getDbUserName())?getFileDto.getDbUserName():"");
+        prop.put("password", StringUtils.isNotBlank(getFileDto.getDbPassword())?getFileDto.getDbPassword():"");
         //鏁版嵁鍦板潃
         String dbUrl = "jdbc:ucanaccess://" + file.getPath();
         //寮曞叆椹卞姩
@@ -160,8 +227,6 @@
         try {
             List<Object> list = new ArrayList<>();
             //閬嶅巻鑾峰彇澶氬紶琛ㄦ暟鎹�
-//            String s = "select * from " + getFileDto.getDbFileName() + " where 1=1" + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode() +
-//                    "' and " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'";
             String s = "select * from " + getFileDto.getDbFileName() + " where 1=1";
             if(StringUtils.isNotBlank(getFileDto.getMdbEntrustCode())){
                 s+=" and " + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode()+ "'";
@@ -169,6 +234,10 @@
             if(StringUtils.isNotBlank(getFileDto.getMdbSampleCode())){
                 s+=" and " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'";
             }
+            if(StringUtils.isNotBlank(getFileDto.getBushingColorField())){
+                s+=" and " + getFileDto.getBushingColorField() + " = '" + getFileDto.getBushingColor() + "'";
+            }
+
             preparedStatement = conn.prepareStatement(s);
             rs = preparedStatement.executeQuery();
             ResultSetMetaData data = rs.getMetaData();
@@ -183,6 +252,7 @@
             }
             tableMap.put("data", list);
         } catch (Exception e) {
+            e.printStackTrace();
         } finally {
             closeA1l(conn, preparedStatement, rs);
         }
@@ -202,6 +272,81 @@
             }
         } catch (Exception ignore) {
         }
+    }
+
+    /**
+     * Queries one table of a MySQL database on localhost:3306 and returns the matching rows.
+     * Filter values are bound as JDBC parameters instead of being concatenated into the SQL
+     * text, so quotes inside a value can no longer change the statement.
+     * NOTE(review): the table name and the filter column names are still concatenated as-is
+     * because identifiers cannot be bound - confirm they never come from untrusted input.
+     * @param getFileDto supplies the database name, credentials, table name, filter columns
+     *                   and filter values
+     * @return a map whose "data" entry lists one column-name-to-value map per row, or the
+     *         result of R.failed(...) when a required name is missing or the query fails
+     */
+    public static Object getMysqlFile(GetFileDto getFileDto){
+        // Database name and table name are both required
+        String dbName = getFileDto.getDbFileName();
+        String table = getFileDto.getDbTable();
+        if (dbName == null || dbName.isEmpty() || table == null || table.isEmpty()) {
+            return R.failed("鏁版嵁搴撳悕鎴栬〃鍚嶄笉鑳戒负绌�");
+        }
+        String url = "jdbc:mysql://localhost:3306/"+dbName+"?useSSL=false&serverTimezone=GMT%2B8&allowPublicKeyRetrieval=true&characterEncoding=utf8";
+
+        // Build the statement text and collect the values to bind, in placeholder order
+        StringBuilder sql = new StringBuilder("SELECT * FROM ").append(table).append(" WHERE 1=1");
+        List<Object> params = new ArrayList<>();
+        if (StringUtils.isNotBlank(getFileDto.getMdbEntrustCode())) {
+            // The entrust-code column may match either the entrust code or the lot batch number
+            sql.append(" AND (").append(getFileDto.getMdbEntrustCode()).append(" = TRIM(?)");
+            params.add(getFileDto.getEntrustCode());
+            if (StringUtils.isNotBlank(getFileDto.getLotBatchNo())) {
+                sql.append(" OR ").append(getFileDto.getMdbEntrustCode()).append(" = TRIM(?)");
+                params.add(getFileDto.getLotBatchNo());
+            }
+            sql.append(")");
+        }
+        if (StringUtils.isNotBlank(getFileDto.getMdbSampleCode())) {
+            sql.append(" AND ").append(getFileDto.getMdbSampleCode()).append(" = ?");
+            params.add(getFileDto.getSampleCode());
+        }
+        if (StringUtils.isNotBlank(getFileDto.getCableTag())) {
+            sql.append(" AND Color = ?");
+            params.add(getFileDto.getCableTag());
+        }
+
+        List<Map<String, Object>> dataList = new ArrayList<>();
+        // try-with-resources closes result set, statement and connection independently, so a
+        // failing close() can no longer leave the remaining resources open
+        try (Connection connection = DriverManager.getConnection(url, getFileDto.getDbUserName(), getFileDto.getDbPassword());
+             PreparedStatement preparedStatement = connection.prepareStatement(sql.toString())) {
+            // A null value is bound as SQL NULL (the concatenated form compared against the text 'null')
+            for (int i = 0; i < params.size(); i++) {
+                // JDBC parameter indexes start at 1
+                preparedStatement.setObject(i + 1, params.get(i));
+            }
+            try (ResultSet resultSet = preparedStatement.executeQuery()) {
+                ResultSetMetaData metaData = resultSet.getMetaData();
+                int columnCount = metaData.getColumnCount();
+                // Copy every row into a column-name-to-value map
+                while (resultSet.next()) {
+                    Map<String, Object> rowData = new HashMap<>();
+                    for (int i = 1; i <= columnCount; i++) {
+                        rowData.put(metaData.getColumnName(i), resultSet.getObject(i));
+                    }
+                    dataList.add(rowData);
+                }
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+            // Any failure, including one raised while closing, is reported to the caller
+            return R.failed("鏁版嵁搴撴煡璇㈠嚭閿�: " + e.getMessage());
+        }
+
+        Map<String, Object> tableMap = new HashMap<>(16);
+        tableMap.put("data", dataList);
+        return tableMap;
     }
 
     public static Object readDbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException {
@@ -252,6 +397,19 @@
         return tableMap;
     }
 
+    /**
+     * Sends an image path to the OCR recognition service and returns the raw response body.
+     * @param imagePath path of the image file, posted as the "imagePath" field of a JSON object
+     * @return the response body returned by the service, unparsed
+     */
+    public static String ocrImageContext(String imagePath){
+        // The request body is a one-field JSON object
+        Map<String,Object> payload = new HashMap<>();
+        payload.put("imagePath",imagePath);
+        HttpRequest ocrRequest = HttpRequest.post("localhost:8080/ocr/recognize").body(JSONUtil.toJsonStr(payload));
+        return ocrRequest.execute().body();
+    }
+
     public static Object readPngFile(File file) throws IOException, TesseractException {
         String canonicalPath32 = FileUtil.file(".", "/jre_32/tessdata").getCanonicalPath();
         String canonicalPath64 = FileUtil.file(".", "/jre_64/tessdata").getCanonicalPath();
@@ -263,6 +421,8 @@
         } else {
             path = canonicalPath64.replaceAll("/chi_sim.traineddata", "").replaceAll("\\\\", "/");
         }
+        // 璁剧疆 TESSDATA_PREFIX 鐜鍙橀噺
+//        System.setProperty("TESSDATA_PREFIX", path);
         //璁剧疆閰嶇疆鏂囦欢澶瑰井瑙嗐�佽瘑鍒瑷�銆佽瘑鍒ā寮�
         Tesseract tesseract = new Tesseract();
         tesseract.setDatapath(path);

--
Gitblit v1.9.3