From 3b258a2fd69dd2c4fcf291773672cc154514569a Mon Sep 17 00:00:00 2001 From: zouyu <2723363702@qq.com> Date: 星期二, 22 七月 2025 17:10:53 +0800 Subject: [PATCH] 采集器.db与.mysql方法调用错误问题修复 --- src/main/java/com/chinaztt/mes/docx/util/TakeWords.java | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 161 insertions(+), 0 deletions(-) diff --git a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java index 9d791dd..2814371 100644 --- a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java +++ b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java @@ -2,6 +2,7 @@ import cn.hutool.core.io.FileUtil; import com.chinaztt.mes.docx.dto.GetFileDto; +import com.chinaztt.mes.docx.dto.ThicknessData; import com.opencsv.CSVReader; import com.opencsv.CSVReaderBuilder; import com.opencsv.exceptions.CsvValidationException; @@ -11,8 +12,12 @@ import org.apache.commons.lang3.StringUtils; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.hssf.usermodel.HSSFPictureData; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; @@ -76,6 +81,129 @@ return result.toString(); } + public static Object readExcelxlsFile(File file) throws IOException { + StringBuilder result = new StringBuilder(); + try (FileInputStream fis = new FileInputStream(file); + Workbook workbook = new HSSFWorkbook(fis)) { + // 鑾峰彇绗竴涓伐浣滆〃 + Sheet sheet = workbook.getSheetAt(0); + // 璇诲彇鍥剧墖淇℃伅 + if (workbook instanceof HSSFWorkbook) { + HSSFWorkbook hssfWorkbook = (HSSFWorkbook) workbook; + List<HSSFPictureData> pictures = hssfWorkbook.getAllPictures(); + for (HSSFPictureData picture : pictures) { + // 鑾峰彇鍥剧墖绫诲瀷 + String pictureType = picture.suggestFileExtension(); + // 鑾峰彇鍥剧墖鏁版嵁 + byte[] pictureData = picture.getData(); + // 鍒涘缓涓存椂鏂囦欢 + File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType); + try (FileOutputStream fos = new FileOutputStream(tempFile)) { + fos.write(pictureData); + } + // 鍥剧墖棰勫鐞� +// File processedFile = preprocessImage(tempFile, pictureType); + // 璋冪敤 readPngFile1 鏂规硶璇诲彇鍥剧墖鏂囧瓧淇℃伅 + String ocrResult = ""; + try { + ocrResult = (String) readPngFile1(tempFile); +// ocrResult = (String) readPngFile(tempFile); +// ocrResult = (String) readPngFile(processedFile); + } catch (TesseractException e) { + ocrResult = "OCR璇嗗埆澶辫触: " + e.getMessage(); + } finally { + // 鍒犻櫎涓存椂鏂囦欢 + tempFile.delete(); +// processedFile.delete(); + } + + // 灏嗗浘鐗囦俊鎭坊鍔犲埌缁撴灉涓� +// result.append("Picture Type: ").append(pictureType) +// .append(", Picture Size: ").append(pictureData.length) +// .append(" bytes") +// .append(", OCR Result: ").append(ocrResult) +// .append(","); + String ocrText = fixOcrText(ocrResult); + result.append("OCR Result:").append(ocrText).append(","); + } + } +// +// // 閬嶅巻姣忎竴琛� +// for (Row row : sheet) { +// // 閬嶅巻姣忎竴鍒� +// for (Cell cell : row) { +// CellType cellType = CellType.forInt(cell.getCellType()); +// switch (cellType) { +// case STRING: +// result.append(cell.getStringCellValue()); +// break; +// case NUMERIC: +// if (DateUtil.isCellDateFormatted(cell)) { +// result.append(cell.getDateCellValue()); +// } else { +// result.append(cell.getNumericCellValue()); +// } +// break; +// case BOOLEAN: +// result.append(cell.getBooleanCellValue()); +// break; +// case FORMULA: +// result.append(cell.getCellFormula()); +// break; +// default: +// result.append(""); +// } +// result.append("\t"); +// } +// result.append("\n"); +// } + } + return result; + + } + + // 淇 OCR 璇嗗埆鏂囨湰涓殑閿欒鍏抽敭璇� + public static String fixOcrText(String ocrText) { + // 瀹氫箟閿欒鍏抽敭璇嶅拰姝g‘鍐呭鐨勬槧灏勶紝杩欓噷澶勭悊鈥滃嚮 瀹� 寮� 搴炩�濅慨姝d负鈥滃嚮绌垮己搴︹�� + // 鑰冭檻鍒板彲鑳芥湁绌烘牸鍒嗛殧锛岀敤姝e垯鍖归厤鍖呭惈杩欎簺瀛楃殑鍐呭 + ocrText = ocrText.replaceAll("鍑籠\s*瀹嘰\s*寮篭\s*搴�", "鍑荤┛寮哄害"); + // 杩樺彲浠ョ户缁坊鍔犲叾浠栭敊璇慨姝o紝姣斿涓嬮潰鍋囪鈥滅數 鍘� \\(HV\\)鈥濋噷鐨勭┖鏍煎奖鍝嶏紝涔熶慨姝d笅 + ocrText = ocrText.replaceAll("鐢礬\s*鍘媆\s*\\(HV\\)", "鐢靛帇(KV)"); + ocrText = ocrText.replaceAll("鐢礬\s*娴乗\s*\\(nt\\)", "鐢垫祦(mA)"); + return ocrText; + } + + public static Object readPngFile1(File file) throws IOException, TesseractException { + // 鑾峰彇 tessdata 鐩綍鐨勭粷瀵硅矾寰� + String arch = System.getProperty("sun.arch.data.model"); + File tessDataDir; + if (arch.contains("32")) { + tessDataDir = FileUtil.file(".", "/jre_32/tessdata"); + } else { + tessDataDir = FileUtil.file(".", "/jre_64/tessdata"); + } + String path = tessDataDir.getCanonicalPath(); + // 妫�鏌� chi_sim.traineddata 鏂囦欢鏄惁瀛樺湪 + File chiSimFile = new File(path, "chi_sim.traineddata"); + if (!chiSimFile.exists()) { + throw new FileNotFoundException("chi_sim.traineddata 鏂囦欢鏈壘鍒帮紝璇锋鏌ヨ矾寰�: " + chiSimFile.getAbsolutePath()); + } + // 璁剧疆閰嶇疆鏂囦欢澶广�佽瘑鍒瑷�銆佽瘑鍒ā寮� + Tesseract tesseract = new Tesseract(); + tesseract.setDatapath(path); + // 璁剧疆璇嗗埆璇█涓轰腑鏂囩畝浣撳拰鑻辨枃锛堝鏋滆璁剧疆涓鸿嫳鏂囧彲鏀逛负 "eng"锛� + tesseract.setLanguage("chi_sim+eng"); + // 浣跨敤 OSD 杩涜鑷姩椤甸潰鍒嗗壊浠ヨ繘琛屽浘鍍忓鐞� + tesseract.setPageSegMode(1); + // 璁剧疆寮曟搸妯″紡鏄缁忕綉缁� LSTM 寮曟搸 + tesseract.setOcrEngineMode(1); + // 寮�濮嬭瘑鍒暣寮犲浘鐗囦腑鐨勬枃瀛� + return tesseract.doOCR(file); + } + + + + public static Object readTxtFile(File file) throws IOException { FileInputStream fin = new FileInputStream(file); InputStreamReader reader = new InputStreamReader(fin); @@ -90,6 +218,7 @@ } public static Object readCsvFile(File file) { + StringBuilder stringBuilder = new StringBuilder(); // 鍒涘缓 reader // try (BufferedReader br = Files.newBufferedReader(file.toPath())) { @@ -203,6 +332,36 @@ } catch (Exception ignore) { } } + public static Object getmysqlFile(GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException { + Map<String, Object> tableMap = new HashMap<>(16); + // 鏁版嵁搴撹繛鎺ヤ俊鎭� + String url = "jdbc:mysql://localhost:3306/"+getFileDto.getDbFileName()+"?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true"; + String user = getFileDto.getDbUserName(); + String password = getFileDto.getDbPassword(); + List<ThicknessData> dataList = new ArrayList<>(); + + try ( + // 寤虹珛杩炴帴 + Connection connection = DriverManager.getConnection(url, user, password); + // 鍒涘缓 Statement 瀵硅薄鎵ц SQL + Statement statement = connection.createStatement() + ) { + String sql = "SELECT ThinnestPoint, AverageThickness FROM model1records"; + ResultSet resultSet = statement.executeQuery(sql); + + // 閬嶅巻缁撴灉闆嗚幏鍙栨暟鎹� + while (resultSet.next()) { + double thinnestPoint = resultSet.getDouble("ThinnestPoint"); + double averageThickness = resultSet.getDouble("AverageThickness"); + dataList.add(new ThicknessData(thinnestPoint, averageThickness)); + } + tableMap.put("data", dataList); + } catch (Exception e) { + e.printStackTrace(); + return R.failed("鏁版嵁搴撴煡璇㈠嚭閿�: " + e.getMessage()); + } + return tableMap; + } public static Object readDbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException { Map<String, Object> tableMap = new HashMap<>(16); @@ -263,6 +422,8 @@ } else { path = canonicalPath64.replaceAll("/chi_sim.traineddata", "").replaceAll("\\\\", "/"); } + // 璁剧疆 TESSDATA_PREFIX 鐜鍙橀噺 +// System.setProperty("TESSDATA_PREFIX", path); //璁剧疆閰嶇疆鏂囦欢澶瑰井瑙嗐�佽瘑鍒瑷�銆佽瘑鍒ā寮� Tesseract tesseract = new Tesseract(); tesseract.setDatapath(path); -- Gitblit v1.9.3