From 9ff6b2fa38167d93de00e6d730d2b038d7731776 Mon Sep 17 00:00:00 2001 From: yaowanxin <3588231647@qq.com> Date: 星期四, 24 七月 2025 13:21:52 +0800 Subject: [PATCH] Merge remote-tracking branch 'origin/ywx' into ywx --- src/main/java/com/chinaztt/mes/docx/util/TakeWords.java | 204 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 204 insertions(+), 0 deletions(-) diff --git a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java index 9d791dd..c3469d4 100644 --- a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java +++ b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java @@ -5,21 +5,33 @@ import com.opencsv.CSVReader; import com.opencsv.CSVReaderBuilder; import com.opencsv.exceptions.CsvValidationException; +import com.chinaztt.mes.docx.dto.ThicknessData; import net.sourceforge.tess4j.Tesseract; import net.sourceforge.tess4j.TesseractException; import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.StringUtils; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.hssf.usermodel.HSSFPicture; +import org.apache.poi.hssf.usermodel.HSSFPictureData; +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hwpf.extractor.WordExtractor; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.ss.usermodel.*; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; +import java.awt.Color; +import java.awt.image.BufferedImage; +import javax.imageio.ImageIO; import java.io.*; import java.sql.*; import java.util.*; +import java.util.regex.Pattern; +import java.util.regex.Matcher; import java.util.regex.Pattern; public class TakeWords { @@ -76,6 +88,129 @@ return result.toString(); } + public static Object readExcelxlsFile(File file) throws IOException { + StringBuilder result = new StringBuilder(); + try (FileInputStream fis = new FileInputStream(file); + Workbook workbook = new HSSFWorkbook(fis)) { + // 鑾峰彇绗竴涓伐浣滆〃 + Sheet sheet = workbook.getSheetAt(0); + // 璇诲彇鍥剧墖淇℃伅 + if (workbook instanceof HSSFWorkbook) { + HSSFWorkbook hssfWorkbook = (HSSFWorkbook) workbook; + List<HSSFPictureData> pictures = hssfWorkbook.getAllPictures(); + for (HSSFPictureData picture : pictures) { + // 鑾峰彇鍥剧墖绫诲瀷 + String pictureType = picture.suggestFileExtension(); + // 鑾峰彇鍥剧墖鏁版嵁 + byte[] pictureData = picture.getData(); + // 鍒涘缓涓存椂鏂囦欢 + File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType); + try (FileOutputStream fos = new FileOutputStream(tempFile)) { + fos.write(pictureData); + } + // 鍥剧墖棰勫鐞� +// File processedFile = preprocessImage(tempFile, pictureType); + // 璋冪敤 readPngFile1 鏂规硶璇诲彇鍥剧墖鏂囧瓧淇℃伅 + String ocrResult = ""; + try { + ocrResult = (String) readPngFile1(tempFile); +// ocrResult = (String) readPngFile(tempFile); +// ocrResult = (String) readPngFile(processedFile); + } catch (TesseractException e) { + ocrResult = "OCR璇嗗埆澶辫触: " + e.getMessage(); + } finally { + // 鍒犻櫎涓存椂鏂囦欢 + tempFile.delete(); +// processedFile.delete(); + } + + // 灏嗗浘鐗囦俊鎭坊鍔犲埌缁撴灉涓� +// result.append("Picture Type: ").append(pictureType) +// .append(", Picture Size: ").append(pictureData.length) +// .append(" bytes") +// .append(", OCR Result: ").append(ocrResult) +// .append(","); + String ocrText = fixOcrText(ocrResult); + result.append("OCR Result:").append(ocrText).append(","); + } + } +// +// // 閬嶅巻姣忎竴琛� +// for (Row row : sheet) { +// // 閬嶅巻姣忎竴鍒� +// for (Cell cell : row) { +// CellType cellType = CellType.forInt(cell.getCellType()); +// switch (cellType) { +// case STRING: +// result.append(cell.getStringCellValue()); +// break; +// case NUMERIC: +// if (DateUtil.isCellDateFormatted(cell)) { +// result.append(cell.getDateCellValue()); +// } else { +// result.append(cell.getNumericCellValue()); +// } +// break; +// case BOOLEAN: +// result.append(cell.getBooleanCellValue()); +// break; +// case FORMULA: +// result.append(cell.getCellFormula()); +// break; +// default: +// result.append(""); +// } +// result.append("\t"); +// } +// result.append("\n"); +// } + } + return result; + + } + + // 淇 OCR 璇嗗埆鏂囨湰涓殑閿欒鍏抽敭璇� + public static String fixOcrText(String ocrText) { + // 瀹氫箟閿欒鍏抽敭璇嶅拰姝g‘鍐呭鐨勬槧灏勶紝杩欓噷澶勭悊鈥滃嚮 瀹� 寮� 搴炩�濅慨姝d负鈥滃嚮绌垮己搴︹�� + // 鑰冭檻鍒板彲鑳芥湁绌烘牸鍒嗛殧锛岀敤姝e垯鍖归厤鍖呭惈杩欎簺瀛楃殑鍐呭 + ocrText = ocrText.replaceAll("鍑籠\s*瀹嘰\s*寮篭\s*搴�", "鍑荤┛寮哄害"); + // 杩樺彲浠ョ户缁坊鍔犲叾浠栭敊璇慨姝o紝姣斿涓嬮潰鍋囪鈥滅數 鍘� \\(HV\\)鈥濋噷鐨勭┖鏍煎奖鍝嶏紝涔熶慨姝d笅 + ocrText = ocrText.replaceAll("鐢礬\s*鍘媆\s*\\(HV\\)", "鐢靛帇(KV)"); + ocrText = ocrText.replaceAll("鐢礬\s*娴乗\s*\\(nt\\)", "鐢垫祦(mA)"); + return ocrText; + } + + public static Object readPngFile1(File file) throws IOException, TesseractException { + // 鑾峰彇 tessdata 鐩綍鐨勭粷瀵硅矾寰� + String arch = System.getProperty("sun.arch.data.model"); + File tessDataDir; + if (arch.contains("32")) { + tessDataDir = FileUtil.file(".", "/jre_32/tessdata"); + } else { + tessDataDir = FileUtil.file(".", "/jre_64/tessdata"); + } + String path = tessDataDir.getCanonicalPath(); + // 妫�鏌� chi_sim.traineddata 鏂囦欢鏄惁瀛樺湪 + File chiSimFile = new File(path, "chi_sim.traineddata"); + if (!chiSimFile.exists()) { + throw new FileNotFoundException("chi_sim.traineddata 鏂囦欢鏈壘鍒帮紝璇锋鏌ヨ矾寰�: " + chiSimFile.getAbsolutePath()); + } + // 璁剧疆閰嶇疆鏂囦欢澶广�佽瘑鍒瑷�銆佽瘑鍒ā寮� + Tesseract tesseract = new Tesseract(); + tesseract.setDatapath(path); + // 璁剧疆璇嗗埆璇█涓轰腑鏂囩畝浣撳拰鑻辨枃锛堝鏋滆璁剧疆涓鸿嫳鏂囧彲鏀逛负 "eng"锛� + tesseract.setLanguage("chi_sim+eng"); + // 浣跨敤 OSD 杩涜鑷姩椤甸潰鍒嗗壊浠ヨ繘琛屽浘鍍忓鐞� + tesseract.setPageSegMode(1); + // 璁剧疆寮曟搸妯″紡鏄缁忕綉缁� LSTM 寮曟搸 + tesseract.setOcrEngineMode(1); + // 寮�濮嬭瘑鍒暣寮犲浘鐗囦腑鐨勬枃瀛� + return tesseract.doOCR(file); + } + + + + public static Object readTxtFile(File file) throws IOException { FileInputStream fin = new FileInputStream(file); InputStreamReader reader = new InputStreamReader(fin); @@ -90,6 +225,7 @@ } public static Object readCsvFile(File file) { + StringBuilder stringBuilder = new StringBuilder(); // 鍒涘缓 reader // try (BufferedReader br = Files.newBufferedReader(file.toPath())) { @@ -203,6 +339,72 @@ } catch (Exception ignore) { } } + public static Object getMysqlFile(GetFileDto getFileDto){ + Map<String, Object> tableMap = new HashMap<>(16); + // 浠� GetFileDto 鑾峰彇鏁版嵁搴撳悕锛屽搴斻�愭枃浠跺悕绉般�戝瓧娈� + String dbName = getFileDto.getDbFileName(); + String user = getFileDto.getDbUserName(); + String password = getFileDto.getDbPassword(); + // 浠� GetFileDto 鑾峰彇鏁版嵁琛ㄥ悕锛屽搴斻�愭暟鎹簱琛ㄥ悕銆戝瓧娈� + String table = getFileDto.getDbTable(); + // 妫�鏌ユ暟鎹簱鍚嶅拰琛ㄥ悕鏄惁涓虹┖ + if (dbName == null || dbName.isEmpty() || table == null || table.isEmpty()) { + return R.failed("鏁版嵁搴撳悕鎴栬〃鍚嶄笉鑳戒负绌�"); + } + // 鏁版嵁搴撹繛鎺ヤ俊鎭� + String url = "jdbc:mysql://localhost:3306/"+dbName+"?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true"; + Connection connection = null; + PreparedStatement preparedStatement = null; + ResultSet resultSet = null; + List<Map<String, Object>> dataList = new ArrayList<>(); + + try { + // 寤虹珛杩炴帴 + connection = DriverManager.getConnection(url, user, password); + // 鏋勫缓鍩虹 SQL + + StringBuilder sql = new StringBuilder("SELECT * FROM ").append(table); + // 鍒涘缓 PreparedStatement 瀵硅薄鎵ц SQL + preparedStatement = connection.prepareStatement(sql.toString()); + resultSet = preparedStatement.executeQuery(); + ResultSetMetaData metaData = resultSet.getMetaData(); + int columnCount = metaData.getColumnCount(); + // 閬嶅巻缁撴灉闆嗚幏鍙栨暟鎹� + while (resultSet.next()) { + Map<String, Object> rowData = new HashMap<>(); + for (int i = 1; i <= columnCount; i++) { + String columnName = metaData.getColumnName(i); + rowData.put(columnName, resultSet.getObject(i)); + } + dataList.add(rowData); + } +// while (resultSet.next()) { +// double thinnestPoint = resultSet.getDouble("ThinnestPoint"); +// double averageThickness = resultSet.getDouble("AverageThickness"); +// dataList.add(new ThicknessData(thinnestPoint, averageThickness)); +// } + tableMap.put("data", dataList); + } catch (Exception e) { + e.printStackTrace(); + // 鍋囪 R 绫绘湁 failed 鏂规硶锛岃嫢娌℃湁闇�琛ュ厖瀹炵幇 + return R.failed("鏁版嵁搴撴煡璇㈠嚭閿�: " + e.getMessage()); + } finally { + try { + if (resultSet != null) { + resultSet.close(); + } + if (preparedStatement != null) { + preparedStatement.close(); + } + if (connection != null) { + connection.close(); + } + } catch (SQLException e) { + e.printStackTrace(); + } + } + return tableMap; + } public static Object readDbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException { Map<String, Object> tableMap = new HashMap<>(16); @@ -263,6 +465,8 @@ } else { path = canonicalPath64.replaceAll("/chi_sim.traineddata", "").replaceAll("\\\\", "/"); } + // 璁剧疆 TESSDATA_PREFIX 鐜鍙橀噺 +// System.setProperty("TESSDATA_PREFIX", path); //璁剧疆閰嶇疆鏂囦欢澶瑰井瑙嗐�佽瘑鍒瑷�銆佽瘑鍒ā寮� Tesseract tesseract = new Tesseract(); tesseract.setDatapath(path); -- Gitblit v1.9.3