From 2d1e909070e2baaa49d3a021364df595ff1d2e72 Mon Sep 17 00:00:00 2001 From: yaowanxin <3588231647@qq.com> Date: 星期六, 19 七月 2025 11:01:32 +0800 Subject: [PATCH] 读取 --- src/main/java/com/chinaztt/mes/docx/util/TakeWords.java | 242 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 242 insertions(+), 0 deletions(-) diff --git a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java index 3d77d75..39ba107 100644 --- a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java +++ b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java @@ -2,22 +2,34 @@ import cn.hutool.core.io.FileUtil; import com.chinaztt.mes.docx.dto.GetFileDto; +import com.chinaztt.mes.docx.dto.ThicknessData; import net.sourceforge.tess4j.Tesseract; import net.sourceforge.tess4j.TesseractException; import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.StringUtils; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.hssf.usermodel.HSSFPicture; +import org.apache.poi.hssf.usermodel.HSSFPictureData; +import org.apache.poi.hssf.usermodel.HSSFSheet; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hwpf.extractor.WordExtractor; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.ss.usermodel.*; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; +import java.awt.Color; +import java.awt.image.BufferedImage; +import javax.imageio.ImageIO; import java.io.*; import java.nio.file.Files; import java.sql.*; import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class TakeWords { @@ -68,6 +80,205 @@ return result.toString(); } + public static Object readExcelxlsFile(File file) throws IOException { + StringBuilder result = new StringBuilder(); + try (FileInputStream fis = new FileInputStream(file); + Workbook workbook = new HSSFWorkbook(fis)) { + // 鑾峰彇绗竴涓伐浣滆〃 + Sheet sheet = workbook.getSheetAt(0); + // 璇诲彇鍥剧墖淇℃伅 + if (workbook instanceof HSSFWorkbook) { + HSSFWorkbook hssfWorkbook = (HSSFWorkbook) workbook; + List<HSSFPictureData> pictures = hssfWorkbook.getAllPictures(); + for (HSSFPictureData picture : pictures) { + // 鑾峰彇鍥剧墖绫诲瀷 + String pictureType = picture.suggestFileExtension(); + // 鑾峰彇鍥剧墖鏁版嵁 + byte[] pictureData = picture.getData(); + // 鍒涘缓涓存椂鏂囦欢 + File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType); + try (FileOutputStream fos = new FileOutputStream(tempFile)) { + fos.write(pictureData); + } + // 鍥剧墖棰勫鐞� + File processedFile = preprocessImage(tempFile, pictureType); +// ocrResult = (String) readPngFile(tempFile); + + // 璋冪敤 readPngFile1 鏂规硶璇诲彇鍥剧墖鏂囧瓧淇℃伅 + String ocrResult = ""; + try { + ocrResult = (String) readPngFile1(tempFile); +// ocrResult = (String) readPngFile1(processedFile); + } catch (TesseractException e) { + ocrResult = "OCR璇嗗埆澶辫触: " + e.getMessage(); + } finally { + // 鍒犻櫎涓存椂鏂囦欢 + tempFile.delete(); + processedFile.delete(); + } + + // 灏嗗浘鐗囦俊鎭坊鍔犲埌缁撴灉涓� +// result.append("Picture Type: ").append(pictureType) +// .append(", Picture Size: ").append(pictureData.length) +// .append(" bytes") +// .append(", OCR Result: ").append(ocrResult) +// .append(","); + String ocrText = fixOcrText(ocrResult); + result.append("OCR Result:").append(ocrText).append(","); + } + } +// +// // 閬嶅巻姣忎竴琛� +// for (Row row : sheet) { +// // 閬嶅巻姣忎竴鍒� +// for (Cell cell : row) { +// CellType cellType = CellType.forInt(cell.getCellType()); +// switch (cellType) { +// case STRING: +// result.append(cell.getStringCellValue()); +// break; +// case NUMERIC: +// if (DateUtil.isCellDateFormatted(cell)) { +// result.append(cell.getDateCellValue()); +// } else { +// result.append(cell.getNumericCellValue()); +// } +// break; +// case BOOLEAN: +// result.append(cell.getBooleanCellValue()); +// break; +// case FORMULA: +// result.append(cell.getCellFormula()); +// break; +// default: +// result.append(""); +// } +// result.append("\t"); +// } +// result.append("\n"); +// } + } + return result; + + } + + // 淇 OCR 璇嗗埆鏂囨湰涓殑閿欒鍏抽敭璇� + public static String fixOcrText(String ocrText) { + // 瀹氫箟閿欒鍏抽敭璇嶅拰姝g‘鍐呭鐨勬槧灏勶紝杩欓噷澶勭悊鈥滃嚮 瀹� 寮� 搴炩�濅慨姝d负鈥滃嚮绌垮己搴︹�� + // 鑰冭檻鍒板彲鑳芥湁绌烘牸鍒嗛殧锛岀敤姝e垯鍖归厤鍖呭惈杩欎簺瀛楃殑鍐呭 + ocrText = ocrText.replaceAll("鍑籠\s*瀹嘰\s*寮篭\s*搴�", "鍑荤┛寮哄害"); + // 杩樺彲浠ョ户缁坊鍔犲叾浠栭敊璇慨姝o紝姣斿涓嬮潰鍋囪鈥滅數 鍘� \\(HV\\)鈥濋噷鐨勭┖鏍煎奖鍝嶏紝涔熶慨姝d笅 + ocrText = ocrText.replaceAll("鐢礬\s*鍘媆\s*\\(HV\\)", "鐢靛帇(KV)"); + ocrText = ocrText.replaceAll("鐢礬\s*娴乗\s*\\(nt\\)", "鐢垫祦(mA)"); + return ocrText; + } + + /** + * 瀵瑰浘鐗囪繘琛岄澶勭悊锛屽寘鎷伆搴﹀寲銆佷簩鍊煎寲鍜岄攼鍖� + * @param inputFile 杈撳叆鐨勫浘鐗囨枃浠� + * @param formatName 鍥剧墖鏍煎紡鍚嶇О + * @return 澶勭悊鍚庣殑鍥剧墖鏂囦欢 + * @throws IOException 璇诲彇鎴栧啓鍏ュ浘鐗囨椂鍙兘鎶涘嚭鐨勫紓甯� + */ + private static File preprocessImage(File inputFile, String formatName) throws IOException { + // 璇诲彇鍥剧墖 + BufferedImage image = ImageIO.read(inputFile); + + // 鐏板害鍖� + image = convertToGrayscale(image); + // 浜屽�煎寲 + image = applyThreshold(image, 128); + // 閿愬寲 + image = applySharpening(image); + + // 鍒涘缓澶勭悊鍚庣殑涓存椂鏂囦欢 + File outputFile = File.createTempFile(UUID.randomUUID().toString(), "." + formatName); + ImageIO.write(image, formatName, outputFile); + return outputFile; + } + + /** + * 灏嗗浘鐗囪浆鎹负鐏板害鍥� + * @param image 杈撳叆鐨勫浘鐗� + * @return 鐏板害鍖栧悗鐨勫浘鐗� + */ + private static BufferedImage convertToGrayscale(BufferedImage image) { + BufferedImage grayImage = new BufferedImage( + image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); + grayImage.getGraphics().drawImage(image, 0, 0, null); + return grayImage; + } + + /** + * 瀵瑰浘鐗囪繘琛屼簩鍊煎寲澶勭悊 + * @param image 杈撳叆鐨勫浘鐗� + * @param threshold 浜屽�煎寲闃堝�� + * @return 浜屽�煎寲鍚庣殑鍥剧墖 + */ + private static BufferedImage applyThreshold(BufferedImage image, int threshold) { + BufferedImage binaryImage = new BufferedImage( + image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY); + for (int y = 0; y < image.getHeight(); y++) { + for (int x = 0; x < image.getWidth(); x++) { + int rgb = image.getRGB(x, y); + int gray = (rgb >> 16) & 0xff; + if (gray < threshold) { + binaryImage.setRGB(x, y, Color.BLACK.getRGB()); + } else { + binaryImage.setRGB(x, y, Color.WHITE.getRGB()); + } + } + } + return binaryImage; + } + + /** + * 瀵瑰浘鐗囪繘琛岄攼鍖栧鐞� + * @param image 杈撳叆鐨勫浘鐗� + * @return 閿愬寲鍚庣殑鍥剧墖 + */ + private static BufferedImage applySharpening(BufferedImage image) { + float[] sharpenMatrix = { + 0f, -1f, 0f, + -1f, 5f, -1f, + 0f, -1f, 0f + }; + java.awt.image.Kernel kernel = new java.awt.image.Kernel(3, 3, sharpenMatrix); + java.awt.image.ConvolveOp op = new java.awt.image.ConvolveOp(kernel, java.awt.image.ConvolveOp.EDGE_NO_OP, null); + return op.filter(image, null); + } + + public static Object readPngFile1(File file) throws IOException, TesseractException { + // 鑾峰彇 tessdata 鐩綍鐨勭粷瀵硅矾寰� + String arch = System.getProperty("sun.arch.data.model"); + File tessDataDir; + if (arch.contains("32")) { + tessDataDir = FileUtil.file(".", "/jre_32/tessdata"); + } else { + tessDataDir = FileUtil.file(".", "/jre_64/tessdata"); + } + String path = tessDataDir.getCanonicalPath(); + // 妫�鏌� chi_sim.traineddata 鏂囦欢鏄惁瀛樺湪 + File chiSimFile = new File(path, "chi_sim.traineddata"); + if (!chiSimFile.exists()) { + throw new FileNotFoundException("chi_sim.traineddata 鏂囦欢鏈壘鍒帮紝璇锋鏌ヨ矾寰�: " + chiSimFile.getAbsolutePath()); + } + // 璁剧疆閰嶇疆鏂囦欢澶广�佽瘑鍒瑷�銆佽瘑鍒ā寮� + Tesseract tesseract = new Tesseract(); + tesseract.setDatapath(path); + // 璁剧疆璇嗗埆璇█涓轰腑鏂囩畝浣撳拰鑻辨枃锛堝鏋滆璁剧疆涓鸿嫳鏂囧彲鏀逛负 "eng"锛� + tesseract.setLanguage("chi_sim+eng"); + // 浣跨敤 OSD 杩涜鑷姩椤甸潰鍒嗗壊浠ヨ繘琛屽浘鍍忓鐞� + tesseract.setPageSegMode(1); + // 璁剧疆寮曟搸妯″紡鏄缁忕綉缁� LSTM 寮曟搸 + tesseract.setOcrEngineMode(1); + // 寮�濮嬭瘑鍒暣寮犲浘鐗囦腑鐨勬枃瀛� + return tesseract.doOCR(file); + } + + + + public static Object readTxtFile(File file) throws IOException { FileInputStream fin = new FileInputStream(file); InputStreamReader reader = new InputStreamReader(fin); @@ -82,6 +293,7 @@ } public static Object readCsvFile(File file) { + StringBuilder stringBuilder = new StringBuilder(); // 鍒涘缓 reader try (BufferedReader br = Files.newBufferedReader(file.toPath())) { @@ -164,6 +376,36 @@ } catch (Exception ignore) { } } + public static Object getmysqlFile(GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException { + Map<String, Object> tableMap = new HashMap<>(16); + // 鏁版嵁搴撹繛鎺ヤ俊鎭� + String url = "jdbc:mysql://localhost:3306/"+getFileDto.getDbFileName()+"?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true"; + String user = getFileDto.getDbUserName(); + String password = getFileDto.getDbPassword(); + List<ThicknessData> dataList = new ArrayList<>(); + + try ( + // 寤虹珛杩炴帴 + Connection connection = DriverManager.getConnection(url, user, password); + // 鍒涘缓 Statement 瀵硅薄鎵ц SQL + Statement statement = connection.createStatement() + ) { + String sql = "SELECT ThinnestPoint, AverageThickness FROM model1records"; + ResultSet resultSet = statement.executeQuery(sql); + + // 閬嶅巻缁撴灉闆嗚幏鍙栨暟鎹� + while (resultSet.next()) { + double thinnestPoint = resultSet.getDouble("ThinnestPoint"); + double averageThickness = resultSet.getDouble("AverageThickness"); + dataList.add(new ThicknessData(thinnestPoint, averageThickness)); + } + tableMap.put("data", dataList); + } catch (Exception e) { + e.printStackTrace(); + return R.failed("鏁版嵁搴撴煡璇㈠嚭閿�: " + e.getMessage()); + } + return tableMap; + } public static Object readDbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException { Map<String, Object> tableMap = new HashMap<>(16); -- Gitblit v1.9.3