From c60a4e38e2512a7e5ad44a4d2b514c54d41d8801 Mon Sep 17 00:00:00 2001 From: yaowanxin <3588231647@qq.com> Date: 星期六, 19 七月 2025 12:16:46 +0800 Subject: [PATCH] 修改读取图片 --- src/main/java/com/chinaztt/mes/docx/util/TakeWords.java | 86 +++---------------------------------------- 1 files changed, 6 insertions(+), 80 deletions(-) diff --git a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java index 39ba107..3a7cf0c 100644 --- a/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java +++ b/src/main/java/com/chinaztt/mes/docx/util/TakeWords.java @@ -101,20 +101,19 @@ fos.write(pictureData); } // 鍥剧墖棰勫鐞� - File processedFile = preprocessImage(tempFile, pictureType); -// ocrResult = (String) readPngFile(tempFile); - +// File processedFile = preprocessImage(tempFile, pictureType); // 璋冪敤 readPngFile1 鏂规硶璇诲彇鍥剧墖鏂囧瓧淇℃伅 String ocrResult = ""; try { ocrResult = (String) readPngFile1(tempFile); -// ocrResult = (String) readPngFile1(processedFile); +// ocrResult = (String) readPngFile(tempFile); +// ocrResult = (String) readPngFile(processedFile); } catch (TesseractException e) { ocrResult = "OCR璇嗗埆澶辫触: " + e.getMessage(); } finally { // 鍒犻櫎涓存椂鏂囦欢 tempFile.delete(); - processedFile.delete(); +// processedFile.delete(); } // 灏嗗浘鐗囦俊鎭坊鍔犲埌缁撴灉涓� @@ -171,81 +170,6 @@ ocrText = ocrText.replaceAll("鐢礬\s*鍘媆\s*\\(HV\\)", "鐢靛帇(KV)"); ocrText = ocrText.replaceAll("鐢礬\s*娴乗\s*\\(nt\\)", "鐢垫祦(mA)"); return ocrText; - } - - /** - * 瀵瑰浘鐗囪繘琛岄澶勭悊锛屽寘鎷伆搴﹀寲銆佷簩鍊煎寲鍜岄攼鍖� - * @param inputFile 杈撳叆鐨勫浘鐗囨枃浠� - * @param formatName 鍥剧墖鏍煎紡鍚嶇О - * @return 澶勭悊鍚庣殑鍥剧墖鏂囦欢 - * @throws IOException 璇诲彇鎴栧啓鍏ュ浘鐗囨椂鍙兘鎶涘嚭鐨勫紓甯� - */ - private static File preprocessImage(File inputFile, String formatName) throws IOException { - // 璇诲彇鍥剧墖 - BufferedImage image = ImageIO.read(inputFile); - - // 鐏板害鍖� - image = convertToGrayscale(image); - // 浜屽�煎寲 - image = applyThreshold(image, 128); - // 閿愬寲 - image = applySharpening(image); - - // 鍒涘缓澶勭悊鍚庣殑涓存椂鏂囦欢 - File outputFile = File.createTempFile(UUID.randomUUID().toString(), "." + formatName); - ImageIO.write(image, formatName, outputFile); - return outputFile; - } - - /** - * 灏嗗浘鐗囪浆鎹负鐏板害鍥� - * @param image 杈撳叆鐨勫浘鐗� - * @return 鐏板害鍖栧悗鐨勫浘鐗� - */ - private static BufferedImage convertToGrayscale(BufferedImage image) { - BufferedImage grayImage = new BufferedImage( - image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); - grayImage.getGraphics().drawImage(image, 0, 0, null); - return grayImage; - } - - /** - * 瀵瑰浘鐗囪繘琛屼簩鍊煎寲澶勭悊 - * @param image 杈撳叆鐨勫浘鐗� - * @param threshold 浜屽�煎寲闃堝�� - * @return 浜屽�煎寲鍚庣殑鍥剧墖 - */ - private static BufferedImage applyThreshold(BufferedImage image, int threshold) { - BufferedImage binaryImage = new BufferedImage( - image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY); - for (int y = 0; y < image.getHeight(); y++) { - for (int x = 0; x < image.getWidth(); x++) { - int rgb = image.getRGB(x, y); - int gray = (rgb >> 16) & 0xff; - if (gray < threshold) { - binaryImage.setRGB(x, y, Color.BLACK.getRGB()); - } else { - binaryImage.setRGB(x, y, Color.WHITE.getRGB()); - } - } - } - return binaryImage; - } - - /** - * 瀵瑰浘鐗囪繘琛岄攼鍖栧鐞� - * @param image 杈撳叆鐨勫浘鐗� - * @return 閿愬寲鍚庣殑鍥剧墖 - */ - private static BufferedImage applySharpening(BufferedImage image) { - float[] sharpenMatrix = { - 0f, -1f, 0f, - -1f, 5f, -1f, - 0f, -1f, 0f - }; - java.awt.image.Kernel kernel = new java.awt.image.Kernel(3, 3, sharpenMatrix); - java.awt.image.ConvolveOp op = new java.awt.image.ConvolveOp(kernel, java.awt.image.ConvolveOp.EDGE_NO_OP, null); - return op.filter(image, null); } public static Object readPngFile1(File file) throws IOException, TesseractException { @@ -466,6 +390,8 @@ } else { path = canonicalPath64.replaceAll("/chi_sim.traineddata", "").replaceAll("\\\\", "/"); } + // 璁剧疆 TESSDATA_PREFIX 鐜鍙橀噺 +// System.setProperty("TESSDATA_PREFIX", path); //璁剧疆閰嶇疆鏂囦欢澶瑰井瑙嗐�佽瘑鍒瑷�銆佽瘑鍒ā寮� Tesseract tesseract = new Tesseract(); tesseract.setDatapath(path); -- Gitblit v1.9.3