| | |
| | | |
| | | import cn.hutool.core.io.FileUtil; |
| | | import com.chinaztt.mes.docx.dto.GetFileDto; |
| | | import com.chinaztt.mes.docx.dto.ThicknessData; |
| | | import net.sourceforge.tess4j.Tesseract; |
| | | import net.sourceforge.tess4j.TesseractException; |
| | | import org.apache.commons.lang3.ObjectUtils; |
| | | import org.apache.commons.lang3.StringUtils; |
| | | import org.apache.poi.POIXMLDocument; |
| | | import org.apache.poi.POIXMLTextExtractor; |
| | | import org.apache.poi.hssf.usermodel.HSSFPicture; |
| | | import org.apache.poi.hssf.usermodel.HSSFPictureData; |
| | | import org.apache.poi.hssf.usermodel.HSSFSheet; |
| | | import org.apache.poi.hssf.usermodel.HSSFWorkbook; |
| | | import org.apache.poi.hwpf.extractor.WordExtractor; |
| | | import org.apache.poi.openxml4j.exceptions.InvalidFormatException; |
| | | import org.apache.poi.openxml4j.opc.OPCPackage; |
| | | import org.apache.poi.ss.usermodel.*; |
| | | import org.apache.poi.xssf.usermodel.XSSFSheet; |
| | | import org.apache.poi.xssf.usermodel.XSSFWorkbook; |
| | | import org.apache.poi.xwpf.extractor.XWPFWordExtractor; |
| | | import java.awt.Color; |
| | | import java.awt.image.BufferedImage; |
| | | import javax.imageio.ImageIO; |
| | | |
| | | import java.io.*; |
| | | import java.nio.file.Files; |
| | | import java.sql.*; |
| | | import java.util.*; |
| | | import java.util.regex.Matcher; |
| | | import java.util.regex.Pattern; |
| | | |
| | | public class TakeWords { |
| | | |
| | |
| | | return result.toString(); |
| | | } |
| | | |
| | | public static Object readExcelxlsFile(File file) throws IOException { |
| | | StringBuilder result = new StringBuilder(); |
| | | try (FileInputStream fis = new FileInputStream(file); |
| | | Workbook workbook = new HSSFWorkbook(fis)) { |
| | | // è·å第ä¸ä¸ªå·¥ä½è¡¨ |
| | | Sheet sheet = workbook.getSheetAt(0); |
| | | // 读åå¾çä¿¡æ¯ |
| | | if (workbook instanceof HSSFWorkbook) { |
| | | HSSFWorkbook hssfWorkbook = (HSSFWorkbook) workbook; |
| | | List<HSSFPictureData> pictures = hssfWorkbook.getAllPictures(); |
| | | for (HSSFPictureData picture : pictures) { |
| | | // è·åå¾çç±»å |
| | | String pictureType = picture.suggestFileExtension(); |
| | | // è·åå¾çæ°æ® |
| | | byte[] pictureData = picture.getData(); |
| | | // åå»ºä¸´æ¶æä»¶ |
| | | File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType); |
| | | try (FileOutputStream fos = new FileOutputStream(tempFile)) { |
| | | fos.write(pictureData); |
| | | } |
| | | // å¾çé¢å¤ç |
| | | File processedFile = preprocessImage(tempFile, pictureType); |
| | | // ocrResult = (String) readPngFile(tempFile); |
| | | |
| | | // è°ç¨ readPngFile1 æ¹æ³è¯»åå¾çæåä¿¡æ¯ |
| | | String ocrResult = ""; |
| | | try { |
| | | ocrResult = (String) readPngFile1(tempFile); |
| | | // ocrResult = (String) readPngFile1(processedFile); |
| | | } catch (TesseractException e) { |
| | | ocrResult = "OCRè¯å«å¤±è´¥: " + e.getMessage(); |
| | | } finally { |
| | | // å é¤ä¸´æ¶æä»¶ |
| | | tempFile.delete(); |
| | | processedFile.delete(); |
| | | } |
| | | |
| | | // å°å¾çä¿¡æ¯æ·»å å°ç»æä¸ |
| | | // result.append("Picture Type: ").append(pictureType) |
| | | // .append(", Picture Size: ").append(pictureData.length) |
| | | // .append(" bytes") |
| | | // .append(", OCR Result: ").append(ocrResult) |
| | | // .append(","); |
| | | String ocrText = fixOcrText(ocrResult); |
| | | result.append("OCR Result:").append(ocrText).append(","); |
| | | } |
| | | } |
| | | // |
| | | // // é忝ä¸è¡ |
| | | // for (Row row : sheet) { |
| | | // // é忝ä¸å |
| | | // for (Cell cell : row) { |
| | | // CellType cellType = CellType.forInt(cell.getCellType()); |
| | | // switch (cellType) { |
| | | // case STRING: |
| | | // result.append(cell.getStringCellValue()); |
| | | // break; |
| | | // case NUMERIC: |
| | | // if (DateUtil.isCellDateFormatted(cell)) { |
| | | // result.append(cell.getDateCellValue()); |
| | | // } else { |
| | | // result.append(cell.getNumericCellValue()); |
| | | // } |
| | | // break; |
| | | // case BOOLEAN: |
| | | // result.append(cell.getBooleanCellValue()); |
| | | // break; |
| | | // case FORMULA: |
| | | // result.append(cell.getCellFormula()); |
| | | // break; |
| | | // default: |
| | | // result.append(""); |
| | | // } |
| | | // result.append("\t"); |
| | | // } |
| | | // result.append("\n"); |
| | | // } |
| | | } |
| | | return result; |
| | | |
| | | } |
| | | |
| | | // ä¿®æ£ OCR è¯å«ææ¬ä¸çé误å
³é®è¯ |
| | | public static String fixOcrText(String ocrText) { |
| | | // å®ä¹é误å
³é®è¯åæ£ç¡®å
å®¹çæ å°ï¼è¿éå¤çâå» å® å¼º åºâä¿®æ£ä¸ºâå»ç©¿å¼ºåº¦â |
| | | // èèå°å¯è½æç©ºæ ¼åéï¼ç¨æ£åå¹é
å
å«è¿äºåçå
容 |
| | | ocrText = ocrText.replaceAll("å»\\s*å®\\s*强\\s*åº", "å»ç©¿å¼ºåº¦"); |
| | | // è¿å¯ä»¥ç»§ç»æ·»å å
¶ä»é误修æ£ï¼æ¯å¦ä¸é¢å设âçµ å \\(HV\\)âéçç©ºæ ¼å½±åï¼ä¹ä¿®æ£ä¸ |
| | | ocrText = ocrText.replaceAll("çµ\\s*å\\s*\\(HV\\)", "çµå(KV)"); |
| | | ocrText = ocrText.replaceAll("çµ\\s*æµ\\s*\\(nt\\)", "çµæµ(mA)"); |
| | | return ocrText; |
| | | } |
| | | |
| | | /** |
| | | * 对å¾çè¿è¡é¢å¤çï¼å
æ¬ç°åº¦åãäºå¼ååéå |
| | | * @param inputFile è¾å
¥çå¾çæä»¶ |
| | | * @param formatName å¾çæ ¼å¼åç§° |
| | | * @return å¤çåçå¾çæä»¶ |
| | | * @throws IOException 读åæåå
¥å¾çæ¶å¯è½æåºçå¼å¸¸ |
| | | */ |
| | | private static File preprocessImage(File inputFile, String formatName) throws IOException { |
| | | // 读åå¾ç |
| | | BufferedImage image = ImageIO.read(inputFile); |
| | | |
| | | // ç°åº¦å |
| | | image = convertToGrayscale(image); |
| | | // äºå¼å |
| | | image = applyThreshold(image, 128); |
| | | // éå |
| | | image = applySharpening(image); |
| | | |
| | | // å建å¤çåçä¸´æ¶æä»¶ |
| | | File outputFile = File.createTempFile(UUID.randomUUID().toString(), "." + formatName); |
| | | ImageIO.write(image, formatName, outputFile); |
| | | return outputFile; |
| | | } |
| | | |
| | | /** |
| | | * å°å¾ç转æ¢ä¸ºç°åº¦å¾ |
| | | * @param image è¾å
¥çå¾ç |
| | | * @return ç°åº¦ååçå¾ç |
| | | */ |
| | | private static BufferedImage convertToGrayscale(BufferedImage image) { |
| | | BufferedImage grayImage = new BufferedImage( |
| | | image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_GRAY); |
| | | grayImage.getGraphics().drawImage(image, 0, 0, null); |
| | | return grayImage; |
| | | } |
| | | |
| | | /** |
| | | * 对å¾çè¿è¡äºå¼åå¤ç |
| | | * @param image è¾å
¥çå¾ç |
| | | * @param threshold äºå¼åéå¼ |
| | | * @return äºå¼ååçå¾ç |
| | | */ |
| | | private static BufferedImage applyThreshold(BufferedImage image, int threshold) { |
| | | BufferedImage binaryImage = new BufferedImage( |
| | | image.getWidth(), image.getHeight(), BufferedImage.TYPE_BYTE_BINARY); |
| | | for (int y = 0; y < image.getHeight(); y++) { |
| | | for (int x = 0; x < image.getWidth(); x++) { |
| | | int rgb = image.getRGB(x, y); |
| | | int gray = (rgb >> 16) & 0xff; |
| | | if (gray < threshold) { |
| | | binaryImage.setRGB(x, y, Color.BLACK.getRGB()); |
| | | } else { |
| | | binaryImage.setRGB(x, y, Color.WHITE.getRGB()); |
| | | } |
| | | } |
| | | } |
| | | return binaryImage; |
| | | } |
| | | |
| | | /** |
| | | * 对å¾çè¿è¡éåå¤ç |
| | | * @param image è¾å
¥çå¾ç |
| | | * @return éååçå¾ç |
| | | */ |
| | | private static BufferedImage applySharpening(BufferedImage image) { |
| | | float[] sharpenMatrix = { |
| | | 0f, -1f, 0f, |
| | | -1f, 5f, -1f, |
| | | 0f, -1f, 0f |
| | | }; |
| | | java.awt.image.Kernel kernel = new java.awt.image.Kernel(3, 3, sharpenMatrix); |
| | | java.awt.image.ConvolveOp op = new java.awt.image.ConvolveOp(kernel, java.awt.image.ConvolveOp.EDGE_NO_OP, null); |
| | | return op.filter(image, null); |
| | | } |
| | | |
| | | public static Object readPngFile1(File file) throws IOException, TesseractException { |
| | | // è·å tessdata ç®å½çç»å¯¹è·¯å¾ |
| | | String arch = System.getProperty("sun.arch.data.model"); |
| | | File tessDataDir; |
| | | if (arch.contains("32")) { |
| | | tessDataDir = FileUtil.file(".", "/jre_32/tessdata"); |
| | | } else { |
| | | tessDataDir = FileUtil.file(".", "/jre_64/tessdata"); |
| | | } |
| | | String path = tessDataDir.getCanonicalPath(); |
| | | // æ£æ¥ chi_sim.traineddata æä»¶æ¯å¦åå¨ |
| | | File chiSimFile = new File(path, "chi_sim.traineddata"); |
| | | if (!chiSimFile.exists()) { |
| | | throw new FileNotFoundException("chi_sim.traineddata æä»¶æªæ¾å°ï¼è¯·æ£æ¥è·¯å¾: " + chiSimFile.getAbsolutePath()); |
| | | } |
| | | // 设置é
ç½®æä»¶å¤¹ãè¯å«è¯è¨ãè¯å«æ¨¡å¼ |
| | | Tesseract tesseract = new Tesseract(); |
| | | tesseract.setDatapath(path); |
| | | // 设置è¯å«è¯è¨ä¸ºä¸æç®ä½åè±æï¼å¦æè¦è®¾ç½®ä¸ºè±æå¯æ¹ä¸º "eng"ï¼ |
| | | tesseract.setLanguage("chi_sim+eng"); |
| | | // ä½¿ç¨ OSD è¿è¡èªå¨é¡µé¢åå²ä»¥è¿è¡å¾åå¤ç |
| | | tesseract.setPageSegMode(1); |
| | | // è®¾ç½®å¼ææ¨¡å¼æ¯ç¥ç»ç½ç» LSTM 弿 |
| | | tesseract.setOcrEngineMode(1); |
| | | // å¼å§è¯å«æ´å¼ å¾çä¸çæå |
| | | return tesseract.doOCR(file); |
| | | } |
| | | |
| | | |
| | | |
| | | |
| | | public static Object readTxtFile(File file) throws IOException { |
| | | FileInputStream fin = new FileInputStream(file); |
| | | InputStreamReader reader = new InputStreamReader(fin); |
| | |
| | | } |
| | | |
| | | public static Object readCsvFile(File file) { |
| | | |
| | | StringBuilder stringBuilder = new StringBuilder(); |
| | | // å建 reader |
| | | try (BufferedReader br = Files.newBufferedReader(file.toPath())) { |
| | |
| | | } catch (Exception ignore) { |
| | | } |
| | | } |
| | | public static Object getmysqlFile(GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException { |
| | | Map<String, Object> tableMap = new HashMap<>(16); |
| | | // æ°æ®åºè¿æ¥ä¿¡æ¯ |
| | | String url = "jdbc:mysql://localhost:3306/"+getFileDto.getDbFileName()+"?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true"; |
| | | String user = getFileDto.getDbUserName(); |
| | | String password = getFileDto.getDbPassword(); |
| | | List<ThicknessData> dataList = new ArrayList<>(); |
| | | |
| | | try ( |
| | | // 建ç«è¿æ¥ |
| | | Connection connection = DriverManager.getConnection(url, user, password); |
| | | // å建 Statement 对象æ§è¡ SQL |
| | | Statement statement = connection.createStatement() |
| | | ) { |
| | | String sql = "SELECT ThinnestPoint, AverageThickness FROM model1records"; |
| | | ResultSet resultSet = statement.executeQuery(sql); |
| | | |
| | | // éåç»æéè·åæ°æ® |
| | | while (resultSet.next()) { |
| | | double thinnestPoint = resultSet.getDouble("ThinnestPoint"); |
| | | double averageThickness = resultSet.getDouble("AverageThickness"); |
| | | dataList.add(new ThicknessData(thinnestPoint, averageThickness)); |
| | | } |
| | | tableMap.put("data", dataList); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | return R.failed("æ°æ®åºæ¥è¯¢åºé: " + e.getMessage()); |
| | | } |
| | | return tableMap; |
| | | } |
| | | |
| | | public static Object readDbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException { |
| | | Map<String, Object> tableMap = new HashMap<>(16); |