| | |
| | | package com.chinaztt.mes.docx.util; |
| | | |
| | | import cn.hutool.core.io.FileUtil; |
| | | import cn.hutool.http.HttpRequest; |
| | | import cn.hutool.json.JSONUtil; |
| | | import com.chinaztt.mes.docx.dto.GetFileDto; |
| | | import com.chinaztt.mes.docx.dto.ThicknessData; |
| | | import com.opencsv.CSVReader; |
| | | import com.opencsv.CSVReaderBuilder; |
| | | import com.opencsv.exceptions.CsvValidationException; |
| | |
| | | import net.sourceforge.tess4j.TesseractException; |
| | | import org.apache.commons.lang3.ObjectUtils; |
| | | import org.apache.commons.lang3.StringUtils; |
| | | import org.apache.poi.POIXMLDocument; |
| | | import org.apache.poi.POIXMLTextExtractor; |
| | | import org.apache.poi.hssf.usermodel.HSSFPictureData; |
| | | import org.apache.poi.hssf.usermodel.HSSFWorkbook; |
| | | import org.apache.poi.hwpf.extractor.WordExtractor; |
| | | import org.apache.poi.ooxml.POIXMLDocument; |
| | | import org.apache.poi.ooxml.extractor.POIXMLTextExtractor; |
| | | import org.apache.poi.openxml4j.opc.OPCPackage; |
| | | import org.apache.poi.poifs.filesystem.POIFSFileSystem; |
| | | import org.apache.poi.ss.usermodel.Sheet; |
| | | import org.apache.poi.ss.usermodel.Workbook; |
| | | import org.apache.poi.xssf.usermodel.XSSFSheet; |
| | | import org.apache.poi.xssf.usermodel.XSSFWorkbook; |
| | | import org.apache.poi.ss.usermodel.WorkbookFactory; |
| | | import org.apache.poi.xwpf.extractor.XWPFWordExtractor; |
| | | |
| | | import java.io.*; |
| | | import java.nio.file.Files; |
| | | import java.sql.*; |
| | | import java.util.*; |
| | | import java.util.regex.Pattern; |
| | |
| | | return result; |
| | | } |
| | | |
| | | public static Object readExcelFile(File file) throws IOException { |
| | | /** |
| | | * 读取excel文件,兼容.xlsx,.xls格式 |
| | | * @param file |
| | | * @return |
| | | */ |
| | | public static Object readExcelFile(File file) { |
| | | StringBuilder result = new StringBuilder(); |
| | | //创建工作簿对象 |
| | | XSSFWorkbook xssfWorkbook = new XSSFWorkbook(Files.newInputStream(file.toPath())); |
| | | //获取工作簿下sheet的个数 只读取第一个sheet |
| | | // int sheetNum = xssfWorkbook.getNumberOfSheets(); |
| | | //遍历工作簿中的所有数据 |
| | | XSSFSheet sheet = xssfWorkbook.getSheetAt(0); |
| | | //获取最后一行的num,即总行数。此处从0开始 |
| | | int maxRow = sheet.getLastRowNum(); |
| | | for (int row = 1; row <= maxRow; row++) { |
| | | //获取最后单元格num,即总单元格数 ***注意:此处从1开始计数*** |
| | | int maxRol = sheet.getRow(row).getLastCellNum(); |
| | | StringBuilder aLine = new StringBuilder(); |
| | | for (int rol = 0; rol < maxRol; rol++) { |
| | | aLine.append(sheet.getRow(row).getCell(rol)).append(splitIdentifier); |
| | | try ( |
| | | FileInputStream fis = new FileInputStream(file); |
| | | Workbook workbook = WorkbookFactory.create(fis) |
| | | ) { |
| | | //遍历工作簿中的所有数据 |
| | | Sheet sheet = workbook.getSheetAt(0); |
| | | //获取最后一行的num,即总行数。此处从0开始 |
| | | int maxRow = sheet.getLastRowNum(); |
| | | for (int row = 0; row <= maxRow; row++) { |
| | | int maxRol = sheet.getRow(row).getLastCellNum(); |
| | | StringBuilder aLine = new StringBuilder(); |
| | | for (int rol = 0; rol < maxRol; rol++) { |
| | | aLine.append(sheet.getRow(row).getCell(rol)).append(splitIdentifier); |
| | | } |
| | | result.append(aLine).append("\n"); |
| | | } |
| | | result.append(aLine).append("\n"); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | } |
| | | return result.toString(); |
| | | } |
| | | |
| | | public static Object readExcelxlsFile(File file) throws IOException { |
| | | /** |
| | | * 读取excel文件中的图片内容 |
| | | * @param file |
| | | * @return |
| | | * @throws IOException |
| | | */ |
| | | public static Object readPngContextInExcel(File file) throws IOException { |
| | | String result = ""; |
| | | try (FileInputStream fis = new FileInputStream(file); |
| | | Workbook workbook = new HSSFWorkbook(fis)) { |
| | |
| | | try (FileOutputStream fos = new FileOutputStream(tempFile)) { |
| | | fos.write(pictureData); |
| | | } |
| | | String ocrResult = ""; |
| | | String ocrResult; |
| | | try { |
| | | ocrResult = (String) readPngFile(tempFile); |
| | | } catch (TesseractException e) { |
| | | ocrResult = "OCR识别失败: " + e.getMessage(); |
| | | ocrResult = ocrImageContext(tempFile.getAbsolutePath()); |
| | | } finally { |
| | | // 删除临时文件 |
| | | tempFile.delete(); |
| | | } |
| | | result = ocrResult; |
| | | // String ocrText = fixOcrText(ocrResult); |
| | | // result.append("OCR Result:").append(ocrText).append(","); |
| | | |
| | | |
| | | // for (HSSFPictureData picture : pictures) { |
| | | // // 获取图片类型 |
| | | // String pictureType = picture.suggestFileExtension(); |
| | | // // 获取图片数据 |
| | | // byte[] pictureData = picture.getData(); |
| | | // // 创建临时文件 |
| | | // File tempFile = File.createTempFile(UUID.randomUUID().toString(), "." + pictureType); |
| | | // try (FileOutputStream fos = new FileOutputStream(tempFile)) { |
| | | // fos.write(pictureData); |
| | | // } |
| | | // // 图片预处理 |
| | | //// File processedFile = preprocessImage(tempFile, pictureType); |
| | | // // 调用 readPngFile1 方法读取图片文字信息 |
| | | // String ocrResult = ""; |
| | | // try { |
| | | // ocrResult = (String) readPngFile(tempFile); |
| | | //// ocrResult = (String) readPngFile(tempFile); |
| | | //// ocrResult = (String) readPngFile(processedFile); |
| | | // } catch (TesseractException e) { |
| | | // ocrResult = "OCR识别失败: " + e.getMessage(); |
| | | // } finally { |
| | | // // 删除临时文件 |
| | | //// tempFile.delete(); |
| | | //// processedFile.delete(); |
| | | // } |
| | | // |
| | | // // 将图片信息添加到结果中 |
| | | //// result.append("Picture Type: ").append(pictureType) |
| | | //// .append(", Picture Size: ").append(pictureData.length) |
| | | //// .append(" bytes") |
| | | //// .append(", OCR Result: ").append(ocrResult) |
| | | //// .append(","); |
| | | // String ocrText = fixOcrText(ocrResult); |
| | | // result.append("OCR Result:").append(ocrText).append(","); |
| | | // } |
| | | } |
| | | // |
| | | // // 遍历每一行 |
| | | // for (Row row : sheet) { |
| | | // // 遍历每一列 |
| | | // for (Cell cell : row) { |
| | | // CellType cellType = CellType.forInt(cell.getCellType()); |
| | | // switch (cellType) { |
| | | // case STRING: |
| | | // result.append(cell.getStringCellValue()); |
| | | // break; |
| | | // case NUMERIC: |
| | | // if (DateUtil.isCellDateFormatted(cell)) { |
| | | // result.append(cell.getDateCellValue()); |
| | | // } else { |
| | | // result.append(cell.getNumericCellValue()); |
| | | // } |
| | | // break; |
| | | // case BOOLEAN: |
| | | // result.append(cell.getBooleanCellValue()); |
| | | // break; |
| | | // case FORMULA: |
| | | // result.append(cell.getCellFormula()); |
| | | // break; |
| | | // default: |
| | | // result.append(""); |
| | | // } |
| | | // result.append("\t"); |
| | | // } |
| | | // result.append("\n"); |
| | | // } |
| | | } |
| | | return result; |
| | | |
| | | } |
| | | |
| | | // 修正 OCR 识别文本中的错误关键词 |
| | | public static String fixOcrText(String ocrText) { |
| | | // 定义错误关键词和正确内容的映射,这里处理“击 宇 强 庞”修正为“击穿强度” |
| | | // 考虑到可能有空格分隔,用正则匹配包含这些字的内容 |
| | | ocrText = ocrText.replaceAll("击\\s*宇\\s*强\\s*庞", "击穿强度"); |
| | | // 还可以继续添加其他错误修正,比如下面假设“电 压 \\(HV\\)”里的空格影响,也修正下 |
| | | ocrText = ocrText.replaceAll("电\\s*压\\s*\\(HV\\)", "电压(KV)"); |
| | | ocrText = ocrText.replaceAll("电\\s*流\\s*\\(nt\\)", "电流(mA)"); |
| | | return ocrText; |
| | | } |
| | | |
| | | public static Object readPngFile1(File file) throws IOException, TesseractException { |
| | |
| | | StringBuilder stringBuilder = new StringBuilder(); |
| | | String strTmp = ""; |
| | | while ((strTmp = buffReader.readLine()) != null) { |
| | | strTmp = strTmp.replaceAll("\t",","); |
| | | stringBuilder.append(strTmp).append("\n"); |
| | | } |
| | | buffReader.close(); |
| | |
| | | Properties prop = new Properties(); |
| | | //设置编码 |
| | | prop.put("charSet", "UTF-8"); |
| | | prop.put("user", ""); |
| | | prop.put("password", ""); |
| | | prop.put("user", StringUtils.isNotBlank(getFileDto.getDbUserName())?getFileDto.getDbUserName():""); |
| | | prop.put("password", StringUtils.isNotBlank(getFileDto.getDbPassword())?getFileDto.getDbPassword():""); |
| | | //数据地址 |
| | | String dbUrl = "jdbc:ucanaccess://" + file.getPath(); |
| | | //引入驱动 |
| | |
| | | try { |
| | | List<Object> list = new ArrayList<>(); |
| | | //遍历获取多张表数据 |
| | | // String s = "select * from " + getFileDto.getDbFileName() + " where 1=1" + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode() + |
| | | // "' and " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'"; |
| | | String s = "select * from " + getFileDto.getDbFileName() + " where 1=1"; |
| | | if(StringUtils.isNotBlank(getFileDto.getMdbEntrustCode())){ |
| | | s+=" and " + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode()+ "'"; |
| | |
| | | if(StringUtils.isNotBlank(getFileDto.getMdbSampleCode())){ |
| | | s+=" and " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'"; |
| | | } |
| | | if(StringUtils.isNotBlank(getFileDto.getBushingColorField())){ |
| | | s+=" and " + getFileDto.getBushingColorField() + " = '" + getFileDto.getBushingColor() + "'"; |
| | | } |
| | | |
| | | preparedStatement = conn.prepareStatement(s); |
| | | rs = preparedStatement.executeQuery(); |
| | | ResultSetMetaData data = rs.getMetaData(); |
| | |
| | | // 构建基础 SQL |
| | | |
| | | String sql = "SELECT * FROM "+table+" WHERE 1=1"; |
| | | if(StringUtils.isNotBlank(getFileDto.getMdbEntrustCode())){ |
| | | sql+=" AND " + getFileDto.getMdbEntrustCode() + " = '" + getFileDto.getEntrustCode()+ "'"; |
| | | if(StringUtils.isNotBlank(getFileDto.getMdbEntrustCode()) ){ |
| | | sql+=" AND (" + getFileDto.getMdbEntrustCode() + " = TRIM('" + getFileDto.getEntrustCode()+ "')"; |
| | | if(StringUtils.isNotBlank(getFileDto.getLotBatchNo())){ |
| | | sql+=" OR "+ getFileDto.getMdbEntrustCode() + " = TRIM('" + getFileDto.getLotBatchNo()+ "')"; |
| | | } |
| | | sql+=")"; |
| | | } |
| | | if(StringUtils.isNotBlank(getFileDto.getMdbSampleCode())){ |
| | | sql+=" AND " + getFileDto.getMdbSampleCode() + " = '" + getFileDto.getSampleCode() + "'"; |
| | | } |
| | | if(StringUtils.isNotBlank(getFileDto.getCableTag())){ |
| | | sql+=" AND Color = '" + getFileDto.getCableTag() + "'"; |
| | | } |
| | | // 创建 PreparedStatement 对象执行 SQL |
| | | preparedStatement = connection.prepareStatement(sql); |
| | |
| | | return tableMap; |
| | | } |
| | | |
| | | /** |
| | | * ocr识别图片内容 |
| | | * @param imagePath 图片路径 |
| | | * @return |
| | | */ |
| | | public static String ocrImageContext(String imagePath){ |
| | | //调用ocr识别服务 |
| | | Map<String,Object> jsonMap = new HashMap<>(); |
| | | jsonMap.put("imagePath",imagePath); |
| | | String requestBody = JSONUtil.toJsonStr(jsonMap); |
| | | return HttpRequest.post("localhost:8080/ocr/recognize").body(requestBody).execute().body(); |
| | | } |
| | | |
| | | public static Object readPngFile(File file) throws IOException, TesseractException { |
| | | String canonicalPath32 = FileUtil.file(".", "/jre_32/tessdata").getCanonicalPath(); |
| | | String canonicalPath64 = FileUtil.file(".", "/jre_64/tessdata").getCanonicalPath(); |