| | |
| | | import cn.hutool.core.io.FileUtil; |
| | | import com.chinaztt.mes.docx.dto.GetFileDto; |
| | | import com.chinaztt.mes.docx.dto.ThicknessData; |
| | | import com.opencsv.CSVReader; |
| | | import com.opencsv.CSVReaderBuilder; |
| | | import com.opencsv.exceptions.CsvValidationException; |
| | | import net.sourceforge.tess4j.Tesseract; |
| | | import net.sourceforge.tess4j.TesseractException; |
| | | import org.apache.commons.lang3.ObjectUtils; |
| | | import org.apache.commons.lang3.StringUtils; |
| | | import org.apache.poi.POIXMLDocument; |
| | | import org.apache.poi.POIXMLTextExtractor; |
| | | import org.apache.poi.hssf.usermodel.HSSFPicture; |
| | | import org.apache.poi.hssf.usermodel.HSSFPictureData; |
| | | import org.apache.poi.hssf.usermodel.HSSFSheet; |
| | | import org.apache.poi.hssf.usermodel.HSSFWorkbook; |
| | | import org.apache.poi.hwpf.extractor.WordExtractor; |
| | | import org.apache.poi.openxml4j.exceptions.InvalidFormatException; |
| | | import org.apache.poi.openxml4j.opc.OPCPackage; |
| | | import org.apache.poi.ss.usermodel.*; |
| | | import org.apache.poi.ss.usermodel.Sheet; |
| | | import org.apache.poi.ss.usermodel.Workbook; |
| | | import org.apache.poi.xssf.usermodel.XSSFSheet; |
| | | import org.apache.poi.xssf.usermodel.XSSFWorkbook; |
| | | import org.apache.poi.xwpf.extractor.XWPFWordExtractor; |
| | | import java.awt.Color; |
| | | import java.awt.image.BufferedImage; |
| | | import javax.imageio.ImageIO; |
| | | |
| | | import java.io.*; |
| | | import java.nio.file.Files; |
| | | import java.sql.*; |
| | | import java.util.*; |
| | | import java.util.regex.Matcher; |
| | | import java.util.regex.Pattern; |
| | | |
| | | public class TakeWords { |
| | | |
| | | private static final String splitIdentifier = "@-@"; // Custom unique marker that readWordFile writes between cell values |
| | | |
| | | // Matches scientific notation: optional sign, digits, optional ".digits" fraction, e/E, optionally signed integer exponent |
| | | private static final Pattern SCIENTIFIC_PATTERN = Pattern.compile( |
| | | "^[+-]?\\d+(\\.\\d+)?[eE][+-]?\\d+$" |
| | | ); |
| | | |
| | | public static Object readWordFile(File file) { |
| | | String result = ""; |
| | |
| | | |
| | | StringBuilder stringBuilder = new StringBuilder(); |
| | | // 创建 reader |
| | | try (BufferedReader br = Files.newBufferedReader(file.toPath())) { |
| | | // CSV文件的分隔符 |
| | | String DELIMITER = ","; |
| | | // 按行读取 |
| | | String line; |
| | | while ((line = br.readLine()) != null) { |
| | | // 分割 |
| | | String[] columns = line.split(DELIMITER); |
| | | // 打印行 |
| | | stringBuilder.append(String.join(splitIdentifier, columns)).append("\n"); |
| | | // try (BufferedReader br = Files.newBufferedReader(file.toPath())) { |
| | | // // CSV文件的分隔符 |
| | | // String DELIMITER = ","; |
| | | // // 按行读取 |
| | | // String line; |
| | | // System.out.println(br.readLine()); |
| | | // while ((line = br.readLine()) != null) { |
| | | // // 分割 |
| | | // String[] columns = line.split(DELIMITER); |
| | | // // 打印行 |
| | | // stringBuilder.append(String.join(splitIdentifier, columns)).append("\n"); |
| | | // } |
| | | // } catch (IOException ex) { |
| | | // ex.printStackTrace(); |
| | | // } |
| | | try (FileReader fileReader = new FileReader(file); |
| | | CSVReader csvReader = new CSVReaderBuilder(fileReader).build()) { |
| | | |
| | | String[] nextLine; |
| | | while ((nextLine = csvReader.readNext()) != null) { |
| | | // 处理每一行数据 |
| | | for (String cell : nextLine) { |
| | | if(StringUtils.isNotBlank(cell)){ |
| | | stringBuilder.append(scientificToNumber(cell)).append(splitIdentifier); |
| | | } |
| | | } |
| | | stringBuilder.append("\n"); |
| | | } |
| | | } catch (IOException ex) { |
| | | ex.printStackTrace(); |
| | | } catch (IOException e) { |
| | | e.printStackTrace(); |
| | | } catch (CsvValidationException e) { |
| | | throw new RuntimeException(e); |
| | | } |
| | | return stringBuilder.toString(); |
| | | } |
| | | |
| | | /** |
| | | * 将科学计数法转换为数字 |
| | | * @param cell |
| | | * @return |
| | | */ |
| | | public static String scientificToNumber(String cell){ |
| | | if(SCIENTIFIC_PATTERN.matcher(cell).matches()){ |
| | | return String.valueOf(Double.parseDouble(cell)); |
| | | } |
| | | return cell; |
| | | } |
| | | |
| | | public static Object readMdbFile(File file, GetFileDto getFileDto) throws SQLException, ClassNotFoundException, InstantiationException, IllegalAccessException { |
| | | Map<String, Object> tableMap = new HashMap<>(16); |
| | | Properties prop = new Properties(); |