From 1ca5584d7e3200a9af65a099bd26d3593e2ba702 Mon Sep 17 00:00:00 2001
From: liyong <18434998025@163.com>
Date: 星期四, 07 五月 2026 14:36:08 +0800
Subject: [PATCH] 迁移pro

---
 src/main/java/com/ruoyi/ai/service/AiFileTextExtractor.java |  215 +++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 215 insertions(+), 0 deletions(-)

diff --git a/src/main/java/com/ruoyi/ai/service/AiFileTextExtractor.java b/src/main/java/com/ruoyi/ai/service/AiFileTextExtractor.java
new file mode 100644
index 0000000..6e37451
--- /dev/null
+++ b/src/main/java/com/ruoyi/ai/service/AiFileTextExtractor.java
@@ -0,0 +1,215 @@
+package com.ruoyi.ai.service;
+
+import com.ruoyi.common.utils.StringUtils;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.springframework.stereotype.Component;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+/**
+ * Extracts plain text from an uploaded file.
+ *
+ * <p>Plain-text files are decoded (UTF-8, falling back to GBK), {@code .docx} documents and
+ * {@code .xlsx}/{@code .xls} workbooks are parsed with Apache POI, and image files are not
+ * parsed at all: a fixed placeholder sentence naming the file is returned for them. The file
+ * type is decided solely from the extension of the original file name.
+ */
+@Component
+public class AiFileTextExtractor {
+
+    /** Largest accepted upload, in bytes (10 MB). */
+    private static final long MAX_FILE_SIZE = 10L * 1024 * 1024;
+
+    /** Charset tried when the bytes of a text file are not valid UTF-8. */
+    private static final String FALLBACK_CHARSET_NAME = "GBK";
+
+    /** Lower-case extensions whose content is decoded directly as text. */
+    private static final Set<String> PLAIN_TEXT_EXTENSIONS = extensionSet(
+            "txt", "md", "markdown", "json", "xml", "yaml", "yml", "csv", "log", "properties",
+            "java", "js", "ts", "vue", "html", "css", "sql", "py", "go", "sh", "bat");
+
+    /** Lower-case extensions treated as images. */
+    private static final Set<String> IMAGE_EXTENSIONS = extensionSet(
+            "png", "jpg", "jpeg", "webp", "bmp");
+
+    /**
+     * Extracts the text content of an uploaded file.
+     *
+     * @param file the uploaded file
+     * @return the extracted text, or a placeholder sentence for image files
+     * @throws IllegalArgumentException if the file is null or empty, is larger than
+     *         {@link #MAX_FILE_SIZE} bytes, or has an unsupported extension
+     * @throws IOException if the upload cannot be read or the document cannot be opened
+     */
+    public String extractText(MultipartFile file) throws IOException {
+        if (file == null || file.isEmpty()) {
+            throw new IllegalArgumentException("文件不能为空");
+        }
+        if (file.getSize() > MAX_FILE_SIZE) {
+            throw new IllegalArgumentException("文件过大，请控制在10MB以内");
+        }
+
+        String filename = file.getOriginalFilename();
+        String ext = getExtension(filename);
+
+        // The upload is only read into memory by the branches that actually parse it.
+        if (isImage(ext)) {
+            return "图片文件：" + filename + "，已上传，请结合图片内容识别采购单据、表格和产品明细。";
+        }
+        if (isPlainText(ext)) {
+            return decodeText(file.getBytes());
+        }
+        switch (ext) {
+            case "docx":
+                return extractDocx(file.getBytes());
+            case "xlsx":
+                return extractXlsx(file.getBytes());
+            case "xls":
+                return extractXls(file.getBytes());
+            default:
+                throw new IllegalArgumentException("暂不支持该文件类型: " + ext);
+        }
+    }
+
+    /**
+     * Tells whether the original file name of an upload has an image extension.
+     *
+     * @param file the uploaded file, may be {@code null}
+     * @return {@code true} if the extension is an image extension; {@code false} otherwise,
+     *         including when {@code file} is {@code null}
+     */
+    public boolean isImageFile(MultipartFile file) {
+        return file != null && isImage(getExtension(file.getOriginalFilename()));
+    }
+
+    /** Returns the text of the {@code .docx} document held in {@code bytes}. */
+    private String extractDocx(byte[] bytes) throws IOException {
+        try (XWPFDocument document = new XWPFDocument(new ByteArrayInputStream(bytes));
+             XWPFWordExtractor extractor = new XWPFWordExtractor(document)) {
+            return extractor.getText();
+        }
+    }
+
+    /** Returns the cell text of the {@code .xlsx} workbook held in {@code bytes}. */
+    private String extractXlsx(byte[] bytes) throws IOException {
+        try (XSSFWorkbook workbook = new XSSFWorkbook(new ByteArrayInputStream(bytes))) {
+            return extractWorkbook(workbook);
+        }
+    }
+
+    /** Returns the cell text of the legacy {@code .xls} workbook held in {@code bytes}. */
+    private String extractXls(byte[] bytes) throws IOException {
+        try (HSSFWorkbook workbook = new HSSFWorkbook(new ByteArrayInputStream(bytes))) {
+            return extractWorkbook(workbook);
+        }
+    }
+
+    /**
+     * Renders every sheet of a workbook as text.
+     *
+     * <p>Each sheet starts with a {@code Sheet: <name>} line, followed by one line per row with
+     * the cells separated by tabs. Cell text comes from {@link DataFormatter}.
+     *
+     * <p>NOTE(review): no formula evaluator is passed to the formatter, so formula cells are
+     * presumably rendered as formula text rather than as results; confirm that is intended.
+     *
+     * @param workbook the opened workbook
+     * @return the text of all sheets, in sheet order
+     */
+    private String extractWorkbook(Workbook workbook) {
+        StringBuilder text = new StringBuilder();
+        DataFormatter formatter = new DataFormatter();
+        for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
+            Sheet sheet = workbook.getSheetAt(i);
+            text.append("Sheet: ").append(sheet.getSheetName()).append('\n');
+            for (Row row : sheet) {
+                // A non-positive last cell number skips the loop, leaving an empty line.
+                int lastCellNum = row.getLastCellNum();
+                for (int c = 0; c < lastCellNum; c++) {
+                    if (c > 0) {
+                        text.append('\t');
+                    }
+                    text.append(formatter.formatCellValue(row.getCell(c)));
+                }
+                text.append('\n');
+            }
+        }
+        return text.toString();
+    }
+
+    /**
+     * Decodes the bytes of a text file, preferring UTF-8.
+     *
+     * <p>The bytes are decoded strictly as UTF-8; only when they contain a malformed sequence
+     * are they decoded as GBK instead. Strict decoding avoids comparing against a replacement
+     * character literal, which breaks whenever the source file itself is re-encoded.
+     *
+     * @param bytes the raw file content
+     * @return the decoded text
+     */
+    private String decodeText(byte[] bytes) {
+        try {
+            return StandardCharsets.UTF_8.newDecoder()
+                    .onMalformedInput(CodingErrorAction.REPORT)
+                    .onUnmappableCharacter(CodingErrorAction.REPORT)
+                    .decode(ByteBuffer.wrap(bytes))
+                    .toString();
+        } catch (CharacterCodingException notUtf8) {
+            return new String(bytes, Charset.forName(FALLBACK_CHARSET_NAME));
+        }
+    }
+
+    /**
+     * Returns the lower-cased extension of a file name, without the dot.
+     *
+     * @param filename the file name, may be {@code null} or blank
+     * @return the extension, or an empty string if the name has none
+     */
+    private String getExtension(String filename) {
+        if (!StringUtils.hasText(filename)) {
+            return "";
+        }
+        // Some clients send a full path; a dot inside a directory name is not an extension.
+        int nameStart = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\')) + 1;
+        int dot = filename.lastIndexOf('.');
+        if (dot < nameStart) {
+            return "";
+        }
+        // Locale.ROOT keeps the result independent of the server's default locale.
+        return filename.substring(dot + 1).toLowerCase(Locale.ROOT);
+    }
+
+    /** Tells whether {@code ext} (lower case, no dot) is a plain-text extension. */
+    private boolean isPlainText(String ext) {
+        return PLAIN_TEXT_EXTENSIONS.contains(ext);
+    }
+
+    /** Tells whether {@code ext} (lower case, no dot) is an image extension. */
+    private boolean isImage(String ext) {
+        return IMAGE_EXTENSIONS.contains(ext);
+    }
+
+    /** Builds an unmodifiable set from the given extensions. */
+    private static Set<String> extensionSet(String... extensions) {
+        return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(extensions)));
+    }
+}

--
Gitblit v1.9.3