| | |
| | | // NOTE(review): presumably the overlap between adjacent chunks when text is split; |
| | | // the consumer and the unit (characters vs. tokens) are not visible here - confirm. |
| | | private static final int CHUNK_OVERLAP = 100; |
| | | |
| | | /** |
| | | * Asynchronous entry point for vectorizing a single vector record. |
| | | * |
| | | * <p>Logs the vector id together with the name of the executing thread, then |
| | | * delegates to {@link #processVector(Long)}. The method is annotated with exactly one |
| | | * {@code @Async}, qualified with {@code "threadPoolTaskExecutor"}; a second, |
| | | * unqualified {@code @Async} on the same method would not compile because the |
| | | * annotation is not repeatable. |
| | | * |
| | | * @param vectorId id of the vector record to process; passed through unchanged |
| | | */ |
| | | @Override |
| | | @Async("threadPoolTaskExecutor") |
| | | public void processVectorAsync(Long vectorId) { |
| | | log.info("开始异步向量化处理: vectorId={}, thread={}", vectorId, Thread.currentThread().getName()); |
| | | // Direct call on this instance: the actual work happens in processVector. |
| | | processVector(vectorId); |
| | | } |
| | | |
| | | @Override |
| | | public void processVector(Long vectorId) { |
| | | log.info("开始处理向量化: vectorId={}", vectorId); |
| | | KnowledgeBaseVector vector = knowledgeBaseVectorService.getById(vectorId); |
| | | if (vector == null) { |
| | | log.error("向量记录不存在: {}", vectorId); |
| | |
| | | |
| | | try { |
| | | // 更新状态为处理中 |
| | | log.info("更新状态为处理中: vectorId={}", vectorId); |
| | | knowledgeBaseVectorService.updateVectorStatus(vectorId, |
| | | KnowledgeBaseVector.STATUS_PROCESSING, null, null); |
| | | |
| | | // 获取文件内容 |
| | | log.info("获取文件信息: storageBlobId={}", vector.getStorageBlobId()); |
| | | StorageBlob blob = storageBlobService.getById(vector.getStorageBlobId()); |
| | | if (blob == null) { |
| | | throw new RuntimeException("文件不存在: " + vector.getStorageBlobId()); |
| | | } |
| | | |
| | | File file = getFile(blob); |
| | | log.info("文件路径: {}, 是否存在: {}", file.getAbsolutePath(), file.exists()); |
| | | |
| | | // 直接读取文件内容,不使用 MultipartFile 包装 |
| | | log.info("提取文件内容: fileName={}", vector.getFileName()); |
| | | String content = extractFileContent(file, vector.getFileName(), blob.getContentType()); |
| | | log.info("文件内容长度: {}", content != null ? content.length() : 0); |
| | | |
| | | if (content == null || content.trim().isEmpty()) { |
| | | throw new RuntimeException("文件内容为空"); |
| | | } |
| | | |
| | | // 文本切片 |
| | | log.info("开始文本切片"); |
| | | List<TextSegment> chunks = splitText(content, vector); |
| | | log.info("切片完成,共 {} 个块", chunks.size()); |
| | | |
| | | // 批量生成嵌入向量并存储 |
| | | int chunkCount = 0; |
| | | for (TextSegment chunk : chunks) { |
| | | log.debug("处理第 {} 个块", chunkCount + 1); |
| | | Embedding embedding = embeddingModel.embed(chunk).content(); |
| | | embeddingStore.add(embedding, chunk); |
| | | chunkCount++; |