昨天 ad346a7e1f1c35b09a5550c1b60cebe68f0619bf
feat(ai): 集成 Pinecone 向量数据库并实现知识库 RAG 功能

- 配置 Pinecone 向量数据库连接参数
- 实现 PineconeEmbeddingStore 集成
- 创建知识库文件向量记录表及实体类
- 添加知识库文件上传和向量化处理功能
- 实现文件切片和嵌入向量存储逻辑
- 集成 RAG 检索增强生成问答功能
- 添加知识库文件管理 API 接口
- 实现异步向量化处理机制
已添加12个文件
已修改4个文件
2115 ■■■■■ 文件已修改
doc/sql/20260609_knowledge_base_vector.sql 30 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
doc/知识库RAG功能实现文档.md 1034 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/ai/assistant/KnowledgeChatAgent.java 35 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/ai/config/EmbeddingStoreConfig.java 44 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/ai/controller/KnowledgeChatController.java 85 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/ai/dto/KnowledgeChatRequest.java 21 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/ai/service/KnowledgeRagService.java 34 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/ai/service/impl/KnowledgeRagServiceImpl.java 343 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/approve/controller/KnowledgeBaseController.java 140 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/approve/dto/KnowledgeBaseVectorVO.java 21 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/approve/mapper/KnowledgeBaseVectorMapper.java 39 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/approve/pojo/KnowledgeBase.java 15 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/approve/pojo/KnowledgeBaseVector.java 80 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/approve/service/KnowledgeBaseVectorService.java 43 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/java/com/ruoyi/approve/service/impl/KnowledgeBaseVectorServiceImpl.java 144 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
src/main/resources/application.yml 7 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
doc/sql/20260609_knowledge_base_vector.sql
对比新文件
@@ -0,0 +1,30 @@
-- Knowledge-base vector retrieval: database changes.
-- Prerequisite: the knowledge_base table must already exist.
--
-- 1. Add columns to knowledge_base.
-- `ALTER TABLE ... ADD COLUMN IF NOT EXISTS` is a MariaDB extension; MySQL rejects it with a
-- syntax error. Each column is therefore added through a prepared statement that is only an
-- ALTER when information_schema shows the column is missing (otherwise a no-op `DO 0`).
-- This keeps the script re-runnable on both MySQL and MariaDB.

-- file_count
SET @kb_ddl = IF(
    EXISTS (
        SELECT 1
        FROM information_schema.COLUMNS
        WHERE TABLE_SCHEMA = DATABASE()
          AND TABLE_NAME = 'knowledge_base'
          AND COLUMN_NAME = 'file_count'
    ),
    'DO 0',
    'ALTER TABLE knowledge_base ADD COLUMN file_count INT DEFAULT 0 COMMENT ''文件数量'''
);
PREPARE kb_stmt FROM @kb_ddl;
EXECUTE kb_stmt;
DEALLOCATE PREPARE kb_stmt;

-- total_chunk_count
SET @kb_ddl = IF(
    EXISTS (
        SELECT 1
        FROM information_schema.COLUMNS
        WHERE TABLE_SCHEMA = DATABASE()
          AND TABLE_NAME = 'knowledge_base'
          AND COLUMN_NAME = 'total_chunk_count'
    ),
    'DO 0',
    'ALTER TABLE knowledge_base ADD COLUMN total_chunk_count INT DEFAULT 0 COMMENT ''总切片数量'''
);
PREPARE kb_stmt FROM @kb_ddl;
EXECUTE kb_stmt;
DEALLOCATE PREPARE kb_stmt;

-- description
SET @kb_ddl = IF(
    EXISTS (
        SELECT 1
        FROM information_schema.COLUMNS
        WHERE TABLE_SCHEMA = DATABASE()
          AND TABLE_NAME = 'knowledge_base'
          AND COLUMN_NAME = 'description'
    ),
    'DO 0',
    'ALTER TABLE knowledge_base ADD COLUMN description VARCHAR(500) COMMENT ''知识库描述'''
);
PREPARE kb_stmt FROM @kb_ddl;
EXECUTE kb_stmt;
DEALLOCATE PREPARE kb_stmt;
-- 2. Create the table that records, per file attached to a knowledge base,
--    the state and outcome of its vectorization.
CREATE TABLE IF NOT EXISTS knowledge_base_vector (
    id                BIGINT       NOT NULL AUTO_INCREMENT COMMENT '主键ID',
    knowledge_base_id BIGINT       NOT NULL COMMENT '关联知识库ID',
    storage_blob_id   BIGINT       NOT NULL COMMENT '关联文件blob ID',
    file_name         VARCHAR(255) NOT NULL COMMENT '文件名称',
    file_type         VARCHAR(50)  NOT NULL COMMENT '文件类型(docx/pdf/xlsx/txt等)',
    vector_status     TINYINT      DEFAULT 0 COMMENT '向量化状态: 0-待处理, 1-处理中, 2-已完成, 3-失败',
    vector_error      VARCHAR(500) COMMENT '向量化失败原因',
    chunk_count       INT          DEFAULT 0 COMMENT '切片数量',
    namespace         VARCHAR(100) COMMENT '向量命名空间',
    create_time       DATETIME     DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    create_user       INT          COMMENT '创建人',
    update_time       DATETIME     DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    update_user       INT          COMMENT '更新人',
    tenant_id         BIGINT       COMMENT '租户ID',
    dept_id           BIGINT       COMMENT '部门ID',
    PRIMARY KEY (id),
    -- Secondary indexes on the two foreign-id columns and on the status column.
    KEY idx_knowledge_base_id (knowledge_base_id),
    KEY idx_storage_blob_id (storage_blob_id),
    KEY idx_vector_status (vector_status)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4 COMMENT = '知识库文件向量记录表';
doc/知识库RAG功能实现文档.md
对比新文件
@@ -0,0 +1,1034 @@
# 知识库RAG向量检索功能实现文档
## 一、功能概述
基于 RAG(Retrieval-Augmented Generation)技术实现知识库问答功能,支持:
- 知识库管理(CRUD)
- 文件上传与向量化处理
- 基于向量检索的智能问答
- 多种文件格式支持(txt、md、docx、xlsx、xls、pdf)
## 二、技术架构
### 2.1 技术栈
| 组件 | 技术 |
|------|------|
| 向量数据库 | Pinecone |
| Embedding模型 | 阿里云 DashScope text-embedding-v3 |
| LLM | 阿里云通义千问 qwen-max |
| 框架 | langchain4j |
| ORM | MyBatis-Plus |
### 2.2 架构图
```
┌─────────────────────────────────────────────────────────────┐
│                        前端应用                              │
└─────────────────────────────────────────────────────────────┘
                              │
                              ▼
┌─────────────────────────────────────────────────────────────┐
│                     Controller Layer                         │
│  ┌─────────────────────┐  ┌─────────────────────────────┐   │
│  │ KnowledgeBaseCtrl   │  │ KnowledgeChatController     │   │
│  │ (知识库管理)         │  │ (知识库问答)                 │   │
│  └─────────────────────┘  └─────────────────────────────┘   │
└─────────────────────────────────────────────────────────────┘
                              │
                              ▼
┌─────────────────────────────────────────────────────────────┐
│                      Service Layer                           │
│  ┌─────────────────────┐  ┌─────────────────────────────┐   │
│  │KnowledgeBaseService │  │ KnowledgeRagService         │   │
│  │ (知识库CRUD)         │  │ (向量化/检索)                │   │
│  └─────────────────────┘  └─────────────────────────────┘   │
└─────────────────────────────────────────────────────────────┘
                              │
                              ▼
┌─────────────────────────────────────────────────────────────┐
│                      AI Layer                                │
│  ┌─────────────────────┐  ┌─────────────────────────────┐   │
│  │ KnowledgeChatAgent  │  │ EmbeddingStore (Pinecone)   │   │
│  │ (问答Agent)          │  │ (向量存储)                   │   │
│  └─────────────────────┘  └─────────────────────────────┘   │
└─────────────────────────────────────────────────────────────┘
```
---
## 三、后端实现
### 3.1 数据库设计
#### 3.1.1 知识库表(knowledge_base)
```sql
CREATE TABLE knowledge_base (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    title VARCHAR(255) COMMENT '知识标题',
    type VARCHAR(50) COMMENT '知识类型',
    scenario VARCHAR(255) COMMENT '适用场景',
    efficiency VARCHAR(20) COMMENT '解决效率',
    problem TEXT COMMENT '问题描述',
    solution TEXT COMMENT '解决方案',
    key_points TEXT COMMENT '关键要点',
    creator VARCHAR(100) COMMENT '创建人',
    usage_count INT DEFAULT 0 COMMENT '使用次数',
    file_count INT DEFAULT 0 COMMENT '文件数量',
    total_chunk_count INT DEFAULT 0 COMMENT '总切片数量',
    description VARCHAR(500) COMMENT '知识库描述',
    create_time DATETIME DEFAULT CURRENT_TIMESTAMP,
    update_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    create_user INT,
    update_user INT,
    tenant_id BIGINT,
    dept_id BIGINT
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='知识库表';
```
#### 3.1.2 知识库向量记录表(knowledge_base_vector)
```sql
CREATE TABLE knowledge_base_vector (
    id BIGINT AUTO_INCREMENT PRIMARY KEY COMMENT '主键ID',
    knowledge_base_id BIGINT NOT NULL COMMENT '关联知识库ID',
    storage_blob_id BIGINT NOT NULL COMMENT '关联文件blob ID',
    file_name VARCHAR(255) NOT NULL COMMENT '文件名称',
    file_type VARCHAR(50) NOT NULL COMMENT '文件类型',
    vector_status TINYINT DEFAULT 0 COMMENT '向量化状态: 0-待处理, 1-处理中, 2-已完成, 3-失败',
    vector_error VARCHAR(500) COMMENT '向量化失败原因',
    chunk_count INT DEFAULT 0 COMMENT '切片数量',
    namespace VARCHAR(100) COMMENT '向量命名空间',
    create_time DATETIME DEFAULT CURRENT_TIMESTAMP,
    create_user INT,
    update_time DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    update_user INT,
    tenant_id BIGINT,
    dept_id BIGINT,
    INDEX idx_knowledge_base_id (knowledge_base_id),
    INDEX idx_storage_blob_id (storage_blob_id),
    INDEX idx_vector_status (vector_status)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='知识库文件向量记录表';
```
### 3.2 Maven依赖
```xml
<!-- langchain4j BOM -->
<dependencyManagement>
    <dependencies>
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-bom</artifactId>
            <version>1.0.0-beta3</version>
            <type>pom</type>
            <scope>import</scope>
        </dependency>
    </dependencies>
</dependencyManagement>
<dependencies>
    <!-- langchain4j æ ¸å¿ƒ -->
    <dependency>
        <groupId>dev.langchain4j</groupId>
        <artifactId>langchain4j-spring-boot-starter</artifactId>
    </dependency>
    <!-- Pinecone å‘量数据库 -->
    <dependency>
        <groupId>dev.langchain4j</groupId>
        <artifactId>langchain4j-pinecone</artifactId>
    </dependency>
    <!-- é˜¿é‡Œäº‘ DashScope -->
    <dependency>
        <groupId>dev.langchain4j</groupId>
        <artifactId>langchain4j-community-dashscope-spring-boot-starter</artifactId>
    </dependency>
</dependencies>
```
### 3.3 配置文件(application.yml)
```yaml
# Pinecone å‘量数据库配置
pinecone:
  api-key: your-pinecone-api-key
  index: your-index-name
  namespace: knowledge-base
# langchain4j é…ç½®
langchain4j:
  community:
    dashscope:
      streaming-chat-model:
        api-key: your-dashscope-api-key
        model-name: "qwen-max"
      embedding-model:
        api-key: your-dashscope-api-key
        model-name: "text-embedding-v3"
```
### 3.4 核心代码实现
#### 3.4.1 实体类
**KnowledgeBase.java**
```java
@Data
@TableName("knowledge_base")
public class KnowledgeBase implements Serializable {
    @TableId(type = IdType.AUTO)
    private Long id;
    private String title;
    private String type;
    private String scenario;
    private String efficiency;
    private String problem;
    private String solution;
    private String keyPoints;
    private String creator;
    private Integer usageCount;
    private Integer fileCount;
    private Integer totalChunkCount;
    private String description;
    @TableField(fill = FieldFill.INSERT)
    private LocalDateTime createTime;
    @TableField(fill = FieldFill.INSERT_UPDATE)
    private LocalDateTime updateTime;
    @TableField(fill = FieldFill.INSERT)
    private Integer createUser;
    @TableField(fill = FieldFill.INSERT_UPDATE)
    private Integer updateUser;
    @TableField(fill = FieldFill.INSERT)
    private Long tenantId;
    @TableField(fill = FieldFill.INSERT)
    private Long deptId;
}
```
**KnowledgeBaseVector.java**
```java
@Data
@TableName("knowledge_base_vector")
public class KnowledgeBaseVector implements Serializable {
    @TableId(type = IdType.AUTO)
    private Long id;
    private Long knowledgeBaseId;
    private Long storageBlobId;
    private String fileName;
    private String fileType;
    private Integer vectorStatus;
    private String vectorError;
    private Integer chunkCount;
    private String namespace;
    @TableField(fill = FieldFill.INSERT)
    private LocalDateTime createTime;
    @TableField(fill = FieldFill.INSERT)
    private Integer createUser;
    @TableField(fill = FieldFill.INSERT_UPDATE)
    private LocalDateTime updateTime;
    @TableField(fill = FieldFill.INSERT_UPDATE)
    private Integer updateUser;
    @TableField(fill = FieldFill.INSERT)
    private Long tenantId;
    @TableField(fill = FieldFill.INSERT)
    private Long deptId;
    // å‘量化状态常量
    public static final int STATUS_PENDING = 0;
    public static final int STATUS_PROCESSING = 1;
    public static final int STATUS_COMPLETED = 2;
    public static final int STATUS_FAILED = 3;
}
```
#### 3.4.2 EmbeddingStore配置
**EmbeddingStoreConfig.java**
```java
@Configuration
public class EmbeddingStoreConfig {
    @Value("${pinecone.api-key}")
    private String pineconeApiKey;
    @Value("${pinecone.index}")
    private String indexName;
    @Value("${pinecone.namespace}")
    private String namespace;
    @Bean
    public Pinecone pinecone() {
        return new Pinecone.Builder(pineconeApiKey).build();
    }
    @Bean
    public Index pineconeIndex(Pinecone pinecone) {
        return pinecone.getIndexConnection(indexName);
    }
    @Bean
    public EmbeddingStore<TextSegment> embeddingStore(EmbeddingModel embeddingModel) {
        return PineconeEmbeddingStore.builder()
                .apiKey(pineconeApiKey)
                .index(indexName)
                .nameSpace(namespace)
                .createIndex(PineconeServerlessIndexConfig.builder()
                        .cloud("AWS")
                        .region("us-east-1")
                        .dimension(embeddingModel.dimension())
                        .build())
                .build();
    }
}
```
#### 3.4.3 RAG服务实现
**KnowledgeRagService.java**
```java
public interface KnowledgeRagService {
    void processVectorAsync(Long vectorId);
    void processVector(Long vectorId);
    List<String> searchRelevantContent(String namespace, String query, int maxResults);
    void deleteEmbeddings(String namespace, Long storageBlobId);
}
```
**KnowledgeRagServiceImpl.java**(核心实现)
```java
@Slf4j
@Service
public class KnowledgeRagServiceImpl implements KnowledgeRagService {
    private final KnowledgeBaseVectorService knowledgeBaseVectorService;
    private final StorageBlobService storageBlobService;
    private final EmbeddingModel embeddingModel;
    private final EmbeddingStore<TextSegment> embeddingStore;
    private final FileProperties fileProperties;
    private final Index pineconeIndex;
    @Value("${pinecone.namespace}")
    private String namespace;
    private static final int CHUNK_SIZE = 500;
    private static final int CHUNK_OVERLAP = 100;
    private static final long CHUNK_THRESHOLD_BYTES = 80L * 1024 * 1024;
    private static final int EMBEDDING_MAX_LENGTH = 8000;
    @Override
    @Async("threadPoolTaskExecutor")
    public void processVectorAsync(Long vectorId) {
        processVector(vectorId);
    }
    @Override
    public void processVector(Long vectorId) {
        KnowledgeBaseVector vector = knowledgeBaseVectorService.getById(vectorId);
        if (vector == null) return;
        try {
            // æ›´æ–°çŠ¶æ€ä¸ºå¤„ç†ä¸­
            knowledgeBaseVectorService.updateVectorStatus(vectorId, STATUS_PROCESSING, null, null);
            // èŽ·å–æ–‡ä»¶å†…å®¹
            StorageBlob blob = storageBlobService.getById(vector.getStorageBlobId());
            File file = getFile(blob);
            String content = extractFileContent(file, vector.getFileName());
            if (content == null || content.trim().isEmpty()) {
                throw new RuntimeException("文件内容为空");
            }
            // æ–‡æœ¬åˆ‡ç‰‡
            List<TextSegment> chunks;
            boolean needChunk = file.length() > CHUNK_THRESHOLD_BYTES || content.length() > EMBEDDING_MAX_LENGTH;
            if (needChunk) {
                chunks = splitText(content, vector);
            } else {
                Map<String, Object> metadata = buildMetadata(vector);
                chunks = List.of(TextSegment.from(content, new Metadata(metadata)));
            }
            // ç”ŸæˆåµŒå…¥å‘量并存储
            int chunkCount = 0;
            for (TextSegment chunk : chunks) {
                Embedding embedding = embeddingModel.embed(chunk).content();
                embeddingStore.add(embedding, chunk);
                chunkCount++;
            }
            // æ›´æ–°çŠ¶æ€ä¸ºå®Œæˆ
            knowledgeBaseVectorService.updateVectorStatus(vectorId, STATUS_COMPLETED, chunkCount, null);
        } catch (Exception e) {
            log.error("向量化处理失败", e);
            knowledgeBaseVectorService.updateVectorStatus(vectorId, STATUS_FAILED, null, e.getMessage());
        }
    }
    @Override
    public List<String> searchRelevantContent(String namespace, String query, int maxResults) {
        Embedding queryEmbedding = embeddingModel.embed(query).content();
        EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest.builder()
                .queryEmbedding(queryEmbedding)
                .maxResults(maxResults)
                .minScore(0.7)
                .build();
        EmbeddingSearchResult<TextSegment> searchResult = embeddingStore.search(searchRequest);
        return searchResult.matches().stream()
                .map(match -> match.embedded().text())
                .collect(Collectors.toList());
    }
    @Override
    public void deleteEmbeddings(String namespace, Long storageBlobId) {
        Struct filter = Struct.newBuilder()
                .putFields("storageBlobId", Value.newBuilder()
                        .setStructValue(Struct.newBuilder()
                                .putFields("$eq", Value.newBuilder()
                                        .setNumberValue(storageBlobId.doubleValue())
                                        .build()))
                        .build())
                .build();
        pineconeIndex.delete(new ArrayList<>(), false, this.namespace, filter);
    }
    private String extractFileContent(File file, String fileName) throws Exception {
        String ext = getFileExtension(fileName);
        if (isPlainText(ext)) {
            return readFileWithEncoding(file);
        }
        if ("docx".equals(ext)) {
            return extractDocx(file);
        }
        if ("xlsx".equals(ext) || "xls".equals(ext)) {
            return extractExcel(file);
        }
        return readFileWithEncoding(file);
    }
    // ... å…¶ä»–辅助方法
}
```
#### 3.4.4 知识库问答Agent
**KnowledgeChatAgent.java**
```java
@AiService(
        wiringMode = EXPLICIT,
        streamingChatModel = "qwenStreamingChatModel",
        chatMemoryProvider = "chatMemoryProvider"
)
public interface KnowledgeChatAgent {
    @SystemMessage("""
            ä½ æ˜¯ä¼ä¸šçŸ¥è¯†åº“问答助手。
            ä½ éœ€è¦åŸºäºŽæä¾›çš„知识库内容回答用户问题。
            éµå¾ªä»¥ä¸‹è§„则:
            1. ä¸¥æ ¼åŸºäºŽçŸ¥è¯†åº“内容回答,不要编造信息
            2. å¦‚果知识库中没有相关信息,明确告知用户
            3. å›žç­”要准确、简洁、有条理
            4. å¼•用来源时注明"根据知识库内容"
            """)
    Flux<String> chat(@MemoryId String memoryId, @UserMessage String userMessage);
}
```
#### 3.4.5 Controller层
**KnowledgeBaseController.java**
```java
@RestController
@RequestMapping("/knowledgeBase")
@Tag(name = "知识库管理")
public class KnowledgeBaseController {
    @GetMapping("/getList")
    public AjaxResult getList(@RequestParam(defaultValue = "1") long current,
                              @RequestParam(defaultValue = "10") long size,
                              KnowledgeBase knowledgeBase) {
        Page page = new Page(current, size);
        return AjaxResult.success(knowledgeBaseService.listpage(page, knowledgeBase));
    }
    @PostMapping("/add")
    public AjaxResult add(@RequestBody KnowledgeBase knowledgeBase) {
        return AjaxResult.success(knowledgeBaseService.save(knowledgeBase));
    }
    @PostMapping("/update")
    public AjaxResult update(@RequestBody KnowledgeBase knowledgeBase) {
        return AjaxResult.success(knowledgeBaseService.updateById(knowledgeBase));
    }
    @DeleteMapping("/delete")
    public AjaxResult delete(@RequestBody List<Long> ids) {
        return AjaxResult.success(knowledgeBaseService.removeByIds(ids));
    }
    @GetMapping("/vector/status/{knowledgeBaseId}")
    @Operation(summary = "查询知识库文件向量化状态")
    public AjaxResult getVectorStatus(@PathVariable Long knowledgeBaseId) {
        return AjaxResult.success(knowledgeBaseVectorService.getVectorStatusByKnowledgeBaseId(knowledgeBaseId));
    }
    @PostMapping("/vector/reprocess/{vectorId}")
    @Operation(summary = "重新向量化文件")
    public AjaxResult reprocessVector(@PathVariable Long vectorId) {
        knowledgeBaseVectorService.reprocessVector(vectorId);
        return AjaxResult.success("已重新提交向量化任务");
    }
    @PostMapping("/file/save")
    @Operation(summary = "保存知识库文件关联")
    public AjaxResult saveKnowledgeBaseFiles(@RequestBody KnowledgeBaseFileDTO dto) {
        // ä¿å­˜é™„件关联并触发向量化
        // ...
    }
    @DeleteMapping("/file/delete")
    @Operation(summary = "删除知识库文件")
    public AjaxResult deleteKnowledgeBaseFiles(@RequestBody List<Long> vectorIds) {
        knowledgeBaseVectorService.deleteVectors(vectorIds);
        return AjaxResult.success();
    }
}
```
**KnowledgeChatController.java**
```java
@RestController
@RequestMapping("/ai/knowledge")
@Tag(name = "知识库问答")
public class KnowledgeChatController {
    private final KnowledgeChatAgent knowledgeChatAgent;
    private final KnowledgeRagService knowledgeRagService;
    private final KnowledgeBaseService knowledgeBaseService;
    @PostMapping(value = "/chat", produces = "text/stream;charset=utf-8")
    @Operation(summary = "知识库问答")
    public Flux<String> chat(@RequestBody KnowledgeChatRequest request) {
        // æ£€ç´¢ç›¸å…³å†…容
        String namespace = "kb-" + request.getKnowledgeBaseId();
        List<String> relevantContents = knowledgeRagService.searchRelevantContent(
                namespace, request.getQuestion(), 5);
        if (relevantContents.isEmpty()) {
            return Flux.just("知识库中未找到相关内容");
        }
        // æž„建上下文
        StringBuilder context = new StringBuilder();
        context.append("以下是从知识库中检索到的相关内容:\n\n");
        for (int i = 0; i < relevantContents.size(); i++) {
            context.append("【内容").append(i + 1).append("】\n");
            context.append(relevantContents.get(i)).append("\n\n");
        }
        context.append("---\n请基于以上知识库内容回答:\n").append(request.getQuestion());
        return knowledgeChatAgent.chat(request.getMemoryId(), context.toString());
    }
    @GetMapping("/list")
    @Operation(summary = "知识库列表")
    public AjaxResult listKnowledgeBases() {
        return AjaxResult.success(knowledgeBaseService.list());
    }
}
```
---
## 四、API接口文档
### 4.1 知识库管理接口
| 接口 | 方法 | 路径 | 说明 |
|------|------|------|------|
| 获取列表 | GET | /knowledgeBase/getList | 分页查询知识库列表 |
| 新增知识库 | POST | /knowledgeBase/add | 创建知识库 |
| 更新知识库 | POST | /knowledgeBase/update | 更新知识库信息 |
| 删除知识库 | DELETE | /knowledgeBase/delete | 批量删除知识库 |
| 查询向量化状态 | GET | /knowledgeBase/vector/status/{id} | 查询文件向量化状态 |
| 重新向量化 | POST | /knowledgeBase/vector/reprocess/{id} | 重新处理失败的文件 |
| 保存文件关联 | POST | /knowledgeBase/file/save | 上传文件后关联到知识库 |
| 删除文件 | DELETE | /knowledgeBase/file/delete | 删除知识库文件 |
### 4.2 知识库问答接口
| 接口 | 方法 | 路径 | 说明 |
|------|------|------|------|
| 知识库问答 | POST | /ai/knowledge/chat | 流式返回问答结果 |
| 知识库列表 | GET | /ai/knowledge/list | 获取可选知识库列表 |
### 4.3 接口详细说明
#### 4.3.1 保存知识库文件关联
**请求**
```json
POST /knowledgeBase/file/save
{
    "knowledgeBaseId": 1,
    "storageBlobIds": [100, 101, 102]
}
```
**响应**
```json
{
    "code": 200,
    "msg": "操作成功"
}
```
#### 4.3.2 知识库问答
**请求**
```json
POST /ai/knowledge/chat
Content-Type: application/json
{
    "knowledgeBaseId": 1,
    "memoryId": "session-uuid",
    "question": "如何处理库存盘点差异?"
}
```
**响应**(SSE流式)
```
根据知识库内容,库存盘点差异的处理流程如下:
1. 发现差异后,首先核对盘点记录...
2. 检查是否有漏盘或错盘...
3. ...
```
---
## 五、前端实现
### 5.1 知识库管理页面
```vue
<template>
  <div class="knowledge-base">
    <!-- åˆ—表 -->
    <el-table :data="tableData" border>
      <el-table-column prop="title" label="知识标题" />
      <el-table-column prop="type" label="知识类型" />
      <el-table-column prop="fileCount" label="文件数量" />
      <el-table-column prop="totalChunkCount" label="切片数量" />
      <el-table-column label="操作">
        <template #default="{ row }">
          <el-button @click="handleEdit(row)">编辑</el-button>
          <el-button @click="handleFiles(row)">文件管理</el-button>
          <el-button @click="handleChat(row)">问答</el-button>
          <el-button type="danger" @click="handleDelete(row)">删除</el-button>
        </template>
      </el-table-column>
    </el-table>
  </div>
</template>
<script setup>
import { ref, onMounted } from 'vue'
import { getKnowledgeBaseList, deleteKnowledgeBase } from '@/api/knowledge'
const tableData = ref([])
const loadData = async () => {
  const res = await getKnowledgeBaseList({ current: 1, size: 10 })
  tableData.value = res.data.records
}
onMounted(loadData)
</script>
```
### 5.2 文件上传与向量化状态
```vue
<template>
  <div class="file-manager">
    <!-- æ–‡ä»¶ä¸Šä¼  -->
    <el-upload
      :action="uploadUrl"
      :on-success="handleUploadSuccess"
      multiple
    >
      <el-button type="primary">上传文件</el-button>
    </el-upload>
    <!-- æ–‡ä»¶åˆ—表与向量化状态 -->
    <el-table :data="fileList">
      <el-table-column prop="fileName" label="文件名" />
      <el-table-column label="向量化状态">
        <template #default="{ row }">
          <el-tag :type="getStatusType(row.vectorStatus)">
            {{ getStatusText(row.vectorStatus) }}
          </el-tag>
        </template>
      </el-table-column>
      <el-table-column prop="chunkCount" label="切片数" />
      <el-table-column label="操作">
        <template #default="{ row }">
          <el-button v-if="row.vectorStatus === 3" @click="reprocess(row)">
            重新处理
          </el-button>
          <el-button type="danger" @click="deleteFile(row)">删除</el-button>
        </template>
      </el-table-column>
    </el-table>
  </div>
</template>
<script setup>
const uploadUrl = import.meta.env.VITE_APP_BASE_API + '/common/upload'
// ä¸Šä¼ æˆåŠŸåŽä¿å­˜å…³è”
const uploadedBlobIds = ref([])
const handleUploadSuccess = (response, file) => {
  if (response.code === 200) {
    uploadedBlobIds.value.push(response.data.id)
  }
}
// ä¿å­˜æ–‡ä»¶å…³è”
const saveFiles = async () => {
  await saveKnowledgeBaseFiles({
    knowledgeBaseId: props.knowledgeBaseId,
    storageBlobIds: uploadedBlobIds.value
  })
  // åˆ·æ–°æ–‡ä»¶åˆ—表
  loadFileList()
}
// çŠ¶æ€æ–‡æœ¬æ˜ å°„
const getStatusText = (status) => {
  const map = {
    0: '待处理',
    1: '处理中',
    2: '已完成',
    3: '失败'
  }
  return map[status] || '未知'
}
const getStatusType = (status) => {
  const map = {
    0: 'info',
    1: 'warning',
    2: 'success',
    3: 'danger'
  }
  return map[status] || 'info'
}
</script>
```
### 5.3 知识库问答界面
```vue
<template>
  <div class="knowledge-chat">
    <!-- çŸ¥è¯†åº“选择 -->
    <el-select v-model="selectedKbId" placeholder="选择知识库">
      <el-option
        v-for="kb in knowledgeBases"
        :key="kb.id"
        :label="kb.title"
        :value="kb.id"
      />
    </el-select>
    <!-- å¯¹è¯åŒºåŸŸ -->
    <div class="chat-messages">
      <div
        v-for="(msg, index) in messages"
        :key="index"
        :class="['message', msg.role]"
      >
        <div class="content">{{ msg.content }}</div>
      </div>
    </div>
    <!-- è¾“入框 -->
    <el-input
      v-model="inputQuestion"
      placeholder="请输入问题"
      @keyup.enter="sendMessage"
    >
      <template #append>
        <el-button @click="sendMessage" :loading="loading">发送</el-button>
      </template>
    </el-input>
  </div>
</template>
<script setup>
import { ref, onMounted } from 'vue'
import { getKnowledgeBaseList, knowledgeChat } from '@/api/knowledge'
const knowledgeBases = ref([])
const selectedKbId = ref(null)
const messages = ref([])
const inputQuestion = ref('')
const loading = ref(false)
const memoryId = ref(crypto.randomUUID())
// Sends the current question to the streaming chat endpoint and renders the answer
// incrementally as the last entry of `messages`.
const sendMessage = async () => {
  if (!inputQuestion.value.trim()) return
  if (!selectedKbId.value) {
    ElMessage.warning('请选择知识库')
    return
  }
  // Pressing Enter fires this handler even while a request is in flight; ignore it so two
  // streams cannot interleave into the same conversation.
  if (loading.value) return

  // Show the user's message immediately.
  messages.value.push({
    role: 'user',
    content: inputQuestion.value
  })
  loading.value = true
  try {
    const response = await fetch('/api/ai/knowledge/chat', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        knowledgeBaseId: selectedKbId.value,
        memoryId: memoryId.value,
        question: inputQuestion.value
      })
    })
    // Do not render an HTTP error body as if it were the assistant's answer.
    if (!response.ok || !response.body) {
      throw new Error(`HTTP ${response.status}`)
    }

    const reader = response.body.getReader()
    const decoder = new TextDecoder()
    let aiContent = ''
    messages.value.push({ role: 'assistant', content: '' })
    const answerIndex = messages.value.length - 1
    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      // `stream: true` keeps an incomplete multi-byte UTF-8 sequence buffered until the next
      // chunk; without it a character split across chunks is decoded as U+FFFD.
      aiContent += decoder.decode(value, { stream: true })
      messages.value[answerIndex].content = aiContent
    }
    // Flush whatever the decoder is still holding.
    aiContent += decoder.decode()
    messages.value[answerIndex].content = aiContent
  } catch (err) {
    ElMessage.error(`问答请求失败: ${err.message}`)
  } finally {
    loading.value = false
    inputQuestion.value = ''
  }
}
// Load the selectable knowledge bases once the component is mounted.
// getKnowledgeBaseList() calls the paginated /knowledgeBase/getList endpoint, whose rows are
// read from `data.records` on the list page; accept a plain array as well so the selector
// always receives a list rather than the page object.
onMounted(async () => {
  const res = await getKnowledgeBaseList()
  const payload = res.data
  knowledgeBases.value = Array.isArray(payload) ? payload : (payload?.records ?? [])
})
</script>
```
### 5.4 API封装
```javascript
// api/knowledge.js
import request from '@/utils/request'
// èŽ·å–çŸ¥è¯†åº“åˆ—è¡¨
export function getKnowledgeBaseList(params) {
  return request({
    url: '/knowledgeBase/getList',
    method: 'get',
    params
  })
}
// æ–°å¢žçŸ¥è¯†åº“
export function addKnowledgeBase(data) {
  return request({
    url: '/knowledgeBase/add',
    method: 'post',
    data
  })
}
// æ›´æ–°çŸ¥è¯†åº“
export function updateKnowledgeBase(data) {
  return request({
    url: '/knowledgeBase/update',
    method: 'post',
    data
  })
}
// åˆ é™¤çŸ¥è¯†åº“
export function deleteKnowledgeBase(ids) {
  return request({
    url: '/knowledgeBase/delete',
    method: 'delete',
    data: ids
  })
}
// èŽ·å–æ–‡ä»¶å‘é‡åŒ–çŠ¶æ€
export function getVectorStatus(knowledgeBaseId) {
  return request({
    url: `/knowledgeBase/vector/status/${knowledgeBaseId}`,
    method: 'get'
  })
}
// é‡æ–°å‘量化
export function reprocessVector(vectorId) {
  return request({
    url: `/knowledgeBase/vector/reprocess/${vectorId}`,
    method: 'post'
  })
}
// ä¿å­˜æ–‡ä»¶å…³è”
export function saveKnowledgeBaseFiles(data) {
  return request({
    url: '/knowledgeBase/file/save',
    method: 'post',
    data
  })
}
// åˆ é™¤æ–‡ä»¶
export function deleteKnowledgeBaseFiles(vectorIds) {
  return request({
    url: '/knowledgeBase/file/delete',
    method: 'delete',
    data: vectorIds
  })
}
// çŸ¥è¯†åº“问答(流式)
export async function knowledgeChat(data) {
  const response = await fetch('/api/ai/knowledge/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(data)
  })
  return response.body
}
// èŽ·å–çŸ¥è¯†åº“åˆ—è¡¨ï¼ˆé—®ç­”ç”¨ï¼‰
export function getKnowledgeBaseListForChat() {
  return request({
    url: '/ai/knowledge/list',
    method: 'get'
  })
}
```
---
## 六、核心流程
### 6.1 文件上传与向量化流程
```
1. 前端调用 /common/upload 上传文件 → 返回 storageBlobId
2. 前端调用 /knowledgeBase/file/save 关联文件到知识库
3. 后端创建 KnowledgeBaseVector 记录(状态:待处理)
4. 后端异步调用 KnowledgeRagService.processVectorAsync()
   ├── 更新状态为"处理中"
   ├── 提取文件内容(支持多种格式)
   ├── 自动检测文件编码(UTF-8/GBK)
   ├── 文本切片(大文件或长内容才切片)
   ├── 生成 Embedding 向量
   ├── 存储到 Pinecone
   └── 更新状态为"完成"或"失败"
```
### 6.2 知识库问答流程
```
1. 用户选择知识库,输入问题
2. 前端调用 /ai/knowledge/chat(流式接口)
3. 后端处理:
   ├── 构建命名空间:kb-{knowledgeBaseId}
   ├── 调用 Embedding 模型生成问题向量
   ├── 从 Pinecone 检索相关内容(minScore=0.7, maxResults=5)
   ├── 构建上下文 Prompt
   ├── 调用 LLM 生成回答
   └── 流式返回结果
```
---
## 七、注意事项
1. **Pinecone 命名空间**:不能使用 `__default__`,必须使用自定义命名空间
2. **文件编码**:自动检测 UTF-8/GBK,避免乱码
3. **切片策略**:
   - 文件 > 80MB 或内容 > 8000 字符时才切片
   - 切片大小 500 字符,重叠 100 字符
   - 优先在句子边界切分
4. **Embedding 限制**:阿里云 DashScope 限制单次输入最大 8192 字符
5. **向量删除**:使用 Pinecone 原生客户端,通过 metadata filter 删除
6. **异步处理**:向量化使用 `@Async` 异步执行,避免阻塞接口
---
## 八、文件清单
### 后端文件
```
src/main/java/com/ruoyi/
├── approve/
│   ├── controller/
│   │   └── KnowledgeBaseController.java
│   ├── pojo/
│   │   ├── KnowledgeBase.java
│   │   └── KnowledgeBaseVector.java
│   ├── service/
│   │   ├── KnowledgeBaseService.java
│   │   ├── KnowledgeBaseVectorService.java
│   │   └── impl/
│   │       ├── KnowledgeBaseServiceImpl.java
│   │       └── KnowledgeBaseVectorServiceImpl.java
│   ├── mapper/
│   │   ├── KnowledgeBaseMapper.java
│   │   └── KnowledgeBaseVectorMapper.java
│   └── dto/
│       └── KnowledgeBaseVectorVO.java
└── ai/
    ├── config/
    │   ├── EmbeddingStoreConfig.java
    │   └── XiaozhiAgentConfig.java
    ├── controller/
    │   └── KnowledgeChatController.java
    ├── assistant/
    │   └── KnowledgeChatAgent.java
    ├── service/
    │   ├── KnowledgeRagService.java
    │   └── impl/
    │       └── KnowledgeRagServiceImpl.java
    └── dto/
        └── KnowledgeChatRequest.java
```
### 前端文件
```
src/views/knowledge/
├── index.vue              # 知识库列表
├── form.vue               # 新增/编辑表单
├── files.vue              # 文件管理
└── chat.vue               # 知识库问答
src/api/knowledge.js       # API封装
```
src/main/java/com/ruoyi/ai/assistant/KnowledgeChatAgent.java
对比新文件
@@ -0,0 +1,35 @@
package com.ruoyi.ai.assistant;
import dev.langchain4j.service.MemoryId;
import dev.langchain4j.service.SystemMessage;
import dev.langchain4j.service.UserMessage;
import dev.langchain4j.service.spring.AiService;
import reactor.core.publisher.Flux;
import static dev.langchain4j.service.spring.AiServiceWiringMode.EXPLICIT;
/**
 * çŸ¥è¯†åº“问答Agent
 * åŸºäºŽRAG检索增强生成
 */
@AiService(
        wiringMode = EXPLICIT,
        streamingChatModel = "qwenStreamingChatModel",
        chatMemoryProvider = "chatMemoryProviderXiaozhi"
)
public interface KnowledgeChatAgent {
    @SystemMessage("""
            ä½ æ˜¯ä¼ä¸šçŸ¥è¯†åº“问答助手。
            ä½ éœ€è¦åŸºäºŽæä¾›çš„知识库内容回答用户问题。
            éµå¾ªä»¥ä¸‹è§„则:
            1. ä¸¥æ ¼åŸºäºŽçŸ¥è¯†åº“内容回答,不要编造信息
            2. å¦‚果知识库中没有相关信息,明确告知用户
            3. å›žç­”要准确、简洁、有条理
            4. å¦‚果内容较多,使用分点列表形式
            5. å¼•用来源时注明"根据知识库内容"
            """)
    Flux<String> chat(@MemoryId String memoryId, @UserMessage String userMessage);
}
src/main/java/com/ruoyi/ai/config/EmbeddingStoreConfig.java
@@ -5,32 +5,48 @@
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.pinecone.PineconeEmbeddingStore;
import dev.langchain4j.store.embedding.pinecone.PineconeServerlessIndexConfig;
import org.springframework.beans.factory.annotation.Autowired;
import io.pinecone.clients.Index;
import io.pinecone.clients.Pinecone;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * @author :yys
 * @date : 2025/5/2 21:07
 * å‘量存储配置
 */
@Configuration
public class EmbeddingStoreConfig {
    @Autowired
    private EmbeddingModel embeddingModel;
    @Value("${pinecone.api-key:pcsk_4SJLnh_tNB3wSLJU8tc4E5P28PcXX8eCLdURqZpVhg1FMV8CRYxjneWdzqRdB5Ftqooi9}")
    private String pineconeApiKey;
    @Value("${pinecone.index:xiaozhi-index}")
    private String indexName;
    @Value("${pinecone.namespace:knowledge-base}")
    private String namespace;
    @Bean
    public EmbeddingStore<TextSegment> embeddingStore() {
        //创建向量存储
    public Pinecone pinecone() {
        return new Pinecone.Builder(pineconeApiKey).build();
    }
    @Bean
    public Index pineconeIndex(Pinecone pinecone) {
        return pinecone.getIndexConnection(indexName);
    }
    @Bean
    public EmbeddingStore<TextSegment> embeddingStore(EmbeddingModel embeddingModel) {
        return PineconeEmbeddingStore.builder()
                .apiKey("pcsk_4SJLnh_tNB3wSLJU8tc4E5P28PcXX8eCLdURqZpVhg1FMV8CRYxjneWdzqRdB5Ftqooi9")
                .index("xiaozhi-index")//如果指定的索引不存在,将创建一个新的索引
                .nameSpace("xiaozhi-namespace") //如果指定的名称空间不存在,将创建一个新的名称 ç©ºé—´
                .apiKey(pineconeApiKey)
                .index(indexName)
                .nameSpace(namespace)
                .createIndex(PineconeServerlessIndexConfig.builder()
                        .cloud("AWS") //指定索引部署在 AWS äº‘服务上。
                        .region("us-east-1") //指定索引所在的 AWS åŒºåŸŸä¸º us-east-1。
                        .dimension(embeddingModel.dimension()) //指定索引的向量维度,该维度与 embeddedModel ç”Ÿæˆçš„向量维度相同。
                        .cloud("AWS")
                        .region("us-east-1")
                        .dimension(embeddingModel.dimension())
                        .build())
                .build();
    }
}
}
src/main/java/com/ruoyi/ai/controller/KnowledgeChatController.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,85 @@
package com.ruoyi.ai.controller;
import com.ruoyi.ai.assistant.KnowledgeChatAgent;
import com.ruoyi.ai.dto.KnowledgeChatRequest;
import com.ruoyi.ai.service.KnowledgeRagService;
import com.ruoyi.approve.pojo.KnowledgeBase;
import com.ruoyi.approve.service.KnowledgeBaseService;
import com.ruoyi.common.utils.StringUtils;
import com.ruoyi.framework.web.domain.AjaxResult;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.*;
import reactor.core.publisher.Flux;
import java.util.List;
/**
 * çŸ¥è¯†åº“问答Controller
 */
@Slf4j
@RestController
@RequestMapping("/ai/knowledge")
@RequiredArgsConstructor
@Tag(name = "知识库问答")
public class KnowledgeChatController {
    private final KnowledgeChatAgent knowledgeChatAgent;
    private final KnowledgeRagService knowledgeRagService;
    private final KnowledgeBaseService knowledgeBaseService;
    /**
     * çŸ¥è¯†åº“问答(流式返回)
     */
    @PostMapping(value = "/chat", produces = "text/stream;charset=utf-8")
    @Operation(summary = "知识库问答")
    public Flux<String> chat(@RequestBody KnowledgeChatRequest request) {
        if (request.getKnowledgeBaseId() == null) {
            return Flux.just("知识库ID不能为空");
        }
        if (!StringUtils.hasText(request.getMemoryId())) {
            return Flux.just("会话ID不能为空");
        }
        if (!StringUtils.hasText(request.getQuestion())) {
            return Flux.just("问题不能为空");
        }
        KnowledgeBase knowledgeBase = knowledgeBaseService.getById(request.getKnowledgeBaseId());
        if (knowledgeBase == null) {
            return Flux.just("知识库不存在");
        }
        String namespace = "kb-" + request.getKnowledgeBaseId();
        List<String> relevantContents = knowledgeRagService.searchRelevantContent(
                namespace, request.getQuestion(), 5);
        if (relevantContents.isEmpty()) {
            return Flux.just("知识库中未找到相关内容,请先上传相关文档。");
        }
        StringBuilder contextBuilder = new StringBuilder();
        contextBuilder.append("以下是从知识库中检索到的相关内容:\n\n");
        for (int i = 0; i < relevantContents.size(); i++) {
            contextBuilder.append("【内容").append(i + 1).append("】\n");
            contextBuilder.append(relevantContents.get(i)).append("\n\n");
        }
        contextBuilder.append("---\n");
        contextBuilder.append("请基于以上知识库内容回答用户问题:\n");
        contextBuilder.append(request.getQuestion());
        return knowledgeChatAgent.chat(request.getMemoryId(), contextBuilder.toString());
    }
    /**
     * çŸ¥è¯†åº“列表(用于选择知识库)
     */
    @GetMapping("/list")
    @Operation(summary = "知识库列表")
    public AjaxResult listKnowledgeBases() {
        List<KnowledgeBase> list = knowledgeBaseService.list();
        return AjaxResult.success(list);
    }
}
src/main/java/com/ruoyi/ai/dto/KnowledgeChatRequest.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,21 @@
package com.ruoyi.ai.dto;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
/**
 * Request body for the knowledge-base chat endpoint.
 * All three fields are marked required in the API schema.
 */
@Data
@Schema(description = "知识库问答请求")
public class KnowledgeChatRequest {
    // Id of the knowledge base to query.
    @Schema(description = "知识库ID", required = true)
    private Long knowledgeBaseId;
    // Conversation id, used to keep chat context across turns.
    @Schema(description = "会话ID,用于保持上下文", required = true)
    private String memoryId;
    // The user's question text.
    @Schema(description = "用户提问内容", required = true)
    private String question;
}
src/main/java/com/ruoyi/ai/service/KnowledgeRagService.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,34 @@
package com.ruoyi.ai.service;
import java.util.List;
/**
 * Knowledge-base RAG service.
 * Responsible for vectorizing files and for retrieving stored content.
 */
public interface KnowledgeRagService {
    /**
     * Processes vectorization asynchronously.
     *
     * @param vectorId id of the vector record to process
     */
    void processVectorAsync(Long vectorId);
    /**
     * Processes vectorization synchronously.
     *
     * @param vectorId id of the vector record to process
     */
    void processVector(Long vectorId);
    /**
     * Retrieves relevant content.
     * @param namespace namespace
     * @param query query text
     * @param maxResults maximum number of results
     * @return list of relevant content
     */
    List<String> searchRelevantContent(String namespace, String query, int maxResults);
    /**
     * Deletes the vector data of the given file.
     *
     * @param namespace     namespace
     * @param storageBlobId id of the stored file whose vectors are removed
     */
    void deleteEmbeddings(String namespace, Long storageBlobId);
}
src/main/java/com/ruoyi/ai/service/impl/KnowledgeRagServiceImpl.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,343 @@
package com.ruoyi.ai.service.impl;
import com.ruoyi.ai.service.KnowledgeRagService;
import com.ruoyi.approve.pojo.KnowledgeBaseVector;
import com.ruoyi.approve.service.KnowledgeBaseVectorService;
import com.ruoyi.basic.pojo.StorageBlob;
import com.ruoyi.basic.service.StorageBlobService;
import com.ruoyi.common.config.FileProperties;
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import dev.langchain4j.store.embedding.EmbeddingStore;
import io.pinecone.clients.Index;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.io.File;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * çŸ¥è¯†åº“RAG服务实现
 */
@Slf4j
@Service
public class KnowledgeRagServiceImpl implements KnowledgeRagService {
    private final KnowledgeBaseVectorService knowledgeBaseVectorService;
    private final StorageBlobService storageBlobService;
    private final EmbeddingModel embeddingModel;
    private final EmbeddingStore<TextSegment> embeddingStore;
    private final FileProperties fileProperties;
    private final Index pineconeIndex;
    @Value("${pinecone.namespace:knowledge-base}")
    private String namespace;
    public KnowledgeRagServiceImpl(
            KnowledgeBaseVectorService knowledgeBaseVectorService,
            StorageBlobService storageBlobService,
            EmbeddingModel embeddingModel,
            EmbeddingStore<TextSegment> embeddingStore,
            FileProperties fileProperties,
            Index pineconeIndex) {
        this.knowledgeBaseVectorService = knowledgeBaseVectorService;
        this.storageBlobService = storageBlobService;
        this.embeddingModel = embeddingModel;
        this.embeddingStore = embeddingStore;
        this.fileProperties = fileProperties;
        this.pineconeIndex = pineconeIndex;
    }
    private static final int CHUNK_SIZE = 500;
    private static final int CHUNK_OVERLAP = 100;
    private static final long CHUNK_THRESHOLD_BYTES = 80L * 1024 * 1024;
    private static final int EMBEDDING_MAX_LENGTH = 8000;
    @Override
    @Async("threadPoolTaskExecutor")
    public void processVectorAsync(Long vectorId) {
        log.info("开始异步向量化处理: vectorId={}, thread={}", vectorId, Thread.currentThread().getName());
        processVector(vectorId);
    }
    @Override
    public void processVector(Long vectorId) {
        log.info("开始处理向量化: vectorId={}", vectorId);
        KnowledgeBaseVector vector = knowledgeBaseVectorService.getById(vectorId);
        if (vector == null) {
            log.error("向量记录不存在: {}", vectorId);
            return;
        }
        try {
            knowledgeBaseVectorService.updateVectorStatus(vectorId,
                    KnowledgeBaseVector.STATUS_PROCESSING, null, null);
            StorageBlob blob = storageBlobService.getById(vector.getStorageBlobId());
            if (blob == null) {
                throw new RuntimeException("文件不存在: " + vector.getStorageBlobId());
            }
            File file = getFile(blob);
            log.info("文件路径: {}, æ˜¯å¦å­˜åœ¨: {}", file.getAbsolutePath(), file.exists());
            long fileSize = file.length();
            String content = extractFileContent(file, vector.getFileName());
            log.info("文件内容长度: {}", content != null ? content.length() : 0);
            if (content == null || content.trim().isEmpty()) {
                throw new RuntimeException("文件内容为空");
            }
            List<TextSegment> chunks;
            boolean needChunk = fileSize > CHUNK_THRESHOLD_BYTES || content.length() > EMBEDDING_MAX_LENGTH;
            if (needChunk) {
                log.info("开始切片: fileSize={}, contentLength={}", fileSize, content.length());
                chunks = splitText(content, vector);
                log.info("切片完成,共 {} ä¸ªå—", chunks.size());
            } else {
                log.info("文件较小,不进行切片");
                Map<String, Object> metadata = buildMetadata(vector);
                chunks = List.of(TextSegment.from(content, new dev.langchain4j.data.document.Metadata(metadata)));
            }
            int chunkCount = 0;
            for (TextSegment chunk : chunks) {
                Embedding embedding = embeddingModel.embed(chunk).content();
                embeddingStore.add(embedding, chunk);
                chunkCount++;
            }
            knowledgeBaseVectorService.updateVectorStatus(vectorId,
                    KnowledgeBaseVector.STATUS_COMPLETED, chunkCount, null);
            log.info("向量化处理完成: vectorId={}, chunkCount={}", vectorId, chunkCount);
        } catch (Exception e) {
            log.error("向量化处理失败: vectorId={}", vectorId, e);
            knowledgeBaseVectorService.updateVectorStatus(vectorId,
                    KnowledgeBaseVector.STATUS_FAILED, null, e.getMessage());
        }
    }
    @Override
    public List<String> searchRelevantContent(String namespace, String query, int maxResults) {
        try {
            Embedding queryEmbedding = embeddingModel.embed(query).content();
            EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest.builder()
                    .queryEmbedding(queryEmbedding)
                    .maxResults(maxResults)
                    .minScore(0.7)
                    .build();
            EmbeddingSearchResult<TextSegment> searchResult = embeddingStore.search(searchRequest);
            return searchResult.matches().stream()
                    .map(match -> match.embedded().text())
                    .collect(Collectors.toList());
        } catch (Exception e) {
            log.error("向量检索失败: namespace={}", namespace, e);
            return new ArrayList<>();
        }
    }
    @Override
    public void deleteEmbeddings(String namespace, Long storageBlobId) {
        log.info("删除向量数据: namespace={}, storageBlobId={}", namespace, storageBlobId);
        try {
            Struct filter = Struct.newBuilder()
                    .putFields("storageBlobId", Value.newBuilder()
                            .setStructValue(Struct.newBuilder()
                                    .putFields("$eq", Value.newBuilder()
                                            .setNumberValue(storageBlobId.doubleValue())
                                            .build()))
                            .build())
                    .build();
            List<String> emptyIds = new ArrayList<>();
            pineconeIndex.delete(emptyIds, false, this.namespace, filter);
            log.info("向量删除完成: storageBlobId={}", storageBlobId);
        } catch (Exception e) {
            log.error("删除向量数据失败: namespace={}, storageBlobId={}", namespace, storageBlobId, e);
        }
    }
    private File getFile(StorageBlob blob) {
        String path = blob.getPath();
        if (path != null && !path.isEmpty()) {
            return new File(new File(fileProperties.getPath(), path), blob.getUidFilename());
        }
        return new File(fileProperties.getPath(), blob.getUidFilename());
    }
    private String extractFileContent(File file, String fileName) throws Exception {
        String ext = getFileExtension(fileName);
        if (isPlainText(ext)) {
            return readFileWithEncoding(file);
        }
        if ("docx".equals(ext)) {
            return extractDocx(file);
        }
        if ("xlsx".equals(ext)) {
            return extractXlsx(file);
        }
        if ("xls".equals(ext)) {
            return extractXls(file);
        }
        return readFileWithEncoding(file);
    }
    private String readFileWithEncoding(File file) throws Exception {
        byte[] bytes = Files.readAllBytes(file.toPath());
        String utf8Content = new String(bytes, StandardCharsets.UTF_8);
        if (isValidUtf8(utf8Content)) {
            log.debug("文件编码: UTF-8");
            return utf8Content;
        }
        try {
            Charset gbk = Charset.forName("GBK");
            String gbkContent = new String(bytes, gbk);
            log.debug("文件编码: GBK");
            return gbkContent;
        } catch (Exception e) {
            log.warn("编码检测失败,使用 UTF-8");
            return utf8Content;
        }
    }
    private boolean isValidUtf8(String decoded) {
        // æ£€æŸ¥æ›¿æ¢å­—符 U+FFFD (UTF-8 è§£ç å¤±è´¥æ—¶å‡ºçް)
        if (decoded.contains("�")) {
            return false;
        }
        int invalidCount = 0;
        int checkLen = Math.min(decoded.length(), 1000);
        for (int i = 0; i < checkLen; i++) {
            char c = decoded.charAt(i);
            // æ£€æŸ¥ç§æœ‰ä½¿ç”¨åŒºåŸŸ (U+E000-U+F8FF) æˆ–异常控制字符
            if ((c >= '' && c <= '') || (c < ' ' && c != '\n' && c != '\r' && c != '\t')) {
                invalidCount++;
            }
        }
        return invalidCount < checkLen * 0.05;
    }
    private String getFileExtension(String fileName) {
        if (fileName == null || !fileName.contains(".")) {
            return "";
        }
        return fileName.substring(fileName.lastIndexOf('.') + 1).toLowerCase();
    }
    private boolean isPlainText(String ext) {
        return "txt".equals(ext) || "md".equals(ext) || "json".equals(ext)
                || "csv".equals(ext) || "xml".equals(ext) || "yaml".equals(ext)
                || "yml".equals(ext);
    }
    private String extractDocx(File file) throws Exception {
        try (var doc = new org.apache.poi.xwpf.usermodel.XWPFDocument(new java.io.FileInputStream(file));
             var extractor = new org.apache.poi.xwpf.extractor.XWPFWordExtractor(doc)) {
            return extractor.getText();
        }
    }
    private String extractXlsx(File file) throws Exception {
        try (var workbook = new org.apache.poi.xssf.usermodel.XSSFWorkbook(file)) {
            return extractWorkbook(workbook);
        }
    }
    private String extractXls(File file) throws Exception {
        try (var workbook = new org.apache.poi.hssf.usermodel.HSSFWorkbook(new java.io.FileInputStream(file))) {
            return extractWorkbook(workbook);
        }
    }
    private String extractWorkbook(org.apache.poi.ss.usermodel.Workbook workbook) {
        StringBuilder text = new StringBuilder();
        var formatter = new org.apache.poi.ss.usermodel.DataFormatter();
        for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
            var sheet = workbook.getSheetAt(i);
            text.append("Sheet: ").append(sheet.getSheetName()).append("\n");
            for (var row : sheet) {
                for (var cell : row) {
                    text.append(formatter.formatCellValue(cell)).append("\t");
                }
                text.append("\n");
            }
        }
        return text.toString();
    }
    /**
     * Splits text into overlapping segments, each carrying the file's metadata.
     * <p>
     * Text no longer than CHUNK_SIZE becomes a single segment without a
     * chunkIndex. Longer text is cut into windows of about CHUNK_SIZE
     * characters; consecutive windows overlap by CHUNK_OVERLAP characters and
     * each segment records its position as "chunkIndex" metadata.
     *
     * @param content full text to split
     * @param vector  record supplying the metadata copied onto every segment
     * @return segments in document order
     */
    private List<TextSegment> splitText(String content, KnowledgeBaseVector vector) {
        List<TextSegment> chunks = new ArrayList<>();
        // Short text: one segment, no chunkIndex metadata.
        if (content.length() <= CHUNK_SIZE) {
            Map<String, Object> metadata = buildMetadata(vector);
            chunks.add(TextSegment.from(content, new dev.langchain4j.data.document.Metadata(metadata)));
            return chunks;
        }
        int start = 0;
        int chunkIndex = 0;
        while (start < content.length()) {
            int end = Math.min(start + CHUNK_SIZE, content.length());
            if (end < content.length()) {
                // Prefer to cut just after the last Chinese full stop or newline at or
                // before the window end...
                int lastPeriod = content.lastIndexOf('。', end);
                int lastNewline = content.lastIndexOf('\n', end);
                int boundary = Math.max(lastPeriod, lastNewline);
                // ...but only when that keeps the chunk longer than half a window.
                if (boundary > start + CHUNK_SIZE / 2) {
                    end = boundary + 1;
                }
            }
            String chunkText = content.substring(start, end).trim();
            // Whitespace-only windows are dropped and do not consume an index.
            if (!chunkText.isEmpty()) {
                Map<String, Object> metadata = buildMetadata(vector);
                metadata.put("chunkIndex", chunkIndex);
                chunks.add(TextSegment.from(chunkText, new dev.langchain4j.data.document.Metadata(metadata)));
                chunkIndex++;
            }
            // Step back by the overlap so the next window repeats the tail of this one.
            start = end - CHUNK_OVERLAP;
            if (start < 0) start = 0;
            // True once the current window reached the end of the content.
            if (start >= content.length() - CHUNK_OVERLAP) break;
        }
        return chunks;
    }
    private Map<String, Object> buildMetadata(KnowledgeBaseVector vector) {
        Map<String, Object> metadata = new HashMap<>();
        metadata.put("knowledgeBaseId", vector.getKnowledgeBaseId());
        metadata.put("storageBlobId", vector.getStorageBlobId());
        metadata.put("fileName", vector.getFileName());
        metadata.put("namespace", vector.getNamespace());
        return metadata;
    }
}
src/main/java/com/ruoyi/approve/controller/KnowledgeBaseController.java
@@ -2,16 +2,25 @@
import com.baomidou.mybatisplus.core.toolkit.CollectionUtils;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.ruoyi.approve.dto.KnowledgeBaseVectorVO;
import com.ruoyi.approve.pojo.KnowledgeBase;
import com.ruoyi.approve.pojo.KnowledgeBaseVector;
import com.ruoyi.approve.service.KnowledgeBaseService;
import com.ruoyi.approve.service.KnowledgeBaseVectorService;
import com.ruoyi.basic.dto.StorageAttachmentDTO;
import com.ruoyi.basic.dto.StorageBlobDTO;
import com.ruoyi.basic.pojo.StorageBlob;
import com.ruoyi.basic.service.StorageAttachmentService;
import com.ruoyi.basic.service.StorageBlobService;
import com.ruoyi.common.utils.poi.ExcelUtil;
import com.ruoyi.framework.web.domain.AjaxResult;
import io.swagger.v3.oas.annotations.tags.Tag;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.servlet.http.HttpServletResponse;
import lombok.AllArgsConstructor;
import org.springframework.web.bind.annotation.*;
import java.util.ArrayList;
import java.util.List;
@RestController
@@ -20,40 +29,42 @@
@Tag(name = "知识库管理")
public class KnowledgeBaseController {
    private KnowledgeBaseService knowledgeBaseService;
    private KnowledgeBaseVectorService knowledgeBaseVectorService;
    private StorageAttachmentService storageAttachmentService;
    private StorageBlobService storageBlobService;
    /**、
    /**
     * èŽ·å–åˆ—è¡¨
     * @return
     */
    @GetMapping("/getList")
    public AjaxResult getList(@RequestParam(defaultValue = "1") long current,
                              @RequestParam(defaultValue = "10") long size, KnowledgeBase knowledgeBase) {
        Page page = new Page(current, size);
        return AjaxResult.success(knowledgeBaseService.listpage(page,knowledgeBase));
        return AjaxResult.success(knowledgeBaseService.listpage(page, knowledgeBase));
    }
    /**、
     * å¢žæ·»
     * @return
    /**
     * æ–°å¢žçŸ¥è¯†åº“
     */
    @PostMapping("/add")
    public AjaxResult add(@RequestBody KnowledgeBase knowledgeBase){
    public AjaxResult add(@RequestBody KnowledgeBase knowledgeBase) {
        return AjaxResult.success(knowledgeBaseService.save(knowledgeBase));
    }
    /**
     * æ›´æ–°
     * @return
     * æ›´æ–°çŸ¥è¯†åº“
     */
    @PostMapping("/update")
    public AjaxResult update(@RequestBody KnowledgeBase knowledgeBase){
    public AjaxResult update(@RequestBody KnowledgeBase knowledgeBase) {
        return AjaxResult.success(knowledgeBaseService.updateById(knowledgeBase));
    }
    /**
     * åˆ é™¤
     * @return
     * åˆ é™¤çŸ¥è¯†åº“
     */
    @DeleteMapping("/delete")
    public AjaxResult delete(@RequestBody List<Long> ids){
        if(CollectionUtils.isEmpty(ids)) return AjaxResult.error("请传入要删除的ID");
    public AjaxResult delete(@RequestBody List<Long> ids) {
        if (CollectionUtils.isEmpty(ids)) return AjaxResult.error("请传入要删除的ID");
        return AjaxResult.success(knowledgeBaseService.removeByIds(ids));
    }
@@ -65,4 +76,101 @@
        util.exportExcel(response, accountExpenses, "知识库管理导出");
    }
}
    /**
     * æŸ¥è¯¢çŸ¥è¯†åº“文件向量化状态
     */
    @GetMapping("/vector/status/{knowledgeBaseId}")
    @Operation(summary = "查询知识库文件向量化状态")
    public AjaxResult getVectorStatus(@PathVariable Long knowledgeBaseId) {
        List<KnowledgeBaseVectorVO> list = knowledgeBaseVectorService.getVectorStatusByKnowledgeBaseId(knowledgeBaseId);
        return AjaxResult.success(list);
    }
    /**
     * é‡æ–°å‘量化文件
     */
    @PostMapping("/vector/reprocess/{vectorId}")
    @Operation(summary = "重新向量化文件")
    public AjaxResult reprocessVector(@PathVariable Long vectorId) {
        knowledgeBaseVectorService.reprocessVector(vectorId);
        return AjaxResult.success("已重新提交向量化任务");
    }
    /**
     * ä¿å­˜çŸ¥è¯†åº“文件关联(文件上传后调用)
     * ä¸Šä¼ æµç¨‹ï¼š
     * 1. å…ˆè°ƒç”¨ /common/upload ä¸Šä¼ æ–‡ä»¶ï¼ŒèŽ·å– storageBlobDTOs
     * 2. å†è°ƒç”¨æ­¤æŽ¥å£å…³è”文件到知识库并触发向量化
     */
    @PostMapping("/file/save")
    @Operation(summary = "保存知识库文件关联")
    public AjaxResult saveKnowledgeBaseFiles(@RequestBody KnowledgeBaseFileDTO dto) {
        if (dto.getKnowledgeBaseId() == null) {
            return AjaxResult.error("知识库ID不能为空");
        }
        if (CollectionUtils.isEmpty(dto.getStorageBlobIds())) {
            return AjaxResult.error("文件ID不能为空");
        }
        // ä¿å­˜é™„件关联
        StorageAttachmentDTO attachmentDTO = new StorageAttachmentDTO();
        attachmentDTO.setRecordType("knowledge_base");
        attachmentDTO.setRecordId(dto.getKnowledgeBaseId());
        attachmentDTO.setApplication("rag_file");
        List<StorageBlobDTO> blobDTOs = new ArrayList<>();
        for (Long blobId : dto.getStorageBlobIds()) {
            StorageBlobDTO blobDTO = new StorageBlobDTO();
            blobDTO.setId(blobId);
            blobDTOs.add(blobDTO);
        }
        attachmentDTO.setStorageBlobDTOs(blobDTOs);
        storageAttachmentService.saveStorageAttachment(attachmentDTO);
        // åˆ›å»ºå‘量记录并触发向量化
        for (Long blobId : dto.getStorageBlobIds()) {
            StorageBlob blob = storageBlobService.getById(blobId);
            if (blob != null) {
                String fileName = blob.getOriginalFilename();
                String fileType = getFileExtension(fileName);
                knowledgeBaseVectorService.createVectorRecord(
                        dto.getKnowledgeBaseId(),
                        blobId,
                        fileName,
                        fileType
                );
            }
        }
        return AjaxResult.success();
    }
    private String getFileExtension(String fileName) {
        if (fileName == null || !fileName.contains(".")) {
            return "unknown";
        }
        return fileName.substring(fileName.lastIndexOf('.') + 1).toLowerCase();
    }
    /**
     * åˆ é™¤çŸ¥è¯†åº“文件
     */
    @DeleteMapping("/file/delete")
    @Operation(summary = "删除知识库文件")
    public AjaxResult deleteKnowledgeBaseFiles(@RequestBody List<Long> vectorIds) {
        if (CollectionUtils.isEmpty(vectorIds)) {
            return AjaxResult.error("请选择要删除的文件");
        }
        knowledgeBaseVectorService.deleteVectors(vectorIds);
        return AjaxResult.success();
    }
    /**
     * Request body for linking uploaded files to a knowledge base.
     */
    @lombok.Data
    public static class KnowledgeBaseFileDTO {
        // Target knowledge base.
        private Long knowledgeBaseId;
        // Ids of the stored blobs to attach.
        private List<Long> storageBlobIds;
    }
}
src/main/java/com/ruoyi/approve/dto/KnowledgeBaseVectorVO.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,21 @@
package com.ruoyi.approve.dto;
import com.ruoyi.approve.pojo.KnowledgeBaseVector;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
/**
 * View object for a knowledge-base file's vector status: the vector record
 * plus URLs for the file.
 */
@Data
@EqualsAndHashCode(callSuper = true)
@Schema(description = "知识库文件向量状态VO")
public class KnowledgeBaseVectorVO extends KnowledgeBaseVector {
    // File preview URL.
    @Schema(description = "文件预览URL")
    private String previewUrl;
    // File download URL.
    @Schema(description = "文件下载URL")
    private String downloadUrl;
}
src/main/java/com/ruoyi/approve/mapper/KnowledgeBaseVectorMapper.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,39 @@
package com.ruoyi.approve.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.ruoyi.approve.dto.KnowledgeBaseVectorVO;
import com.ruoyi.approve.pojo.KnowledgeBaseVector;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import java.util.List;
/**
 * MyBatis mapper for knowledge-base file vector records.
 */
@Mapper
public interface KnowledgeBaseVectorMapper extends BaseMapper<KnowledgeBaseVector> {
    /**
     * Lists the vector status of the files in a knowledge base, newest first.
     * The query fills previewUrl from storage_blob.path and selects no value
     * for downloadUrl.
     */
    @Select("SELECT v.*, b.path as previewUrl " +
            "FROM knowledge_base_vector v " +
            "LEFT JOIN storage_blob b ON v.storage_blob_id = b.id " +
            "WHERE v.knowledge_base_id = #{knowledgeBaseId} " +
            "ORDER BY v.create_time DESC")
    List<KnowledgeBaseVectorVO> selectByKnowledgeBaseId(@Param("knowledgeBaseId") Long knowledgeBaseId);
    /**
     * Counts the files (vector records) of a knowledge base.
     */
    @Select("SELECT COUNT(*) FROM knowledge_base_vector WHERE knowledge_base_id = #{knowledgeBaseId}")
    int countByKnowledgeBaseId(@Param("knowledgeBaseId") Long knowledgeBaseId);
    /**
     * Sums chunk_count over the records of a knowledge base whose
     * vector_status is 2; returns 0 when there are none.
     */
    @Select("SELECT COALESCE(SUM(chunk_count), 0) FROM knowledge_base_vector WHERE knowledge_base_id = #{knowledgeBaseId} AND vector_status = 2")
    int sumChunkCountByKnowledgeBaseId(@Param("knowledgeBaseId") Long knowledgeBaseId);
}
src/main/java/com/ruoyi/approve/pojo/KnowledgeBase.java
@@ -91,4 +91,19 @@
    @TableField(fill = FieldFill.INSERT)
    private Long deptId;
    /**
     * æ–‡ä»¶æ•°é‡
     */
    private Integer fileCount;
    /**
     * æ€»åˆ‡ç‰‡æ•°é‡
     */
    private Integer totalChunkCount;
    /**
     * çŸ¥è¯†åº“描述
     */
    private String description;
}
src/main/java/com/ruoyi/approve/pojo/KnowledgeBaseVector.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,80 @@
package com.ruoyi.approve.pojo;
import com.baomidou.mybatisplus.annotation.*;
import com.fasterxml.jackson.annotation.JsonFormat;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import java.io.Serializable;
import java.time.LocalDateTime;
/**
 * Vector record of a knowledge-base file.
 * Mapped to table knowledge_base_vector.
 */
@Data
@TableName("knowledge_base_vector")
@Schema(description = "知识库文件向量记录")
public class KnowledgeBaseVector implements Serializable {
    private static final long serialVersionUID = 1L;
    // Primary key, generated by the database (auto-increment).
    @TableId(type = IdType.AUTO)
    @Schema(description = "主键ID")
    private Long id;
    // Id of the owning knowledge base.
    @Schema(description = "关联知识库ID")
    private Long knowledgeBaseId;
    // Id of the stored file blob.
    @Schema(description = "关联文件blob ID")
    private Long storageBlobId;
    // File name.
    @Schema(description = "文件名称")
    private String fileName;
    // File type (docx/pdf/xlsx/txt, ...).
    @Schema(description = "文件类型(docx/pdf/xlsx/txt等)")
    private String fileType;
    // Vectorization status: 0 pending, 1 processing, 2 completed, 3 failed
    // (see the STATUS_* constants below).
    @Schema(description = "向量化状态: 0-待处理, 1-处理中, 2-已完成, 3-失败")
    private Integer vectorStatus;
    // Reason of the vectorization failure.
    @Schema(description = "向量化失败原因")
    private String vectorError;
    // Number of chunks.
    @Schema(description = "切片数量")
    private Integer chunkCount;
    // Vector namespace.
    @Schema(description = "向量命名空间")
    private String namespace;
    // Creation time; auto-filled on insert.
    @TableField(fill = FieldFill.INSERT)
    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
    @Schema(description = "创建时间")
    private LocalDateTime createTime;
    // Creator; auto-filled on insert.
    @TableField(fill = FieldFill.INSERT)
    @Schema(description = "创建人")
    private Integer createUser;
    // Last update time; auto-filled on insert and update.
    @TableField(fill = FieldFill.INSERT_UPDATE)
    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
    @Schema(description = "更新时间")
    private LocalDateTime updateTime;
    // Last updater; auto-filled on insert and update.
    @TableField(fill = FieldFill.INSERT_UPDATE)
    @Schema(description = "更新人")
    private Integer updateUser;
    // Tenant id; auto-filled on insert.
    @TableField(fill = FieldFill.INSERT)
    @Schema(description = "租户ID")
    private Long tenantId;
    // Department id; auto-filled on insert.
    @TableField(fill = FieldFill.INSERT)
    @Schema(description = "部门ID")
    private Long deptId;
    // Vectorization status constants
    public static final int STATUS_PENDING = 0;
    public static final int STATUS_PROCESSING = 1;
    public static final int STATUS_COMPLETED = 2;
    public static final int STATUS_FAILED = 3;
}
src/main/java/com/ruoyi/approve/service/KnowledgeBaseVectorService.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,43 @@
package com.ruoyi.approve.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.ruoyi.approve.dto.KnowledgeBaseVectorVO;
import com.ruoyi.approve.pojo.KnowledgeBaseVector;
import java.util.List;
/**
 * Service for knowledge-base file vector records.
 */
public interface KnowledgeBaseVectorService extends IService<KnowledgeBaseVector> {
    /**
     * Queries the file vector status list of a knowledge base.
     *
     * @param knowledgeBaseId ID of the knowledge base
     * @return the vector status entries for that knowledge base
     */
    List<KnowledgeBaseVectorVO> getVectorStatusByKnowledgeBaseId(Long knowledgeBaseId);
    /**
     * Creates a vector record and triggers asynchronous vectorization.
     *
     * @param knowledgeBaseId ID of the owning knowledge base
     * @param storageBlobId   ID of the stored file blob
     * @param fileName        file name
     * @param fileType        file type
     * @return the created vector record
     */
    KnowledgeBaseVector createVectorRecord(Long knowledgeBaseId, Long storageBlobId, String fileName, String fileType);
    /**
     * Updates the vectorization status of a record.
     *
     * @param id         ID of the vector record
     * @param status     new status (one of the KnowledgeBaseVector.STATUS_* values)
     * @param chunkCount chunk count to store
     * @param error      failure reason to store
     */
    void updateVectorStatus(Long id, Integer status, Integer chunkCount, String error);
    /**
     * Re-runs vectorization for a record.
     *
     * @param id ID of the vector record
     */
    void reprocessVector(Long id);
    /**
     * Deletes a vector record together with its related vector data.
     *
     * @param id ID of the vector record
     */
    void deleteVector(Long id);
    /**
     * Deletes vector records in batch.
     *
     * @param ids IDs of the vector records to delete
     */
    void deleteVectors(List<Long> ids);
}
src/main/java/com/ruoyi/approve/service/impl/KnowledgeBaseVectorServiceImpl.java
¶Ô±ÈÐÂÎļþ
@@ -0,0 +1,144 @@
package com.ruoyi.approve.service.impl;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.ruoyi.approve.dto.KnowledgeBaseVectorVO;
import com.ruoyi.approve.mapper.KnowledgeBaseVectorMapper;
import com.ruoyi.approve.pojo.KnowledgeBase;
import com.ruoyi.approve.pojo.KnowledgeBaseVector;
import com.ruoyi.approve.service.KnowledgeBaseService;
import com.ruoyi.approve.service.KnowledgeBaseVectorService;
import com.ruoyi.ai.service.KnowledgeRagService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Lazy;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.List;
/**
 * çŸ¥è¯†åº“文件向量记录 Service实现
 */
@Slf4j
@Service
public class KnowledgeBaseVectorServiceImpl extends ServiceImpl<KnowledgeBaseVectorMapper, KnowledgeBaseVector>
        implements KnowledgeBaseVectorService {
    private final KnowledgeBaseService knowledgeBaseService;
    private final KnowledgeRagService knowledgeRagService;
    public KnowledgeBaseVectorServiceImpl(
            KnowledgeBaseService knowledgeBaseService,
            @Lazy KnowledgeRagService knowledgeRagService) {
        this.knowledgeBaseService = knowledgeBaseService;
        this.knowledgeRagService = knowledgeRagService;
    }
    @Override
    public List<KnowledgeBaseVectorVO> getVectorStatusByKnowledgeBaseId(Long knowledgeBaseId) {
        return baseMapper.selectByKnowledgeBaseId(knowledgeBaseId);
    }
    @Override
    public KnowledgeBaseVector createVectorRecord(Long knowledgeBaseId, Long storageBlobId,
                                                   String fileName, String fileType) {
        KnowledgeBase knowledgeBase = knowledgeBaseService.getById(knowledgeBaseId);
        if (knowledgeBase == null) {
            throw new RuntimeException("知识库不存在: " + knowledgeBaseId);
        }
        KnowledgeBaseVector vector = new KnowledgeBaseVector();
        vector.setKnowledgeBaseId(knowledgeBaseId);
        vector.setStorageBlobId(storageBlobId);
        vector.setFileName(fileName);
        vector.setFileType(fileType);
        vector.setVectorStatus(KnowledgeBaseVector.STATUS_PENDING);
        vector.setNamespace("kb-" + knowledgeBaseId);
        vector.setChunkCount(0);
        save(vector);
        // å¼‚步触发向量化处理
        knowledgeRagService.processVectorAsync(vector.getId());
        return vector;
    }
    @Override
    public void updateVectorStatus(Long id, Integer status, Integer chunkCount, String error) {
        KnowledgeBaseVector vector = getById(id);
        if (vector == null) {
            return;
        }
        vector.setVectorStatus(status);
        if (chunkCount != null) {
            vector.setChunkCount(chunkCount);
        }
        if (error != null) {
            vector.setVectorError(error);
        }
        updateById(vector);
        // å¦‚果完成,更新知识库统计
        if (status == KnowledgeBaseVector.STATUS_COMPLETED) {
            updateKnowledgeBaseStats(vector.getKnowledgeBaseId());
        }
    }
    @Override
    @Transactional(rollbackFor = Exception.class)
    public void reprocessVector(Long id) {
        KnowledgeBaseVector vector = getById(id);
        if (vector == null) {
            throw new RuntimeException("向量记录不存在: " + id);
        }
        vector.setVectorStatus(KnowledgeBaseVector.STATUS_PENDING);
        vector.setVectorError(null);
        vector.setChunkCount(0);
        updateById(vector);
        // å¼‚步重新处理
        knowledgeRagService.processVectorAsync(id);
    }
    @Override
    @Transactional(rollbackFor = Exception.class)
    public void deleteVector(Long id) {
        KnowledgeBaseVector vector = getById(id);
        if (vector == null) {
            return;
        }
        // åˆ é™¤å‘量库中的数据
        try {
            knowledgeRagService.deleteEmbeddings(vector.getNamespace(), vector.getStorageBlobId());
        } catch (Exception e) {
            log.error("删除向量库数据失败", e);
        }
        // åˆ é™¤è®°å½•
        removeById(id);
        // æ›´æ–°çŸ¥è¯†åº“统计
        updateKnowledgeBaseStats(vector.getKnowledgeBaseId());
    }
    @Override
    @Transactional(rollbackFor = Exception.class)
    public void deleteVectors(List<Long> ids) {
        for (Long id : ids) {
            deleteVector(id);
        }
    }
    private void updateKnowledgeBaseStats(Long knowledgeBaseId) {
        KnowledgeBase knowledgeBase = knowledgeBaseService.getById(knowledgeBaseId);
        if (knowledgeBase == null) {
            return;
        }
        int fileCount = baseMapper.countByKnowledgeBaseId(knowledgeBaseId);
        int totalChunkCount = baseMapper.sumChunkCountByKnowledgeBaseId(knowledgeBaseId);
        knowledgeBase.setFileCount(fileCount);
        knowledgeBase.setTotalChunkCount(totalChunkCount);
        knowledgeBaseService.updateById(knowledgeBase);
    }
}
src/main/resources/application.yml
@@ -7,6 +7,13 @@
    allow-circular-references: true
  profiles:
    active: dev
# Pinecone vector database configuration
# The API key is a secret and must not be committed: it is resolved from the
# PINECONE_API_KEY environment variable at startup. Any key that was previously
# checked in should be treated as compromised and revoked.
pinecone:
  api-key: ${PINECONE_API_KEY}
  index: xiaozhi-index
  namespace: knowledge-base
langchain4j:
  mcp:
    # MCP server address (adjust to match the actual MCP service deployment)