18586361686
2025-05-13 db5a038b1c407b377f1c25ebff1db806ddfd1868
aiflowy-modules/aiflowy-module-ai/src/main/java/tech/aiflowy/ai/controller/AiDocumentController.java
@@ -3,6 +3,7 @@
import com.agentsflex.core.document.DocumentSplitter;
import com.agentsflex.core.document.splitter.RegexDocumentSplitter;
import com.agentsflex.core.document.splitter.SimpleTokenizeSplitter;
import com.agentsflex.core.llm.embedding.EmbeddingOptions;
import org.springframework.core.io.ClassPathResource;
import tech.aiflowy.ai.entity.AiDocument;
import tech.aiflowy.ai.entity.AiDocumentChunk;
@@ -11,6 +12,7 @@
import tech.aiflowy.ai.service.*;
import tech.aiflowy.ai.service.impl.AiDocumentServiceImpl;
import tech.aiflowy.common.ai.DocumentParserFactory;
import tech.aiflowy.common.ai.ExcelDocumentSplitter;
import tech.aiflowy.common.domain.Result;
import tech.aiflowy.common.tree.Tree;
import tech.aiflowy.common.util.RequestUtil;
@@ -60,7 +62,6 @@
    private final AiKnowledgeService knowledgeService;
    private final AiDocumentChunkService documentChunkService;
    private final AiDocumentHistoryService documentHistoryService;
    private final AiLlmService aiLlmService;
    @Autowired
@@ -74,12 +75,10 @@
    /**
     * Creates the controller: forwards {@code service} to the superclass constructor and
     * stores the remaining collaborators in the corresponding fields.
     *
     * NOTE(review): this span looks like a diff hunk whose +/- markers were lost. Two
     * alternative endings of the parameter list appear back to back (one with
     * {@code documentHistoryService}, one without), and the assignment to
     * {@code this.documentHistoryService} only matches the former. Confirm which lines
     * are live before compiling.
     *
     * @param service                passed to {@code super(service)}
     * @param knowledgeService       assigned to {@code this.knowledgeService}
     * @param documentChunkService   assigned to {@code this.documentChunkService}
     * @param documentHistoryService assigned to {@code this.documentHistoryService}
     * @param aiLlmService           assigned to {@code this.aiLlmService}
     */
    public AiDocumentController(AiDocumentService service,
                                AiKnowledgeService knowledgeService,
                                AiDocumentChunkService documentChunkService,
                                AiDocumentHistoryService documentHistoryService, AiLlmService aiLlmService) {
                                AiDocumentChunkService documentChunkService, AiLlmService aiLlmService) {
        super(service);
        this.knowledgeService = knowledgeService;
        this.documentChunkService = documentChunkService;
        this.documentHistoryService = documentHistoryService;
        this.aiLlmService = aiLlmService;
    }
    @PostMapping("removeDoc")
@@ -122,18 +121,17 @@
    @GetMapping("list")
    @Override
    public Result list(AiDocument entity, Boolean asTree, String sortKey, String sortType) {
        String kbSlug = RequestUtil.getParamAsString("kb_slug");
        String kbSlug = RequestUtil.getParamAsString("id");
        if (StringUtil.noText(kbSlug)) {
            return Result.fail(1);
            return Result.fail(1, "知识库id不能为空");
        }
        AiKnowledge knowledge = StringUtil.isNumeric(kbSlug)
                ? knowledgeService.getById(kbSlug)
                : knowledgeService.getOne(QueryWrapper.create().eq(AiKnowledge::getSlug, kbSlug));
        if (knowledge == null) {
            return Result.fail(1);
            return Result.fail(2, "知识库不存在");
        }
        QueryWrapper queryWrapper = QueryWrapper.create()
@@ -146,9 +144,9 @@
    @GetMapping("documentList")
    public Result documentList(@RequestParam(name="fileName", required = false) String fileName, @RequestParam(name="pageSize") int pageSize, @RequestParam(name = "current") int current) {
        String kbSlug = RequestUtil.getParamAsString("kb_slug");
        String kbSlug = RequestUtil.getParamAsString("id");
        if (StringUtil.noText(kbSlug)) {
            return Result.fail(1);
            return Result.fail(1, "知识库id不能为空");
        }
        Page<AiDocument> documentList = aiDocumentService.getDocumentList(kbSlug, pageSize, current,fileName);
        return Result.success(documentList);
@@ -187,14 +185,16 @@
                         @RequestParam(name="regex", required = false) String regex,
                         @RequestParam(name="userWillSave") boolean userWillSave
    ) throws IOException {
        if (file.getOriginalFilename() == null){
            return Result.fail(1,"文件名不能为空");
        }
        String fileTypeByExtension = JudgeFileTypeUtil.getFileTypeByExtension(file.getOriginalFilename());
        if (StringUtils.isEmpty(fileTypeByExtension)){
            return Result.fail(1,"不支持的文档类型");
            return Result.fail(2,"不支持的文档类型");
        }
        DocumentParser documentParser = DocumentParserFactory.getDocumentParser(file.getOriginalFilename());
        if (documentParser == null) {
            return Result.fail(1, "can not support the file type: " + file.getOriginalFilename());
            return Result.fail(3, "can not support the file type: " + file.getOriginalFilename());
        }
        String path = storageService.save(file);
        AiDocument aiDocument = new AiDocument();
@@ -208,28 +208,18 @@
        //如果用户是预览分割效果
        if (!userWillSave){
            List<AiDocumentChunk> previewList = new ArrayList<>();
            // 调用解析器进行文本分割
            AiKnowledge knowledge = knowledgeService.getById(knowledgeId);
            DocumentStore documentStore = knowledge.toDocumentStore();
            // 设置向量模型
            AiLlm aiLlm = aiLlmService.getById(knowledge.getVectorEmbedLlmId());
            Llm embeddingModel = aiLlm.toLlm();
            documentStore.setEmbeddingModel(embeddingModel);
            StoreOptions options = StoreOptions.ofCollectionName(knowledge.getVectorStoreCollection());
            // 设置分割器 todo 未来可以通过参数来指定分割器,不同的文档使用不同的分割器效果更好
            documentStore.setDocumentSplitter(getDocumentSplitter(splitterName, chunkSize, overlapSize, regex));
            AtomicInteger sort  = new AtomicInteger(1);
            documentStore.setDocumentIdGenerator(item -> {
                AiDocumentChunk chunk = new AiDocumentChunk();
                chunk.setContent(item.getContent());
                chunk.setSorting(sort.get());
                sort.getAndIncrement();
                previewList.add(chunk);
                return chunk.getId();
            });
            DocumentSplitter documentSplitter = getDocumentSplitter(splitterName, chunkSize, overlapSize, regex, 2);
            Document document = Document.of(aiDocument.getContent());
            StoreResult result = documentStore.store(document, options);
            List<Document> documents = documentSplitter.split(document);
            int sort = 1;
            for (Document value : documents) {
                AiDocumentChunk chunk = new AiDocumentChunk();
                chunk.setContent(value.getContent());
                chunk.setSorting(sort);
                sort++;
                previewList.add(chunk);
            }
            // 删除本地文件
            AiDocumentServiceImpl.deleteFile(getRootPath() + path);
            Map res = new HashMap();
@@ -259,14 +249,15 @@
        }
        aiDocument.setTitle(StringUtil.removeFileExtension(file.getOriginalFilename()));
        return super.save(aiDocument);
        super.save(aiDocument);
        return storeDocument(aiDocument, splitterName, chunkSize, overlapSize, regex);
    }
    /**
     * 更新 entity 的位置
     * 更新 entity
     *
     * @param entity entity
     * @param entity
     * @return Result
     */
    private Result updatePosition(AiDocument entity) {
@@ -301,38 +292,39 @@
    }
    /**
     * entity 保存或更新后触发
     * 文档存储到向量数据库
     *
     * @param entity
     * @param isSave
     * @param entity the document to be split
     * @param splitterName name of the splitter
     * @param chunkSize chunk size
     * @param overlapSize chunk overlap size
     * @param regex regular expression
     */
    @Override
    protected void onSaveOrUpdateAfter(AiDocument entity, boolean isSave) {
        AiDocument aiDocument = entity;
        // 重新获取全数据内容
    protected Result storeDocument(AiDocument entity, String splitterName, int chunkSize, int overlapSize, String regex) {
        entity = service.getById(entity.getId());
        AiKnowledge knowledge = knowledgeService.getById(entity.getKnowledgeId());
        if (knowledge == null) {
            return;
            return Result.fail(1, "知识库不存在");
        }
        // 存储到知识库
        DocumentStore documentStore = knowledge.toDocumentStore();
        if (documentStore == null) {
            return;
        if (documentStore == null){
            return Result.fail(2, "向量数据库类型未设置");
        }
        // 设置向量模型
        AiLlm aiLlm = aiLlmService.getById(knowledge.getVectorEmbedLlmId());
        if (aiLlm == null) {
            return;
            return Result.fail(3, "该知识库未配置大模型");
        }
        // 设置向量模型
        Llm embeddingModel = aiLlm.toLlm();
        documentStore.setEmbeddingModel(embeddingModel);
        StoreOptions options = StoreOptions.ofCollectionName(knowledge.getVectorStoreCollection());
        EmbeddingOptions embeddingOptions = new EmbeddingOptions();
        embeddingOptions.setModel(aiLlm.getLlmModel());
        options.setEmbeddingOptions(embeddingOptions);
        if (entity.getId() != null) {
            List<AiDocumentChunk> documentChunks = documentChunkService.list(QueryWrapper.create()
                    .eq(AiDocumentChunk::getDocumentId, entity.getId()));
@@ -351,13 +343,11 @@
        }
        // 设置分割器 todo 未来可以通过参数来指定分割器,不同的文档使用不同的分割器效果更好
        documentStore.setDocumentSplitter(new SimpleDocumentSplitter(aiDocument.getChunkSize(), aiDocument.getOverlapSize()));
        documentStore.setDocumentSplitter(getDocumentSplitter(splitterName, chunkSize, overlapSize, regex, 2));
        // 设置文档ID生成器
        AiDocument finalEntity = entity;
//        AtomicInteger sort = new AtomicInteger(1);
        //Integer sort = new Integer(1);
        AtomicInteger sort  = new AtomicInteger(1);
        // 设置文档ID生成器
        documentStore.setDocumentIdGenerator(document -> {
            AiDocumentChunk chunk = new AiDocumentChunk();
            chunk.setContent(document.getContent());
@@ -376,12 +366,16 @@
        Document document = Document.of(entity.getContent());
        StoreResult result = documentStore.store(document, options);
        if (!result.isSuccess()) {
            LoggerFactory.getLogger(AiDocumentController.class).error("DocumentStore.store failed: " + result);
        }
        AiKnowledge aiKnowledge = new AiKnowledge();
        aiKnowledge.setId(entity.getKnowledgeId());
        // CanUpdateEmbedLlm false: 不能修改知识库的大模型 true: 可以修改
        aiKnowledge.setCanUpdateEmbedding(false);
        knowledgeService.updateById(aiKnowledge);
        return Result.success();
    }
    public String getRootPath() {
@@ -396,7 +390,7 @@
        }
    }
    public DocumentSplitter getDocumentSplitter (String splitterName, int chunkSize, int overlapSize, String regex){
    public DocumentSplitter getDocumentSplitter (String splitterName, int chunkSize, int overlapSize, String regex, int excelRows){
        if (StringUtil.noText(splitterName)) {
            return null;
@@ -412,10 +406,12 @@
                } else {
                    return new SimpleTokenizeSplitter(chunkSize, overlapSize);
                }
            case "ExcelDocumentSplitter":
                return new ExcelDocumentSplitter(excelRows);
            default:
                return null;
        }
    }
}
}