| | |
| | | import com.agentsflex.core.document.DocumentSplitter; |
| | | import com.agentsflex.core.document.splitter.RegexDocumentSplitter; |
| | | import com.agentsflex.core.document.splitter.SimpleTokenizeSplitter; |
| | | import com.agentsflex.core.llm.embedding.EmbeddingOptions; |
| | | import org.springframework.core.io.ClassPathResource; |
| | | import tech.aiflowy.ai.entity.AiDocument; |
| | | import tech.aiflowy.ai.entity.AiDocumentChunk; |
| | |
| | | import tech.aiflowy.ai.service.*; |
| | | import tech.aiflowy.ai.service.impl.AiDocumentServiceImpl; |
| | | import tech.aiflowy.common.ai.DocumentParserFactory; |
| | | import tech.aiflowy.common.ai.ExcelDocumentSplitter; |
| | | import tech.aiflowy.common.domain.Result; |
| | | import tech.aiflowy.common.tree.Tree; |
| | | import tech.aiflowy.common.util.RequestUtil; |
| | |
| | | |
| | | private final AiKnowledgeService knowledgeService; |
| | | private final AiDocumentChunkService documentChunkService; |
| | | private final AiDocumentHistoryService documentHistoryService; |
| | | private final AiLlmService aiLlmService; |
| | | |
| | | @Autowired |
| | |
| | | |
| | | public AiDocumentController(AiDocumentService service, |
| | | AiKnowledgeService knowledgeService, |
| | | AiDocumentChunkService documentChunkService, |
| | | AiDocumentHistoryService documentHistoryService, AiLlmService aiLlmService) { |
| | | AiDocumentChunkService documentChunkService, AiLlmService aiLlmService) { |
| | | super(service); |
| | | this.knowledgeService = knowledgeService; |
| | | this.documentChunkService = documentChunkService; |
| | | this.documentHistoryService = documentHistoryService; |
| | | this.aiLlmService = aiLlmService; |
| | | } |
| | | @PostMapping("removeDoc") |
| | |
| | | @GetMapping("list") |
| | | @Override |
| | | public Result list(AiDocument entity, Boolean asTree, String sortKey, String sortType) { |
| | | String kbSlug = RequestUtil.getParamAsString("kb_slug"); |
| | | String kbSlug = RequestUtil.getParamAsString("id"); |
| | | if (StringUtil.noText(kbSlug)) { |
| | | return Result.fail(1); |
| | | return Result.fail(1, "知识库id不能为空"); |
| | | } |
| | | |
| | | AiKnowledge knowledge = StringUtil.isNumeric(kbSlug) |
| | | ? knowledgeService.getById(kbSlug) |
| | | : knowledgeService.getOne(QueryWrapper.create().eq(AiKnowledge::getSlug, kbSlug)); |
| | | |
| | | |
| | | if (knowledge == null) { |
| | | return Result.fail(1); |
| | | return Result.fail(2, "知识库不存在"); |
| | | } |
| | | |
| | | QueryWrapper queryWrapper = QueryWrapper.create() |
| | |
| | | |
| | | @GetMapping("documentList") |
| | | public Result documentList(@RequestParam(name="fileName", required = false) String fileName, @RequestParam(name="pageSize") int pageSize, @RequestParam(name = "current") int current) { |
| | | String kbSlug = RequestUtil.getParamAsString("kb_slug"); |
| | | String kbSlug = RequestUtil.getParamAsString("id"); |
| | | if (StringUtil.noText(kbSlug)) { |
| | | return Result.fail(1); |
| | | return Result.fail(1, "知识库id不能为空"); |
| | | } |
| | | Page<AiDocument> documentList = aiDocumentService.getDocumentList(kbSlug, pageSize, current,fileName); |
| | | return Result.success(documentList); |
| | |
| | | @RequestParam(name="regex", required = false) String regex, |
| | | @RequestParam(name="userWillSave") boolean userWillSave |
| | | ) throws IOException { |
| | | |
| | | if (file.getOriginalFilename() == null){ |
| | | return Result.fail(1,"文件名不能为空"); |
| | | } |
| | | String fileTypeByExtension = JudgeFileTypeUtil.getFileTypeByExtension(file.getOriginalFilename()); |
| | | if (StringUtils.isEmpty(fileTypeByExtension)){ |
| | | return Result.fail(1,"不支持的文档类型"); |
| | | return Result.fail(2,"不支持的文档类型"); |
| | | } |
| | | DocumentParser documentParser = DocumentParserFactory.getDocumentParser(file.getOriginalFilename()); |
| | | if (documentParser == null) { |
| | | return Result.fail(1, "can not support the file type: " + file.getOriginalFilename()); |
| | | return Result.fail(3, "can not support the file type: " + file.getOriginalFilename()); |
| | | } |
| | | String path = storageService.save(file); |
| | | AiDocument aiDocument = new AiDocument(); |
| | |
| | | //如果用户是预览分割效果 |
| | | if (!userWillSave){ |
| | | List<AiDocumentChunk> previewList = new ArrayList<>(); |
| | | // 调用解析器进行文本分割 |
| | | AiKnowledge knowledge = knowledgeService.getById(knowledgeId); |
| | | DocumentStore documentStore = knowledge.toDocumentStore(); |
| | | // 设置向量模型 |
| | | AiLlm aiLlm = aiLlmService.getById(knowledge.getVectorEmbedLlmId()); |
| | | Llm embeddingModel = aiLlm.toLlm(); |
| | | documentStore.setEmbeddingModel(embeddingModel); |
| | | StoreOptions options = StoreOptions.ofCollectionName(knowledge.getVectorStoreCollection()); |
| | | // 设置分割器 todo 未来可以通过参数来指定分割器,不同的文档使用不同的分割器效果更好 |
| | | documentStore.setDocumentSplitter(getDocumentSplitter(splitterName, chunkSize, overlapSize, regex)); |
| | | AtomicInteger sort = new AtomicInteger(1); |
| | | |
| | | documentStore.setDocumentIdGenerator(item -> { |
| | | AiDocumentChunk chunk = new AiDocumentChunk(); |
| | | chunk.setContent(item.getContent()); |
| | | chunk.setSorting(sort.get()); |
| | | sort.getAndIncrement(); |
| | | previewList.add(chunk); |
| | | return chunk.getId(); |
| | | }); |
| | | DocumentSplitter documentSplitter = getDocumentSplitter(splitterName, chunkSize, overlapSize, regex, 2); |
| | | Document document = Document.of(aiDocument.getContent()); |
| | | StoreResult result = documentStore.store(document, options); |
| | | List<Document> documents = documentSplitter.split(document); |
| | | int sort = 1; |
| | | for (Document value : documents) { |
| | | AiDocumentChunk chunk = new AiDocumentChunk(); |
| | | chunk.setContent(value.getContent()); |
| | | chunk.setSorting(sort); |
| | | sort++; |
| | | previewList.add(chunk); |
| | | } |
| | | // 删除本地文件 |
| | | AiDocumentServiceImpl.deleteFile(getRootPath() + path); |
| | | Map res = new HashMap(); |
| | |
| | | } |
| | | aiDocument.setTitle(StringUtil.removeFileExtension(file.getOriginalFilename())); |
| | | |
| | | return super.save(aiDocument); |
| | | super.save(aiDocument); |
| | | return storeDocument(aiDocument, splitterName, chunkSize, overlapSize, regex); |
| | | } |
| | | |
| | | |
| | | /** |
| | | * 更新 entity 的位置 |
| | | * 更新 entity |
| | | * |
| | | * @param entity entity |
| | | * @param entity |
| | | * @return Result |
| | | */ |
| | | private Result updatePosition(AiDocument entity) { |
| | |
| | | } |
| | | |
| | | /** |
| | | * entity 保存或更新后触发 |
| | | * 文档存储到向量数据库 |
| | | * |
| | | * @param entity |
| | | * @param isSave |
| | | * @param entity 将要分割的文档 |
| | | * @param splitterName 分割器名称 |
| | | * @param chunkSize size of each chunk (segment) produced by the splitter |
| | | * @param overlapSize 分段大小 |
| | | * @param overlapSize 分段重叠大小 |
| | | * @param regex 正则表达式 |
| | | */ |
| | | @Override |
| | | protected void onSaveOrUpdateAfter(AiDocument entity, boolean isSave) { |
| | | AiDocument aiDocument = entity; |
| | | // 重新获取全数据内容 |
| | | protected Result storeDocument(AiDocument entity, String splitterName, int chunkSize, int overlapSize, String regex) { |
| | | entity = service.getById(entity.getId()); |
| | | |
| | | AiKnowledge knowledge = knowledgeService.getById(entity.getKnowledgeId()); |
| | | if (knowledge == null) { |
| | | return; |
| | | return Result.fail(1, "知识库不存在"); |
| | | } |
| | | |
| | | // 存储到知识库 |
| | | DocumentStore documentStore = knowledge.toDocumentStore(); |
| | | if (documentStore == null) { |
| | | return; |
| | | if (documentStore == null){ |
| | | return Result.fail(2, "向量数据库类型未设置"); |
| | | } |
| | | |
| | | // 设置向量模型 |
| | | AiLlm aiLlm = aiLlmService.getById(knowledge.getVectorEmbedLlmId()); |
| | | if (aiLlm == null) { |
| | | return; |
| | | return Result.fail(3, "该知识库未配置大模型"); |
| | | |
| | | } |
| | | // 设置向量模型 |
| | | Llm embeddingModel = aiLlm.toLlm(); |
| | | documentStore.setEmbeddingModel(embeddingModel); |
| | | |
| | | StoreOptions options = StoreOptions.ofCollectionName(knowledge.getVectorStoreCollection()); |
| | | |
| | | EmbeddingOptions embeddingOptions = new EmbeddingOptions(); |
| | | embeddingOptions.setModel(aiLlm.getLlmModel()); |
| | | options.setEmbeddingOptions(embeddingOptions); |
| | | if (entity.getId() != null) { |
| | | List<AiDocumentChunk> documentChunks = documentChunkService.list(QueryWrapper.create() |
| | | .eq(AiDocumentChunk::getDocumentId, entity.getId())); |
| | |
| | | } |
| | | |
| | | // 设置分割器 todo 未来可以通过参数来指定分割器,不同的文档使用不同的分割器效果更好 |
| | | documentStore.setDocumentSplitter(new SimpleDocumentSplitter(aiDocument.getChunkSize(), aiDocument.getOverlapSize())); |
| | | documentStore.setDocumentSplitter(getDocumentSplitter(splitterName, chunkSize, overlapSize, regex, 2)); |
| | | |
| | | // 设置文档ID生成器 |
| | | AiDocument finalEntity = entity; |
| | | // AtomicInteger sort = new AtomicInteger(1); |
| | | //Integer sort = new Integer(1); |
| | | AtomicInteger sort = new AtomicInteger(1); |
| | | // 设置文档ID生成器 |
| | | documentStore.setDocumentIdGenerator(document -> { |
| | | AiDocumentChunk chunk = new AiDocumentChunk(); |
| | | chunk.setContent(document.getContent()); |
| | |
| | | |
| | | Document document = Document.of(entity.getContent()); |
| | | |
| | | |
| | | StoreResult result = documentStore.store(document, options); |
| | | |
| | | if (!result.isSuccess()) { |
| | | LoggerFactory.getLogger(AiDocumentController.class).error("DocumentStore.store failed: " + result); |
| | | } |
| | | AiKnowledge aiKnowledge = new AiKnowledge(); |
| | | aiKnowledge.setId(entity.getKnowledgeId()); |
| | | // canUpdateEmbedding — false: the knowledge base's embedding model can no longer be changed; true: it can be changed |
| | | aiKnowledge.setCanUpdateEmbedding(false); |
| | | knowledgeService.updateById(aiKnowledge); |
| | | return Result.success(); |
| | | } |
| | | |
| | | public String getRootPath() { |
| | |
| | | } |
| | | } |
| | | |
| | | public DocumentSplitter getDocumentSplitter (String splitterName, int chunkSize, int overlapSize, String regex){ |
| | | public DocumentSplitter getDocumentSplitter (String splitterName, int chunkSize, int overlapSize, String regex, int excelRows){ |
| | | |
| | | if (StringUtil.noText(splitterName)) { |
| | | return null; |
| | |
| | | } else { |
| | | return new SimpleTokenizeSplitter(chunkSize, overlapSize); |
| | | } |
| | | case "ExcelDocumentSplitter": |
| | | return new ExcelDocumentSplitter(excelRows); |
| | | default: |
| | | return null; |
| | | } |
| | | |
| | | } |
| | | |
| | | } |
| | | } |