RenZQ_Java.git - Gitblit

			@@ -3,6 +3,7 @@
			import com.agentsflex.core.document.DocumentSplitter;
			import com.agentsflex.core.document.splitter.RegexDocumentSplitter;
			import com.agentsflex.core.document.splitter.SimpleTokenizeSplitter;
			import com.agentsflex.core.llm.embedding.EmbeddingOptions;
			import org.springframework.core.io.ClassPathResource;
			import tech.aiflowy.ai.entity.AiDocument;
			import tech.aiflowy.ai.entity.AiDocumentChunk;
			@@ -11,6 +12,7 @@
			import tech.aiflowy.ai.service.*;
			import tech.aiflowy.ai.service.impl.AiDocumentServiceImpl;
			import tech.aiflowy.common.ai.DocumentParserFactory;
			import tech.aiflowy.common.ai.ExcelDocumentSplitter;
			import tech.aiflowy.common.domain.Result;
			import tech.aiflowy.common.tree.Tree;
			import tech.aiflowy.common.util.RequestUtil;
			@@ -60,7 +62,6 @@

			private final AiKnowledgeService knowledgeService;
			private final AiDocumentChunkService documentChunkService;
			private final AiDocumentHistoryService documentHistoryService;
			private final AiLlmService aiLlmService;

			@Autowired
			@@ -74,12 +75,10 @@

			/**
			 * Creates the controller: hands the document service to the superclass
			 * constructor and keeps references to the other injected services.
			 *
			 * NOTE(review): this listing appears to be a diff rendered without +/- markers.
			 * The parameter list therefore shows two alternative endings (one that still
			 * takes an AiDocumentHistoryService and one that does not), and the
			 * documentHistoryService assignment presumably belongs to the older variant
			 * only - TODO confirm against the checked-in file.
			 *
			 * @param service              document service passed to {@code super(service)}
			 * @param knowledgeService     stored in {@code this.knowledgeService}
			 * @param documentChunkService stored in {@code this.documentChunkService}
			 * @param aiLlmService         stored in {@code this.aiLlmService}
			 */
			public AiDocumentController(AiDocumentService service,
			AiKnowledgeService knowledgeService,
			AiDocumentChunkService documentChunkService,
			AiDocumentHistoryService documentHistoryService, AiLlmService aiLlmService) {
			AiDocumentChunkService documentChunkService, AiLlmService aiLlmService) {
			super(service);
			this.knowledgeService = knowledgeService;
			this.documentChunkService = documentChunkService;
			this.documentHistoryService = documentHistoryService;
			this.aiLlmService = aiLlmService;
			}
			@PostMapping("removeDoc")
			@@ -122,18 +121,17 @@
			@GetMapping("list")
			@Override
			public Result list(AiDocument entity, Boolean asTree, String sortKey, String sortType) {
			String kbSlug = RequestUtil.getParamAsString("kb_slug");
			String kbSlug = RequestUtil.getParamAsString("id");
			if (StringUtil.noText(kbSlug)) {
			return Result.fail(1);
			return Result.fail(1, "知识库id不能为空");
			}

			AiKnowledge knowledge = StringUtil.isNumeric(kbSlug)
			? knowledgeService.getById(kbSlug)
			: knowledgeService.getOne(QueryWrapper.create().eq(AiKnowledge::getSlug, kbSlug));


			if (knowledge == null) {
			return Result.fail(1);
			return Result.fail(2, "知识库不存在");
			}

			QueryWrapper queryWrapper = QueryWrapper.create()
			@@ -146,9 +144,9 @@

			@GetMapping("documentList")
			public Result documentList(@RequestParam(name="fileName", required = false) String fileName, @RequestParam(name="pageSize") int pageSize, @RequestParam(name = "current") int current) {
			String kbSlug = RequestUtil.getParamAsString("kb_slug");
			String kbSlug = RequestUtil.getParamAsString("id");
			if (StringUtil.noText(kbSlug)) {
			return Result.fail(1);
			return Result.fail(1, "知识库id不能为空");
			}
			Page<AiDocument> documentList = aiDocumentService.getDocumentList(kbSlug, pageSize, current,fileName);
			return Result.success(documentList);
			@@ -187,14 +185,16 @@
			@RequestParam(name="regex", required = false) String regex,
			@RequestParam(name="userWillSave") boolean userWillSave
			) throws IOException {

			if (file.getOriginalFilename() == null){
			return Result.fail(1,"文件名不能为空");
			}
			String fileTypeByExtension = JudgeFileTypeUtil.getFileTypeByExtension(file.getOriginalFilename());
			if (StringUtils.isEmpty(fileTypeByExtension)){
			return Result.fail(1,"不支持的文档类型");
			return Result.fail(2,"不支持的文档类型");
			}
			DocumentParser documentParser = DocumentParserFactory.getDocumentParser(file.getOriginalFilename());
			if (documentParser == null) {
			return Result.fail(1, "can not support the file type: " + file.getOriginalFilename());
			return Result.fail(3, "can not support the file type: " + file.getOriginalFilename());
			}
			String path = storageService.save(file);
			AiDocument aiDocument = new AiDocument();
			@@ -208,28 +208,18 @@
			//如果用户是预览分割效果
			if (!userWillSave){
			List<AiDocumentChunk> previewList = new ArrayList<>();
			// 调用解析器进行文本分割
			AiKnowledge knowledge = knowledgeService.getById(knowledgeId);
			DocumentStore documentStore = knowledge.toDocumentStore();
			// 设置向量模型
			AiLlm aiLlm = aiLlmService.getById(knowledge.getVectorEmbedLlmId());
			Llm embeddingModel = aiLlm.toLlm();
			documentStore.setEmbeddingModel(embeddingModel);
			StoreOptions options = StoreOptions.ofCollectionName(knowledge.getVectorStoreCollection());
			// 设置分割器 todo 未来可以通过参数来指定分割器，不同的文档使用不同的分割器效果更好
			documentStore.setDocumentSplitter(getDocumentSplitter(splitterName, chunkSize, overlapSize, regex));
			AtomicInteger sort = new AtomicInteger(1);

			documentStore.setDocumentIdGenerator(item -> {
			AiDocumentChunk chunk = new AiDocumentChunk();
			chunk.setContent(item.getContent());
			chunk.setSorting(sort.get());
			sort.getAndIncrement();
			previewList.add(chunk);
			return chunk.getId();
			});
			DocumentSplitter documentSplitter = getDocumentSplitter(splitterName, chunkSize, overlapSize, regex, 2);
			Document document = Document.of(aiDocument.getContent());
			StoreResult result = documentStore.store(document, options);
			List<Document> documents = documentSplitter.split(document);
			int sort = 1;
			for (Document value : documents) {
			AiDocumentChunk chunk = new AiDocumentChunk();
			chunk.setContent(value.getContent());
			chunk.setSorting(sort);
			sort++;
			previewList.add(chunk);
			}
			// 删除本地文件
			AiDocumentServiceImpl.deleteFile(getRootPath() + path);
			Map res = new HashMap();
			@@ -259,14 +249,15 @@
			}
			aiDocument.setTitle(StringUtil.removeFileExtension(file.getOriginalFilename()));

			return super.save(aiDocument);
			super.save(aiDocument);
			return storeDocument(aiDocument, splitterName, chunkSize, overlapSize, regex);
			}


			/**
			* 更新 entity 的位置
			* 更新 entity
			*
			* @param entity entity
			* @param entity
			* @return Result
			*/
			private Result updatePosition(AiDocument entity) {
			@@ -301,38 +292,39 @@
			}

			/**
			* entity 保存或更新后触发
			* 文档存储到向量数据库
			*
			* @param entity
			* @param isSave
			* @param entity 将要分割的文档
			* @param splitterName 分割器名称
			* @param chunkSize 分段大小
			* @param overlapSize 分段重叠大小
			* @param regex 正则表达式
			*/
			@Override
			protected void onSaveOrUpdateAfter(AiDocument entity, boolean isSave) {
			AiDocument aiDocument = entity;
			// 重新获取全数据内容
			protected Result storeDocument(AiDocument entity, String splitterName, int chunkSize, int overlapSize, String regex) {
			entity = service.getById(entity.getId());

			AiKnowledge knowledge = knowledgeService.getById(entity.getKnowledgeId());
			if (knowledge == null) {
			return;
			return Result.fail(1, "知识库不存在");
			}

			// 存储到知识库
			DocumentStore documentStore = knowledge.toDocumentStore();
			if (documentStore == null) {
			return;
			if (documentStore == null){
			return Result.fail(2, "向量数据库类型未设置");
			}

			// 设置向量模型
			AiLlm aiLlm = aiLlmService.getById(knowledge.getVectorEmbedLlmId());
			if (aiLlm == null) {
			return;
			return Result.fail(3, "该知识库未配置大模型");

			}
			// 设置向量模型
			Llm embeddingModel = aiLlm.toLlm();
			documentStore.setEmbeddingModel(embeddingModel);

			StoreOptions options = StoreOptions.ofCollectionName(knowledge.getVectorStoreCollection());

			EmbeddingOptions embeddingOptions = new EmbeddingOptions();
			embeddingOptions.setModel(aiLlm.getLlmModel());
			options.setEmbeddingOptions(embeddingOptions);
			if (entity.getId() != null) {
			List<AiDocumentChunk> documentChunks = documentChunkService.list(QueryWrapper.create()
			.eq(AiDocumentChunk::getDocumentId, entity.getId()));
			@@ -351,13 +343,11 @@
			}

			// 设置分割器 todo 未来可以通过参数来指定分割器，不同的文档使用不同的分割器效果更好
			documentStore.setDocumentSplitter(new SimpleDocumentSplitter(aiDocument.getChunkSize(), aiDocument.getOverlapSize()));
			documentStore.setDocumentSplitter(getDocumentSplitter(splitterName, chunkSize, overlapSize, regex, 2));

			// 设置文档ID生成器
			AiDocument finalEntity = entity;
			// AtomicInteger sort = new AtomicInteger(1);
			//Integer sort = new Integer(1);
			AtomicInteger sort = new AtomicInteger(1);
			// 设置文档ID生成器
			documentStore.setDocumentIdGenerator(document -> {
			AiDocumentChunk chunk = new AiDocumentChunk();
			chunk.setContent(document.getContent());
			@@ -376,12 +366,16 @@

			Document document = Document.of(entity.getContent());


			StoreResult result = documentStore.store(document, options);

			if (!result.isSuccess()) {
			LoggerFactory.getLogger(AiDocumentController.class).error("DocumentStore.store failed: " + result);
			}
			AiKnowledge aiKnowledge = new AiKnowledge();
			aiKnowledge.setId(entity.getKnowledgeId());
			// CanUpdateEmbedLlm false: 不能修改知识库的大模型 true: 可以修改
			aiKnowledge.setCanUpdateEmbedding(false);
			knowledgeService.updateById(aiKnowledge);
			return Result.success();
			}

			public String getRootPath() {
			@@ -396,7 +390,7 @@
			}
			}

			public DocumentSplitter getDocumentSplitter (String splitterName, int chunkSize, int overlapSize, String regex){
			public DocumentSplitter getDocumentSplitter (String splitterName, int chunkSize, int overlapSize, String regex, int excelRows){

			if (StringUtil.noText(splitterName)) {
			return null;
			@@ -412,10 +406,12 @@
			} else {
			return new SimpleTokenizeSplitter(chunkSize, overlapSize);
			}
			case "ExcelDocumentSplitter":
			return new ExcelDocumentSplitter(excelRows);
			default:
			return null;
			}

			}

			}
			}