18586361686
2025-05-13 db5a038b1c407b377f1c25ebff1db806ddfd1868
chore: 更换easyExcel为fastExcel,增加excel分割器
6个文件已修改
65 ■■■■■ 已修改文件
aiflowy-commons/aiflowy-common-ai/pom.xml 4 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
aiflowy-commons/aiflowy-common-ai/src/main/java/tech/aiflowy/common/ai/ExcelDocumentParser.java 36 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
aiflowy-modules/aiflowy-module-ai/src/main/java/tech/aiflowy/ai/controller/AiDocumentChunkController.java 6 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
aiflowy-modules/aiflowy-module-ai/src/main/java/tech/aiflowy/ai/controller/AiDocumentController.java 9 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
aiflowy-ui-react/src/components/AntdCrud/DynamicFormItem.tsx 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
pom.xml 8 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
aiflowy-commons/aiflowy-common-ai/pom.xml
@@ -121,8 +121,8 @@
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>easyexcel</artifactId>
            <groupId>cn.idev.excel</groupId>
            <artifactId>fastexcel</artifactId>
        </dependency>
    </dependencies>
aiflowy-commons/aiflowy-common-ai/src/main/java/tech/aiflowy/common/ai/ExcelDocumentParser.java
@@ -1,9 +1,10 @@
package tech.aiflowy.common.ai;
import cn.idev.excel.EasyExcel;
import cn.idev.excel.read.listener.PageReadListener;
import com.agentsflex.core.document.Document;
import com.agentsflex.core.document.DocumentParser;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.read.listener.PageReadListener;
import com.alibaba.fastjson.JSON;
import java.io.InputStream;
import java.util.ArrayList;
@@ -34,40 +35,15 @@
                        }
                    }
                }))
                .headRowNumber(0)  // 关键:不要跳过任何行
//                .headRowNumber(0)  // 关键:不要跳过任何行
                .sheet()           // 默认第一个 sheet
                .doRead();
        String plainText = generateMarkdownTable(tableData);
//        String plainText = generateMarkdownTable(tableData);
        // 创建并返回 Document 对象
        return new Document(plainText);
        return new Document(JSON.toJSONString(tableData));
    }
    private static String generateMarkdownTable(List<List<String>> tableData) {
        if (tableData == null || tableData.isEmpty()) {
            return "表格数据为空";
        }
        StringBuilder sb = new StringBuilder();
        // 表头
        List<String> headers = tableData.get(0);
        sb.append("| ").append(String.join(" | ", headers)).append(" |\n");
        // 分隔线
        sb.append("|");
        for (int i = 0; i < headers.size(); i++) {
            sb.append(" --- |");
        }
        sb.append("\n");
        // 数据行
        for (int i = 1; i < tableData.size(); i++) {
            List<String> row = tableData.get(i);
            sb.append("| ").append(String.join(" | ", row)).append(" |\n");
        }
        return sb.toString();
    }
}
aiflowy-modules/aiflowy-module-ai/src/main/java/tech/aiflowy/ai/controller/AiDocumentChunkController.java
@@ -32,8 +32,8 @@
@RequestMapping("/api/v1/aiDocumentChunk")
public class AiDocumentChunkController extends BaseCurdController<AiDocumentChunkService, AiDocumentChunk> {
     @Resource
     AiKnowledgeService aiKnowledgeService;
    @Resource
    AiKnowledgeService aiKnowledgeService;
    @Resource
    AiLlmService aiLlmService;
@@ -101,4 +101,4 @@
        StoreResult deleteResult = documentStore.delete(deleteList, options);
        return super.remove(chunkId);
    }
}
}
aiflowy-modules/aiflowy-module-ai/src/main/java/tech/aiflowy/ai/controller/AiDocumentController.java
@@ -12,6 +12,7 @@
import tech.aiflowy.ai.service.*;
import tech.aiflowy.ai.service.impl.AiDocumentServiceImpl;
import tech.aiflowy.common.ai.DocumentParserFactory;
import tech.aiflowy.common.ai.ExcelDocumentSplitter;
import tech.aiflowy.common.domain.Result;
import tech.aiflowy.common.tree.Tree;
import tech.aiflowy.common.util.RequestUtil;
@@ -208,7 +209,7 @@
        if (!userWillSave){
            List<AiDocumentChunk> previewList = new ArrayList<>();
            // 设置分割器 todo 未来可以通过参数来指定分割器,不同的文档使用不同的分割器效果更好
            DocumentSplitter documentSplitter = getDocumentSplitter(splitterName, chunkSize, overlapSize, regex);
            DocumentSplitter documentSplitter = getDocumentSplitter(splitterName, chunkSize, overlapSize, regex, 2);
            Document document = Document.of(aiDocument.getContent());
            List<Document> documents = documentSplitter.split(document);
            int sort = 1;
@@ -342,7 +343,7 @@
        }
        // 设置分割器 todo 未来可以通过参数来指定分割器,不同的文档使用不同的分割器效果更好
        documentStore.setDocumentSplitter(getDocumentSplitter(splitterName, chunkSize, overlapSize, regex));
        documentStore.setDocumentSplitter(getDocumentSplitter(splitterName, chunkSize, overlapSize, regex, 2));
        AiDocument finalEntity = entity;
        AtomicInteger sort  = new AtomicInteger(1);
@@ -389,7 +390,7 @@
        }
    }
    public DocumentSplitter getDocumentSplitter (String splitterName, int chunkSize, int overlapSize, String regex){
    public DocumentSplitter getDocumentSplitter (String splitterName, int chunkSize, int overlapSize, String regex, int excelRows){
        if (StringUtil.noText(splitterName)) {
            return null;
@@ -405,6 +406,8 @@
                } else {
                    return new SimpleTokenizeSplitter(chunkSize, overlapSize);
                }
            case "ExcelDocumentSplitter":
                return new ExcelDocumentSplitter(excelRows);
            default:
                return null;
        }
aiflowy-ui-react/src/components/AntdCrud/DynamicFormItem.tsx
@@ -241,4 +241,4 @@
};
export default DynamicFormItem
export default DynamicFormItem
pom.xml
@@ -37,7 +37,7 @@
        <commons-io.version>2.18.0</commons-io.version>
        <hutool-http.version>5.8.28</hutool-http.version>
        <hutool-json.version>5.8.32</hutool-json.version>
        <easyexcel.version>4.0.3</easyexcel.version>
        <fastexcel.version>1.2.0</fastexcel.version>
    </properties>
    <dependencyManagement>
        <dependencies>
@@ -249,9 +249,9 @@
                <version>${commons-io.version}</version>
            </dependency>
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>easyexcel</artifactId>
                <version>${easyexcel.version}</version>
                <groupId>cn.idev.excel</groupId>
                <artifactId>fastexcel</artifactId>
                <version>${fastexcel.version}</version>
            </dependency>
        </dependencies>
    </dependencyManagement>