using System;
using System.IO;
using System.Text;
using PaddleOCRSharp;

namespace CommonHelper
{
    /// <summary>
    /// Static convenience wrapper around a single shared <see cref="PaddleOCREngine"/>
    /// for extracting text from image files and PDF documents.
    /// </summary>
    /// <remarks>
    /// <see cref="Init"/> must be called before <see cref="Img2Txt"/> or <see cref="Pdf2Txt"/>.
    /// No synchronization is performed around the shared engine.
    /// </remarks>
    public static class OcrHelper
    {
        // Shared engine instance; created by Init() and replaced on every subsequent Init() call.
        private static PaddleOCREngine engine;

        /// <summary>
        /// Creates the shared OCR engine, replacing (and disposing) any engine
        /// created by an earlier call.
        /// </summary>
        public static void Init()
        {
            // A null config is handed to the engine as-is outside DEBUG builds
            // (original note: the bundled lightweight Chinese/English V4 model is used).
            OCRModelConfig config = null;
#if DEBUG
            // DEBUG builds point the engine at explicit model folders under <root>\bin\inference.
            string rootDir = EngineBase.GetRootDirectory().TrimEnd('\\');
            string modelDir = rootDir + "\\bin\\inference";
            config = new OCRModelConfig();
            config.det_infer = modelDir + "\\ch_PP-OCRv4_det_infer";
            config.cls_infer = modelDir + "\\ch_ppocr_mobile_v2.0_cls_infer";
            config.rec_infer = modelDir + "\\ch_PP-OCRv4_rec_infer";
            config.keys = modelDir + "\\ppocr_keys.txt";
#endif

            // OCR parameters
            OCRParameter oCRParameter = new OCRParameter();
            oCRParameter.cpu_math_library_num_threads = 10; // number of concurrent prediction threads
            oCRParameter.enable_mkldnn = true;
            oCRParameter.cls = false;                // whether to run text-direction classification (default false)
            oCRParameter.det = true;                 // whether to enable text-box detection (locates text blocks)
            oCRParameter.use_angle_cls = false;      // whether to enable angle detection (recognizes 180-degree rotation)
            oCRParameter.det_db_score_mode = true;   // whether text regions are described by polylines rather than rectangles
            oCRParameter.max_side_len = 960;
            oCRParameter.rec_img_h = 48;
            oCRParameter.rec_img_w = 320;
            oCRParameter.det_db_thresh = 0.3f;
            oCRParameter.det_db_box_thresh = 0.618f;

            // Build the new engine first so that a failed construction leaves any
            // previously initialized engine in place, then release the old one.
            PaddleOCREngine newEngine = new PaddleOCREngine(config, oCRParameter);
            PaddleOCREngine oldEngine = engine;
            engine = newEngine;
            if (oldEngine != null)
            {
                oldEngine.Dispose();
            }
        }

        /// <summary>
        /// Runs OCR on a single image file.
        /// </summary>
        /// <param name="path">Path of the image file to read.</param>
        /// <returns>The text reported by the engine for the image.</returns>
        /// <exception cref="InvalidOperationException"><see cref="Init"/> has not been called.</exception>
        public static string Img2Txt(string path)
        {
            EnsureInitialized();

            var imgByte = File.ReadAllBytes(path);
            var ocrRes = engine.DetectText(imgByte);
            return ocrRes.Text;
        }

        /// <summary>
        /// Runs OCR on a PDF by converting it to images with <c>PdfHelper.Pdf2Imgs</c>,
        /// recognizing each image in order and concatenating the results.
        /// </summary>
        /// <param name="path">Path of the PDF file.</param>
        /// <returns>The recognized text of all images, appended without separators.</returns>
        /// <exception cref="InvalidOperationException"><see cref="Init"/> has not been called.</exception>
        public static string Pdf2Txt(string path)
        {
            // Fail before any page images are produced if the engine is missing.
            EnsureInitialized();

            var sb = new StringBuilder();
            foreach (var imgPath in PdfHelper.Pdf2Imgs(path))
            {
                try
                {
                    sb.Append(Img2Txt(imgPath));
                }
                finally
                {
                    // Remove the intermediate image even when OCR of this page fails.
                    File.Delete(imgPath);
                }
            }
            return sb.ToString();
        }

        // Throws a descriptive exception instead of a NullReferenceException when Init() was never called.
        private static void EnsureInitialized()
        {
            if (engine == null)
            {
                throw new InvalidOperationException("OcrHelper.Init() must be called before performing OCR.");
            }
        }
    }
}