// WX 2024-06-01 e1cd06bcd9860c1a8172f5779bc6bc52a5fd8e84
using System;
using System.IO;
using System.Text;
using PaddleOCRSharp;
 
namespace CommonHelper
{
    public static class OcrHelper
    {
        private static PaddleOCREngine engine;
 
        /// <summary>
        /// 初始化
        /// </summary>
        public static void Init()
        {
            //自带轻量版中英文模型V4模型
            OCRModelConfig config = null;
#if DEBUG
            config = new OCRModelConfig();
            string str1 = EngineBase.GetRootDirectory().TrimEnd('\\');
            config = new OCRModelConfig();
            string str2 = str1 + "\\bin\\inference";
            config.det_infer = str2 + "\\ch_PP-OCRv4_det_infer";
            config.cls_infer = str2 + "\\ch_ppocr_mobile_v2.0_cls_infer";
            config.rec_infer = str2 + "\\ch_PP-OCRv4_rec_infer";
            config.keys = str2 + "\\ppocr_keys.txt";
#endif
 
            //OCR参数
            OCRParameter oCRParameter = new OCRParameter();
            oCRParameter.cpu_math_library_num_threads = 10;//预测并发线程数
            oCRParameter.enable_mkldnn = true;
            oCRParameter.cls = false; //是否执行文字方向分类;默认false
            oCRParameter.det = true;//是否开启文本框检测,用于检测文本块
            oCRParameter.use_angle_cls = false;//是否开启方向检测,用于检测识别180旋转
            oCRParameter.det_db_score_mode = true;//是否使用多段线,即文字区域是用多段线还是用矩形,
            oCRParameter.max_side_len = 960;
            oCRParameter.rec_img_h = 48;
            oCRParameter.rec_img_w = 320;
            oCRParameter.det_db_thresh = 0.3f;
            oCRParameter.det_db_box_thresh = 0.618f;
 
            //初始化OCR引擎
            engine = new PaddleOCREngine(config, oCRParameter);
 
        }
 
        /// <summary>
        /// 图片ocr
        /// </summary>
        /// <param name="path">图片地址</param>
        /// <returns></returns>
        public static string Img2Txt(string path)
        {
            var imgByte = File.ReadAllBytes(path);
            var ocrRes = engine.DetectText(imgByte);
            return ocrRes.Text;
        }
 
        /// <summary>
        /// pdf ocr
        /// </summary>
        /// <param name="path">pdf文件地址</param>
        /// <returns></returns>
        public static string Pdf2Txt(string path)
        {
 
            var sb = new StringBuilder();
            foreach (var imgPath in PdfHelper.Pdf2Imgs(path))
            {
                var txt = Img2Txt(imgPath);
                sb.Append(txt);
 
                File.Delete(imgPath);
            }
 
            return sb.ToString();
        }
 
    }
 
}