using System;
using System.IO;
using System.Text;
using PaddleOCRSharp;
namespace CommonHelper
{
/// <summary>
/// Static helper that keeps one shared <see cref="PaddleOCREngine"/> and uses it to
/// extract text from image files and from PDF files.
/// </summary>
public static class OcrHelper
{
    // Shared engine instance; stays null until Init() has been called.
    private static PaddleOCREngine engine;

    /// <summary>
    /// Creates the shared OCR engine. Must be called before <see cref="Img2Txt"/> or
    /// <see cref="Pdf2Txt"/>. Each call constructs a new engine and replaces the stored one.
    /// </summary>
    public static void Init()
    {
        // Original note: "bundled lightweight Chinese/English V4 model".
        // NOTE(review): presumably a null config makes the engine fall back to that
        // bundled model -- confirm against the PaddleOCRSharp documentation.
        OCRModelConfig config = null;
#if DEBUG
        // DEBUG builds point the engine at explicit model folders under <root>\bin\inference.
        // Two-argument Path.Combine is used so separators are handled by the framework
        // rather than by hand-written "\\" concatenation.
        string modelDir = Path.Combine(Path.Combine(EngineBase.GetRootDirectory(), "bin"), "inference");
        config = new OCRModelConfig();
        config.det_infer = Path.Combine(modelDir, "ch_PP-OCRv4_det_infer");
        config.cls_infer = Path.Combine(modelDir, "ch_ppocr_mobile_v2.0_cls_infer");
        config.rec_infer = Path.Combine(modelDir, "ch_PP-OCRv4_rec_infer");
        config.keys = Path.Combine(modelDir, "ppocr_keys.txt");
#endif
        // OCR parameters.
        OCRParameter parameter = new OCRParameter
        {
            cpu_math_library_num_threads = 10, // number of concurrent prediction threads
            enable_mkldnn = true,
            cls = false,                       // whether to run text-direction classification; default false
            det = true,                        // whether to enable text-box detection (finds text blocks)
            use_angle_cls = false,             // whether to enable direction detection (recognises 180-degree rotation)
            det_db_score_mode = true,          // whether text regions are described by a polyline instead of a rectangle
            max_side_len = 960,
            rec_img_h = 48,
            rec_img_w = 320,
            det_db_thresh = 0.3f,
            det_db_box_thresh = 0.618f
        };

        // Create the OCR engine.
        engine = new PaddleOCREngine(config, parameter);
    }

    /// <summary>
    /// Runs OCR on a single image file.
    /// </summary>
    /// <param name="path">Path of the image file to read.</param>
    /// <returns>The <c>Text</c> of the OCR result returned by the engine.</returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown when <see cref="Init"/> has not been called yet.
    /// </exception>
    public static string Img2Txt(string path)
    {
        // Fail with a descriptive error instead of a NullReferenceException
        // when the engine has not been created.
        if (engine == null)
        {
            throw new InvalidOperationException("OcrHelper.Init() must be called before running OCR.");
        }

        byte[] imageBytes = File.ReadAllBytes(path);
        var result = engine.DetectText(imageBytes);
        return result.Text;
    }

    /// <summary>
    /// Runs OCR on a PDF file: every image path produced by <c>PdfHelper.Pdf2Imgs</c>
    /// is recognised in order and the texts are concatenated without a separator.
    /// Each image file is deleted after it has been processed.
    /// </summary>
    /// <param name="path">Path of the PDF file.</param>
    /// <returns>The concatenated text of all page images.</returns>
    public static string Pdf2Txt(string path)
    {
        var text = new StringBuilder();
        foreach (var imgPath in PdfHelper.Pdf2Imgs(path))
        {
            try
            {
                text.Append(Img2Txt(imgPath));
            }
            finally
            {
                // Remove the page image even when OCR throws, so a failure
                // does not leave the current image behind on disk.
                File.Delete(imgPath);
            }
        }
        return text.ToString();
    }
}
}