using System;
|
using System.IO;
|
using System.Text;
|
using PaddleOCRSharp;
|
|
namespace CommonHelper
|
{
|
/// <summary>
/// Static convenience wrapper around one shared <see cref="PaddleOCREngine"/>.
/// Call <see cref="Init"/> once before using <see cref="Img2Txt"/> or <see cref="Pdf2Txt"/>.
/// </summary>
public static class OcrHelper
{
    // Shared OCR engine; stays null until Init() has run.
    private static PaddleOCREngine engine;

    /// <summary>
    /// Creates the shared OCR engine.
    /// </summary>
    /// <remarks>
    /// Calling this method again after a successful initialization is a no-op.
    /// The method takes no arguments, so a second call could only build an identical
    /// engine while dropping the previous one without releasing it.
    /// </remarks>
    public static void Init()
    {
        if (engine != null)
        {
            return;
        }

        // Original note: "built-in lightweight Chinese/English V4 model".
        // Outside DEBUG builds the config stays null; presumably the engine then
        // falls back to its bundled models - confirm against the PaddleOCRSharp docs.
        OCRModelConfig config = null;
#if DEBUG
        // DEBUG builds load the models from <root>\bin\inference explicitly.
        string inferenceDir = Path.Combine(EngineBase.GetRootDirectory(), "bin", "inference");
        config = new OCRModelConfig
        {
            det_infer = Path.Combine(inferenceDir, "ch_PP-OCRv4_det_infer"),
            cls_infer = Path.Combine(inferenceDir, "ch_ppocr_mobile_v2.0_cls_infer"),
            rec_infer = Path.Combine(inferenceDir, "ch_PP-OCRv4_rec_infer"),
            keys = Path.Combine(inferenceDir, "ppocr_keys.txt")
        };
#endif

        // OCR parameters (notes translated from the original comments).
        OCRParameter ocrParameter = new OCRParameter
        {
            cpu_math_library_num_threads = 10, // number of concurrent prediction threads
            enable_mkldnn = true,
            cls = false,                       // run text-direction classification; default false
            det = true,                        // enable text-box detection (finds text blocks)
            use_angle_cls = false,             // enable angle detection (recognises 180-degree rotation)
            det_db_score_mode = true,          // use polylines rather than rectangles for text regions
            max_side_len = 960,
            rec_img_h = 48,
            rec_img_w = 320,
            det_db_thresh = 0.3f,
            det_db_box_thresh = 0.618f
        };

        // Initialize the OCR engine.
        engine = new PaddleOCREngine(config, ocrParameter);
    }

    /// <summary>
    /// Runs OCR on a single image file.
    /// </summary>
    /// <param name="path">Path of the image file.</param>
    /// <returns>The text recognised in the image.</returns>
    /// <exception cref="InvalidOperationException"><see cref="Init"/> has not been called.</exception>
    public static string Img2Txt(string path)
    {
        EnsureInitialized();

        byte[] imageBytes = File.ReadAllBytes(path);
        var ocrResult = engine.DetectText(imageBytes);
        return ocrResult.Text;
    }

    /// <summary>
    /// Runs OCR on a PDF file by recognising each image produced by
    /// <c>PdfHelper.Pdf2Imgs</c> and concatenating the results without a separator.
    /// </summary>
    /// <param name="path">Path of the PDF file.</param>
    /// <returns>The concatenated text of all recognised images.</returns>
    /// <exception cref="InvalidOperationException"><see cref="Init"/> has not been called.</exception>
    public static string Pdf2Txt(string path)
    {
        // Fail before any page images are rendered to disk.
        EnsureInitialized();

        var text = new StringBuilder();
        foreach (var imgPath in PdfHelper.Pdf2Imgs(path))
        {
            try
            {
                text.Append(Img2Txt(imgPath));
            }
            finally
            {
                // Remove the temporary image even when OCR fails for it.
                File.Delete(imgPath);
            }
        }

        return text.ToString();
    }

    // Throws a descriptive error instead of a NullReferenceException when the engine is missing.
    private static void EnsureInitialized()
    {
        if (engine == null)
        {
            throw new InvalidOperationException(
                "OcrHelper.Init() must be called before running OCR.");
        }
    }
}
|
|
}
|