利用OCR识别扫描的jpg、tif文件的文字

第一步:下载老马哥的从 office和sharepoint 提取出来的注册表和dll  http://115.com/file/dpa4qrt2  

或者直接安装office和sharepoint2007

第二步:下载我的demo   http://files.cnblogs.com/0banana0/OCR.zip

***识别度不是百分之百的  当然需要校准啦   在 编辑器里边修改错误的东西 !

 第三步:发布

本地环境无错,发布到 IIS 报错:“Object hasn't been initialized and can't be used yet”

解决办法:Go to IIS -> Application Pools -> Default Application Pool -> Identity -> Custom account -> give the user name and password.

****发布的时候 iis还报一个错:Compiler Error Message: CS0016拒绝访问

解决办法:给 C:\Windows\Temp 目录加上 NETWORK SERVICE(只加这个我的不行)和 IIS_IUSRS(后来加上这个才行)的权限

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Collections;
using System.IO;
using System.Text;
using ContractManage.DAL;
using System.Threading;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace ContractManage.uploadify
{
    /// <summary>
    /// HTTP handler that runs Microsoft Office Document Imaging (MODI) OCR over the
    /// scanned images stored for one contract and saves the recognized text.
    /// </summary>
    /// <remarks>
    /// The contract is identified by the <c>cid</c> query-string value. Images are read
    /// from the <c>OCRFile\{cid}</c> folder located next to the path returned by
    /// <c>Server.MapPath("uploadify")</c>, and one <c>Pt_ContractImg</c> record is
    /// inserted per recognized image.
    /// </remarks>
    public class OCR : IHttpHandler
    {
        /// <summary>Maximum time to wait for the OCR-completed signal, in milliseconds.</summary>
        private const int OcrTimeoutMilliseconds = 5000;

        /// <summary>File extensions (compared case-insensitively) that are sent to OCR.</summary>
        private static readonly string[] SupportedExtensions = { ".jpg", ".tif", ".tiff" };

        // COM objects of the file currently being processed. They are kept in fields so
        // that both the per-file cleanup and Dispose() can release them.
        private MODI.Document _document;
        private MODI.Images _images;
        private MODI.Image _image;
        private MODI.Layout _layout;

        // Signalled by _document_OnOCRProgress when MODI reports 100% progress.
        private ManualResetEvent _completedOCR = new ManualResetEvent(false);

        /// <summary>
        /// Runs OCR for the contract named by the <c>cid</c> query-string value and
        /// always answers with a single space as <c>text/plain</c>.
        /// </summary>
        /// <param name="context">The current HTTP context.</param>
        public void ProcessRequest(HttpContext context)
        {
            context.Response.ContentType = "text/plain";

            // cid is untrusted input that becomes part of a file-system path, so only a
            // plain decimal integer is accepted (this also rules out "..\" traversal).
            string cid = context.Request.QueryString["cid"];
            if (IsNumericId(cid))
            {
                // MapPath("uploadify") ends with "\uploadify"; its parent directory is the
                // folder that contains OCRFile (same result as trimming the last 10 chars).
                string baseDirectory = Path.GetDirectoryName(context.Server.MapPath("uploadify"));
                string path = Path.Combine(Path.Combine(baseDirectory, "OCRFile"), cid);
                CheckFileType(path, cid);
            }

            context.Response.Write(" ");
        }

        /// <summary>
        /// Runs OCR on every supported image in <paramref name="directoryPath"/> and
        /// stores the recognized text for contract <paramref name="cid"/>.
        /// </summary>
        /// <param name="directoryPath">Folder that holds the contract's scanned images.</param>
        /// <param name="cid">Contract id; used for the stored relative path and the record.</param>
        /// <remarks>
        /// A missing folder is treated as "nothing to do". Exceptions raised by MODI
        /// while processing a file propagate to the caller.
        /// </remarks>
        public void CheckFileType(string directoryPath, string cid)
        {
            if (string.IsNullOrEmpty(directoryPath) || !Directory.Exists(directoryPath))
            {
                return;
            }

            foreach (string file in Directory.GetFiles(directoryPath))
            {
                if (!IsSupportedImage(file))
                {
                    continue;
                }

                string strContent = RecognizeText(file);
                string strPath = "uploadify/OCRFile/" + cid + "/" + Path.GetFileName(file);
                SaveDate(strPath, strContent, cid);
            }
        }

        /// <summary>
        /// Performs Simplified-Chinese OCR on one image and returns the text of its first page.
        /// </summary>
        /// <param name="imagePath">Full path of the image file.</param>
        /// <returns>The text MODI recognized on the first page.</returns>
        private string RecognizeText(string imagePath)
        {
            MODI._IDocumentEvents_OnOCRProgressEventHandler progressHandler =
                new MODI._IDocumentEvents_OnOCRProgressEventHandler(_document_OnOCRProgress);

            try
            {
                // The event stays signalled after the previous file; reset it so the
                // wait below refers to this file only.
                _completedOCR.Reset();

                _document = new MODI.Document();
                _document.OnOCRProgress += progressHandler;
                _document.Create(imagePath);
                _document.OCR(MODI.MiLANGUAGES.miLANG_CHINESE_SIMPLIFIED, true, true);

                _completedOCR.WaitOne(OcrTimeoutMilliseconds);

                _images = _document.Images;
                _image = (MODI.Image)_images[0];
                _layout = _image.Layout;
                string text = _layout.Text;

                // false: do not save changes back to the image file.
                _document.Close(false);
                return text;
            }
            finally
            {
                if (_document != null)
                {
                    _document.OnOCRProgress -= progressHandler;
                }

                // Release the COM wrappers even when OCR failed, so they are not left
                // waiting for the garbage collector.
                ReleaseComObjects();
            }
        }

        /// <summary>MODI progress callback; signals completion once progress reaches 100.</summary>
        void _document_OnOCRProgress(int Progress, ref bool Cancel)
        {
            if (Progress == 100)
            {
                _completedOCR.Set();
            }
        }

        /// <summary>Returns true when <paramref name="value"/> consists only of digits and fits in an Int32.</summary>
        private static bool IsNumericId(string value)
        {
            int parsed;
            return !string.IsNullOrEmpty(value)
                && int.TryParse(
                    value,
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out parsed);
        }

        /// <summary>Returns true when the file's extension is one of <see cref="SupportedExtensions"/>.</summary>
        private static bool IsSupportedImage(string filePath)
        {
            string extension = Path.GetExtension(filePath);
            foreach (string supported in SupportedExtensions)
            {
                if (string.Equals(extension, supported, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>Calls <c>Marshal.FinalReleaseComObject</c> on every non-null argument.</summary>
        private static void SetComObjectToNull(params object[] objects)
        {
            foreach (object o in objects)
            {
                if (o != null)
                {
                    Marshal.FinalReleaseComObject(o);
                }
            }
        }

        /// <summary>Releases the COM objects held in the fields and clears the fields.</summary>
        private void ReleaseComObjects()
        {
            SetComObjectToNull(_layout, _image, _images, _document);

            // Null the fields so a later call cannot touch an already released wrapper.
            _layout = null;
            _image = null;
            _images = null;
            _document = null;
        }

        /// <summary>
        /// Releases any COM objects still held by this handler and forces a garbage collection.
        /// </summary>
        [MethodImpl(MethodImplOptions.NoInlining)]
        public void Dispose()
        {
            ReleaseComObjects();
            GC.Collect();
            GC.WaitForPendingFinalizers();
        }

        /// <summary>
        /// Inserts a <c>Pt_ContractImg</c> record holding the recognized text of one image.
        /// </summary>
        /// <param name="strPath">Relative path of the image, stored with the record.</param>
        /// <param name="strContent">Text recognized by OCR.</param>
        /// <param name="cid">Contract id; converted to an integer for the record.</param>
        /// <remarks>
        /// Saving is best effort: a failure is written to the trace output and does not
        /// stop the remaining files from being processed.
        /// </remarks>
        public void SaveDate(string strPath, string strContent, string cid)
        {
            try
            {
                Pt_ContractImg img = new Pt_ContractImg();
                img.Content = strContent;
                img.Path = strPath;
                img.ContractID = Convert.ToInt32(cid);
                Pt_ContractImg_DAO.Insert(img);
            }
            catch (Exception ex)
            {
                System.Diagnostics.Trace.TraceError(
                    "OCR: failed to save recognized text for '{0}' (cid={1}): {2}",
                    strPath,
                    cid,
                    ex);
            }
        }

        /// <summary>Always false: the handler keeps per-request state in its fields.</summary>
        public bool IsReusable
        {
            get
            {
                return false;
            }
        }

    }
}
原文地址:https://www.cnblogs.com/0banana0/p/3184886.html