C#读取Word指定页的内容

 

复制代码
/// <summary>
        /// Reads the content of one page of the open Word document.
        /// </summary>
        /// <remarks>
        /// The page range is copied to the system clipboard and read back from it,
        /// so whatever the clipboard held before the call is overwritten.
        /// NOTE(review): clipboard access normally requires an STA thread — confirm the caller's threading model.
        /// </remarks>
        /// <param name="page">Number of the page to read, passed to Word's absolute go-to-page navigation.</param>
        /// <param name="isHtml">
        /// <c>true</c> to return the raw "Html Format" clipboard payload decoded as UTF-8;
        /// <c>false</c> to return the page as plain Unicode text.
        /// </param>
        /// <returns>
        /// The page content, or an empty string when the clipboard does not contain
        /// the requested format after the copy.
        /// </returns>
        public string ReadPage(int page, bool isHtml)
        {
            object objWhat = Word.WdGoToItem.wdGoToPage;
            object objWhich = Word.WdGoToDirection.wdGoToAbsolute;
            object objPage = page;

            // range1 marks the start of the requested page, range2 the start of the page after it.
            Word.Range range1 = oDoc.GoTo(ref objWhat, ref objWhich, ref objPage, ref missing);
            Word.Range range2 = range1.GoToNext(Word.WdGoToItem.wdGoToPage);

            object objStart = range1.Start;
            object objEnd = range2.Start;
            if (range1.Start == range2.Start)
            {
                // GoToNext did not move, so this is the last page: read up to the end of
                // the document. Content.End is a range position, unlike Characters.Count,
                // which is a character count and is not guaranteed to equal the end offset.
                objEnd = oDoc.Content.End;
            }

            oDoc.Range(ref objStart, ref objEnd).Copy();

            if (isHtml)
            {
                // The format name is deliberately spelled "Html Format" (not DataFormats.Html):
                // the payload is read as a raw stream and decoded as UTF-8 here, which avoids
                // garbled non-ASCII (e.g. Chinese) text.
                using (MemoryStream stream = Clipboard.GetData("Html Format") as MemoryStream)
                {
                    if (stream == null)
                    {
                        return string.Empty;
                    }

                    // ToArray copies the whole buffer regardless of the current position.
                    return Encoding.UTF8.GetString(stream.ToArray());
                }
            }

            // The clipboard string is already Unicode; re-encoding it through the ANSI code
            // page and decoding as gb2312 would corrupt text on systems whose code page differs.
            string text = Clipboard.GetData(DataFormats.UnicodeText) as string;
            return text ?? string.Empty;
        }
复制代码
原文地址:https://www.cnblogs.com/micro-chen/p/10823701.html