读取Word文档中的表格

 1         //读取Word文档中的表格
 2         // DataTable 需要添加引用  using System.Data;
 3         public static DataTable Run()
 4         {
 5             try
 6             {
 7                 //DataTable 申明DataTable变量,保存从Word获取到的数据
 8                 DataTable dt = new DataTable();
 9                 DataColumn dc1 = new DataColumn("One", Type.GetType("System.String"));
10                 DataColumn dc2 = new DataColumn("TwoText", Type.GetType("System.String"));
11                 DataColumn dc3 = new DataColumn("TwoHtml", Type.GetType("System.String"));
12                 dt.Columns.Add(dc1);
13                 dt.Columns.Add(dc2);
14                 dt.Columns.Add(dc3);
15 
16                 int number = 1;//记录有数据的文档数目
17                 int troublecount = 0;//记录没有数据的文档数目
18 
19                 string Content = "";//申明变量,保存word文档内容
20                 //获取目录下的所有文件
21                 //DirectoryInfo  FileInfo  需要添加引用  using System.IO;
22                 DirectoryInfo dir = new DirectoryInfo("E:/20190917");
23                 FileInfo[] fileList = dir.GetFiles();
24                 foreach (var item in fileList)
25                 {
26                     object fileName = item.FullName;
27                     object confirmCovert = false;
28                     //判断文档类型是否为word文档
29                     if (item.Extension.ToUpper() == ".DOC" || item.Extension.ToUpper() == ".DOCX")
30                     {
31                         //获取word文档内容
32                         //Application  Document需要添加引用   using Microsoft.Office.Interop.Word;
33                         Application app = new Application();
34                         Document doc = null;
35 
36                         doc = app.Documents.Open(ref fileName, ref confirmCovert);
37                         app.Visible = false;
38                         Content = doc.Content.Text;
39 
40                         string[] arr = Content.Split('
');
41                         if (arr.Count() < 2)
42                         {
43                             troublecount++;
44                             Console.WriteLine("文件{0}中没有正文!!!!!!!!。{1}", fileName, troublecount);
45                             continue;
46                         }
47                         else
48                         {
49                             //抓取表格内容
50                             DataRow dr = dt.NewRow();
51                             dr["One"] = arr[0].ToString();
52                             int contentIndex = Content.IndexOf("表格显示:");
53                             List<string> lst = GetContent(doc, Content, contentIndex);
54 
55                             dr["TwoText"] = lst[0].ToString();
56                             dr["TwoHtml"] = lst[1].ToString();
57                             dt.Rows.Add(dr);
58 
59                             number++;
60                         }
61 
62                         doc.Close();
63                         app.Quit();
64 
65                     }
66                     WriteOuputInformation(string.Format("{0}:文档已经存入数据库。{1}", fileName, number));
67                 }
68                 Console.WriteLine("所有文件已读取完毕,共读取了{0}条数据,没有数据的Word文档总条数为{1}", number, troublecount);
69                 return dt;
70             }
71             catch (Exception exp)
72             {
73                 WriteErrorInformation(string.Format("Exception: {0}", exp.Message));
74                 return null;
75             }
76         }
#region[获取表格纯文本内容和富文本内容]
        /// <summary>
        /// Builds the plain-text and HTML renderings of the document body that
        /// follows the marker located at <paramref name="contentIndex"/>.
        /// Every Word table in <paramref name="doc"/> is rendered as an HTML table
        /// and substituted for its raw text inside the HTML body; paragraphs are
        /// wrapped in &lt;p&gt; elements.
        /// </summary>
        /// <param name="doc">Open Word document whose tables are converted.</param>
        /// <param name="Content">Full text of the document.</param>
        /// <param name="contentIndex">
        /// Index of the body marker inside <paramref name="Content"/>; the body
        /// starts 6 characters after it. A negative value (marker not found) makes
        /// the whole text the body instead of silently dropping its first characters.
        /// </param>
        /// <returns>A two-item list: [0] plain text, [1] HTML.</returns>
        static List<string> GetContent(Document doc, string Content, int contentIndex)
        {
            // Clamp the start so a missing marker or a marker at the very end of
            // the text cannot make Substring throw or cut arbitrary characters.
            int bodyStart = contentIndex < 0 ? 0 : Math.Min(contentIndex + 6, Content.Length);
            string contentText = Content.Substring(bodyStart);
            string contentHtml = contentText;

            int cellCount = 0;
            int tableCount = doc.Tables.Count; // Word collections are 1-based
            for (int i = 1; i <= tableCount; i++)
            {
                // Cache the COM objects/counts instead of re-querying them in every loop test.
                var table = doc.Tables[i];
                string wordtable = table.Range.Text; // raw text of the table as it appears in Content
                int rowCount = table.Rows.Count;
                int columnCount = table.Columns.Count;

                var htmltable = new System.Text.StringBuilder();
                htmltable.Append("<table cellspacing='0' bordercolor='black' border='1' cellpadding='5' text-align='center'>");
                for (int row = 1; row <= rowCount; row++)
                {
                    htmltable.Append("<tr>");
                    for (int column = 1; column <= columnCount; column++)
                    {
                        // NOTE(review): the results of these two helpers were never used
                        // (an earlier revision addressed the cell as Cell(R, C)); the calls
                        // are kept in case the helpers have side effects - confirm and remove.
                        getTableRowIndex(row, column, doc, i);
                        getTablecolumnIndex(row, column, doc, i);

                        // A cell's text ends with the end-of-cell marker "\r\a"
                        // (carriage return + BEL); strip both control characters.
                        string cellText = table.Cell(row, column).Range.Text
                            .Replace("\r", "")
                            .Replace("\a", "");

                        // NOTE(review): cellText is inserted without HTML encoding.
                        htmltable.Append("<td>").Append(cellText).Append("</td>");
                        cellCount++;
                    }
                    htmltable.Append("</tr>");
                }
                htmltable.Append("</table>");

                if (!string.IsNullOrEmpty(wordtable))
                {
                    contentHtml = contentHtml.Replace(wordtable, htmltable.ToString());
                }
            }

            // The original appended two spaces to the plain text for every table cell;
            // that output is preserved here in a single step.
            if (cellCount > 0)
            {
                contentText += new string(' ', 2 * cellCount);
            }

            // Each paragraph mark becomes a paragraph boundary in the HTML.
            contentHtml = "<p>" + contentHtml.Replace("\r", "</p><p>") + "</p>";

            List<string> lst = new List<string>();
            lst.Add(contentText);
            lst.Add(contentHtml);
            return lst;
        }
        #endregion
#region[操作后给出提示信息]
        /// <summary>
        /// Writes an error message to the console in red, prefixed with "Error: ".
        /// </summary>
        /// <param name="errorInformation">Message text to display.</param>
        static void WriteErrorInformation(string errorInformation)
        {
            // Remember the caller's colour instead of assuming it was Gray, and
            // restore it even if writing to the console fails.
            ConsoleColor previousColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Red;
            try
            {
                Console.WriteLine("Error: " + errorInformation);
            }
            finally
            {
                Console.ForegroundColor = previousColor;
            }
        }
        /// <summary>
        /// Writes a progress message to the console in dark green, prefixed with "-->>".
        /// </summary>
        /// <param name="outputInformation">Message text to display.</param>
        static void WriteOuputInformation(string outputInformation)
        {
            // Remember the caller's colour instead of assuming it was Gray, and
            // restore it even if writing to the console fails.
            ConsoleColor previousColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.DarkGreen;
            try
            {
                Console.WriteLine("-->>" + outputInformation);
            }
            finally
            {
                Console.ForegroundColor = previousColor;
            }
        }
        #endregion
原文地址:https://www.cnblogs.com/suflowers1700218/p/11676645.html