C# 读取 csv(大文件)

上次读取了excel发现还是很慢(结果集为DataTable),后来研究了一下csv这个文件效率很高呀,特别是针对大文件的时候,话不多说上代码!

本机配置:win10 i5900F 16G 500G固态

1、csv文件帮助类

  1 public static class CsvHelper
  2     {
  3         /// <summary>
  4         /// 创建CSV文件并写入内容
  5         /// </summary>
  6         /// <param name="dt">DataTable</param>
  7         /// <param name="fileName">文件全名</param>
  8         /// <returns>是否写入成功</returns>
  9         public static Boolean SaveCSV(DataTable dt, string fullFileName)
 10         {
 11             Boolean r = false;
 12             FileStream fs = new FileStream(fullFileName, System.IO.FileMode.Create, System.IO.FileAccess.Write);
 13             StreamWriter sw = new StreamWriter(fs, System.Text.Encoding.Default);
 14             string data = "";
 15 
 16             //写出列名称
 17             for (int i = 0; i < dt.Columns.Count; i++)
 18             {
 19                 data += dt.Columns[i].ColumnName.ToString();
 20                 if (i < dt.Columns.Count - 1)
 21                 {
 22                     data += ",";
 23                 }
 24             }
 25             sw.WriteLine(data);
 26 
 27             //写出各行数据
 28             for (int i = 0; i < dt.Rows.Count; i++)
 29             {
 30                 data = "";
 31                 for (int j = 0; j < dt.Columns.Count; j++)
 32                 {
 33                     data += dt.Rows[i][j].ToString();
 34                     if (j < dt.Columns.Count - 1)
 35                     {
 36                         data += ",";
 37                     }
 38                 }
 39                 sw.WriteLine(data);
 40             }
 41 
 42             sw.Close();
 43             fs.Close();
 44 
 45             r = true;
 46             return r;
 47         }
 48 
 49         /// <summary>
 50         /// 读CSV 文件
 51         /// </summary>
 52         /// <param name="fileName">文件全名</param>
 53         /// <returns>DataTable</returns>
 54         public static DataTable ReadCSV(string fullFileName)
 55         {
 56             return ReadCSV(fullFileName, 0, 0, 0, 0, true);
 57         }
 58 
 59         /// <summary>
 60         /// 读CSV 文件
 61         /// </summary>
 62         /// <param name="fileName">文件全名</param>
 63         /// <param name="firstRow">开始行</param>
 64         /// <param name="firstColumn">开始列</param>
 65         /// <param name="getRows">获取多少行</param>
 66         /// <param name="getColumns">获取多少列</param>
 67         /// <param name="haveTitleRow">是有标题行</param>
 68         /// <returns>DataTable</returns>
 69         public static DataTable ReadCSV(string fullFileName, Int16 firstRow = 0, Int16 firstColumn = 0, Int16 getRows = 0, Int16 getColumns = 0, bool haveTitleRow = true)
 70         {
 71             DataTable dt = new DataTable();
 72             FileStream fs = new FileStream(fullFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
 73             StreamReader sr = new StreamReader(fs, System.Text.Encoding.Default);
 74             try
 75             {
 76                 string strLine = "";//记录每次读取的一行记录
 77                 string[] aryLine;//记录每行记录中的各字段内容
 78                 int columnCount = 0; //标示列数
 79                 bool bCreateTableColumns = false;//是否已建立了表的字段
 80                 int iRow = 1;//第几行
 81 
 82                 if (firstRow > 0) //去除无用行
 83                 {
 84                     for (int i = 1; i < firstRow; i++)
 85                     {
 86                         sr.ReadLine();
 87                     }
 88                 }
 89                 string[] separators = { "," };// { ",", ".", "!", "?", ";", ":", " " };
 90                 while ((strLine = sr.ReadLine()) != null)//逐行读取CSV中的数据
 91                 {
 92                     strLine = strLine.Trim();
 93                     aryLine = strLine.Split(separators, System.StringSplitOptions.RemoveEmptyEntries);
 94 
 95                     if (bCreateTableColumns == false)
 96                     {
 97                         bCreateTableColumns = true;
 98                         columnCount = aryLine.Length;
 99                         //创建列
100                         for (int i = firstColumn; i < (getColumns == 0 ? columnCount : firstColumn + getColumns); i++)
101                         {
102                             DataColumn dc = new DataColumn(haveTitleRow == true ? aryLine[i] : "COL" + i.ToString());
103                             dt.Columns.Add(dc);
104                         }
105 
106                         bCreateTableColumns = true;
107 
108                         if (haveTitleRow == true)
109                         {
110                             continue;
111                         }
112                     }
113 
114                     DataRow dr = dt.NewRow();
115                     for (int j = firstColumn; j < (getColumns == 0 ? columnCount : firstColumn + getColumns); j++)
116                     {
117                         dr[j - firstColumn] = aryLine[j];
118                     }
119                     dt.Rows.Add(dr);
120 
121                     iRow = iRow + 1;
122                     if (getRows > 0)
123                     {
124                         if (iRow > getRows)
125                         {
126                             break;
127                         }
128                     }
129                 }
130             }
131             catch (Exception ex)
132             {
133                 //异常处理
134             }
135             finally
136             {
137                 sr.Close();
138                 fs.Close();
139             }
140             return dt;
141         }
142 
143     }
View Code

2、使用

2.1、创建csv文件,数据量为100W行,21列

 1  public void CSV_Create()
 2         {
 3             string filePath = @"C:UsersAdministratorDesktop大数据.csv";
 4 
 5             #region 填充DataTable
 6             DataTable tblDatas = new DataTable("Datas");
 7             DataColumn dc = null;
 8             dc = tblDatas.Columns.Add("ID", Type.GetType("System.Int32"));
 9             dc.AutoIncrement = true;//自动增加
10             dc.AutoIncrementSeed = 1;//起始为1
11             dc.AutoIncrementStep = 1;//步长为1
12             dc.AllowDBNull = false;//
13 
14             for (int i = 1; i < 21; i++)
15             {
16                 dc = tblDatas.Columns.Add("p" + i, Type.GetType("System.String"));
17             }
18 
19             DataRow newRow = tblDatas.NewRow();
20 
21             for (int i = 0; i < 1000000; i++)
22             {
23                 newRow = tblDatas.NewRow();
24                 newRow["p1"] = "大话西游大话西游大话西游大话西游" + i;
25                 newRow["p2"] = "大话西游大话西游大话西游大话西游" + i;
26                 newRow["p3"] = "大话西游大话西游大话西游大话西游" + i;
27                 newRow["p4"] = "大话西游大话西游大话西游大话西游" + i;
28                 newRow["p5"] = "大话西游大话西游大话西游大话西游" + i;
29                 newRow["p6"] = "大话西游大话西游大话西游大话西游" + i;
30                 newRow["p7"] = "大话西游大话西游大话西游大话西游" + i;
31                 newRow["p8"] = "大话西游大话西游大话西游大话西游" + i;
32                 newRow["p9"] = "大话西游大话西游大话西游大话西游" + i;
33                 newRow["p10"] = "大话西游大话西游大话西游大话西游" + i;
34                 newRow["p11"] = "大话西游大话西游大话西游大话西游" + i;
35                 newRow["p12"] = "大话西游大话西游大话西游大话西游" + i;
36                 newRow["p13"] = "大话西游大话西游大话西游大话西游" + i;
37                 newRow["p14"] = "大话西游大话西游大话西游大话西游" + i;
38                 newRow["p15"] = "大话西游大话西游大话西游大话西游" + i;
39                 newRow["p16"] = "大话西游大话西游大话西游大话西游" + i;
40                 newRow["p17"] = "大话西游大话西游大话西游大话西游" + i;
41                 newRow["p18"] = "大话西游大话西游大话西游大话西游" + i;
42                 newRow["p19"] = "大话西游大话西游大话西游大话西游" + i;
43                 newRow["p20"] = "大话西游大话西游大话西游大话西游" + i;
44                 tblDatas.Rows.Add(newRow);
45             }
46 
47             #endregion
48 
49             Stopwatch sw = new Stopwatch();
50             sw.Start();
51 
52             CsvHelper.SaveCSV(tblDatas, filePath);
53 
54             System.IO.FileInfo fileInfo = new System.IO.FileInfo(filePath);
55             log.Info("生成.csv文件," + filePath + ",文件大小" + System.Math.Ceiling((fileInfo.Length / 1024.0) / 1024) + " M" + ",耗时:" + sw.Elapsed);
56         }
View Code

耗时大概20秒左右,文件大小750M左右。

2.2、读csv文件

/// <summary>
/// Benchmark helper: reads the CSV file produced by the write benchmark into a
/// <see cref="DataTable"/> via <c>CsvHelper.ReadCSV</c> and logs the row count
/// (in units of 10,000) together with the elapsed time.
/// </summary>
public void CSV_Read()
{
    Stopwatch sw = new Stopwatch();
    sw.Start();

    // Path separators restored; without them the literal is a drive-relative file name.
    string path = @"C:\Users\Administrator\Desktop\大数据.csv";
    DataTable dt = CsvHelper.ReadCSV(path);

    sw.Stop();

    // Integer division on purpose: the message reports whole multiples of 10,000 rows.
    log.Info(path + ",文件读取完成,数据条数" + dt.Rows.Count / 10000 + "万,耗时:" + sw.Elapsed);
}
View Code

生成Datatable类型的结果集,耗时10秒左右,测试结果log日志内容如下:

感谢:https://www.cnblogs.com/fiozhao/p/3225112.html

原文地址:https://www.cnblogs.com/PrintY/p/14044598.html