C#抓取网页上的HTML内容(由JS动态生成的内容无法抓取)

抓取内容的代码:

 1         /// </summary>
 2         /// <param name="url">路径URL</param>
 3         /// <param name="path">存储路径</param>
 4         /// <returns></returns>
 5         public static string HttpDownloadFile(string url, string path)
 6         {
 7             try
 8             {
 9                 // 设置参数
10                 HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
11 
12                 //发送请求并获取相应回应数据
13                 HttpWebResponse response = request.GetResponse() as HttpWebResponse;
14                 //直到request.GetResponse()程序才开始向目标网页发送Post请求
15                 Stream responseStream = response.GetResponseStream();
16 
17                 //可以将生成的流文件直接生成字符串 SourceCode就是生成后的字符串
18                 //StreamReader readStream = new StreamReader(responseStream, Encoding.UTF8);
19                 //string SourceCode = readStream.ReadToEnd();
20 
21                 //创建本地文件写入流
22                 if (File.Exists(path))
23                 {
24                     File.Delete(path);
25                 }
26                 FileStream fs = File.Create(path);
27                 fs.Close();
28                 
29                 Stream stream = new FileStream(path, FileMode.Create);
30                 byte[] bArr = new byte[1024];
31                 int size = responseStream.Read(bArr, 0, (int)bArr.Length);
32                 while (size > 0)
33                 {
34                     stream.Write(bArr, 0, size);
35                     size = responseStream.Read(bArr, 0, (int)bArr.Length);
36                 }
37                 stream.Close();
38                 responseStream.Close();
39                 return path;
40             }
41             catch (Exception ex)
42             {
43 
44                 throw ex;
45             }
46 
47         }

调用方式:

1                 HttpReviceFile.HttpDownloadFile("http://localhost:811/ ", @"D:WorkTest.xml");
原文地址:https://www.cnblogs.com/870060760JR/p/6118024.html