支持Cookie并开放了一些特殊设置项的HttpWebClient

  1 using System;
  2 using System.Collections.Generic;
  3 using System.Linq;
  4 using System.Text;
  5 using System.Net;
  6 using System.IO;
  7 using System.Collections.Specialized;
  8 using System.Web;
  9 
 10 namespace Common.Helpers
 11 {
 12     /// <summary>
 13     /// 网络访问辅助类
 14     /// </summary>
 15     public class HttpWebClient : WebClient
 16     {
 17         #region 公共属性
 18         /// <summary>
 19         /// 浏览器用户标识,默认采用Chrome的标识
 20         /// </summary>
 21         public string UserAgent { get; set; }
 22         /// <summary>
 23         /// Cookie容器
 24         /// </summary>
 25         public CookieContainer CookieContainer { get; set; }
 26         /// <summary>
 27         /// 如果 POST 请求需要 100-Continue 响应,则为 true;否则为 false。
 28         /// </summary>
 29         public bool Expect100Continue { get; set; }
 30 
 31         private WebResponse m_LastWebResponse = null;
 32         /// <summary>
 33         /// 最后一次的响应对象
 34         /// </summary>
 35         public WebResponse LastWebResponse { get { return this.m_LastWebResponse; } }
 36 
 37         private int m_Timeout = 120000;
 38         /// <summary>
 39         /// 超时时间,默认120000毫秒(120秒)
 40         /// </summary>
 41         public int Timeout
 42         {
 43             get { return m_Timeout; }
 44             set { m_Timeout = value; }
 45         }
 46 
 47         private HttpWebClientSetting m_HttpWebClientSetting = null;
 48         /// <summary>
 49         /// WebClient设置项,该属性始终不会为null
 50         /// </summary>
 51         public HttpWebClientSetting HttpWebClientSetting
 52         {
 53             get
 54             {
 55                 if (m_HttpWebClientSetting == null)
 56                 {
 57                     m_HttpWebClientSetting = new HttpWebClientSetting();
 58                 }
 59                 return m_HttpWebClientSetting;
 60             }
 61             set
 62             {
 63                 m_HttpWebClientSetting = value ?? new HttpWebClientSetting();
 64             }
 65         }
 66         
 67 
 68         /// <summary>
 69         /// 预处理Web请求对象的委托方法(会在每次获取WebRequest对象后调用),默认值为null
 70         /// </summary>
 71         public Action<HttpWebRequest> PrepareProcessWebRequest { get; set; }
 72         #endregion
 73 
 74         #region 构造方法
 75         public HttpWebClient()
 76             : this(new CookieContainer())
 77         {
 78         }
 79 
 80         public HttpWebClient(CookieContainer cookieContainer)
 81         {
 82             this.CookieContainer = cookieContainer;
 83             this.UserAgent = UserAgentValues.FireFox;
 84             this.Expect100Continue = false;
 85         }
 86         #endregion
 87 
 88         #region 重写方法,增加对CookieContainer的支持
 89         protected override WebRequest GetWebRequest(Uri address)
 90         {
 91             if (!string.IsNullOrEmpty(this.UserAgent))
 92             {
 93                 this.Headers.Add(HttpRequestHeader.UserAgent, this.UserAgent);
 94             }
 95 
 96             WebRequest request = base.GetWebRequest(address);
 97             request.Timeout = this.Timeout;
 98             
 99             if (request is HttpWebRequest)
100             {
101                 HttpWebRequest httpRequest = request as HttpWebRequest;
102                 httpRequest.CookieContainer = this.CookieContainer;
103                 httpRequest.ServicePoint.Expect100Continue = this.Expect100Continue; // 取消100-continue
104 
105                 //读取自定义设置项
106                 if (this.HttpWebClientSetting != null)
107                 {
108                     httpRequest.AllowAutoRedirect = this.HttpWebClientSetting.AllowAutoRedirect;
109                 }
110 
111                 //使用外部委托属性处理Request对象
112                 if (this.PrepareProcessWebRequest != null)
113                 {
114                     this.PrepareProcessWebRequest(httpRequest);
115                 }
116             }
117             
118             return request;
119         }
120         #endregion
121 
122         #region 重写方法,增加对响应对象的访问
123         protected override WebResponse GetWebResponse(WebRequest request)
124         {
125             WebResponse response = base.GetWebResponse(request);
126             this.m_LastWebResponse = response;
127             return response;
128         }
129         #endregion
130 
131         #region (public) 向一个URL用POST提交数据,并返回其响应内容 PostData
132         /// <summary>
133         /// 向一个URL用POST提交数据,并返回其响应内容
134         /// ZhangQingFeng    2014-12-14    Add
135         ///    EditLog:
136         ///        ZhangQingFeng    2015-05-12    Edit        因WebClient的UpdateValues方法中固定为UTF-8格式进行UrlEncode,因此此处需用UploadString方式来间接实现    --见微软WebClient类源码UploadValuesInternal方法中
137         /// </summary>
138         /// <param name="url">请求的URL</param>
139         /// <param name="data">要提交的数据</param>
140         /// <param name="encoding">请求所使用的编码</param>
141         /// <param name="responseEncoding">响应内容所使用的编码,为null时使用请求的编码</param>
142         /// <returns>响应的内容</returns>
143         public string PostData(string url, NameValueCollection data, Encoding encoding, Encoding responseEncoding)
144         {
145             WebClient client = this;
146 
147             /*
148             client.Encoding = encoding ?? Encoding.UTF8;
149 
150             byte[] response = client.UploadValues(url, "POST", data ?? new NameValueCollection());
151 
152             string html = string.Empty;
153 
154             if (responseEncoding == null)
155             {
156                 html = client.Encoding.GetString(response);
157             }
158             else
159             {
160                 html = responseEncoding.GetString(response);
161             }
162              */
163 
164             client.Encoding = encoding ?? Encoding.UTF8;
165             client.Headers.Add(HttpRequestHeader.ContentType, "application/x-www-form-urlencoded");
166 
167             string delimiter = String.Empty;
168             StringBuilder values = new StringBuilder();
169             foreach (string name in data.AllKeys)
170             {
171                 values.Append(delimiter);
172                 values.Append(HttpUtility.UrlEncode(name, encoding));
173                 values.Append("=");
174                 values.Append(HttpUtility.UrlEncode(data[name], encoding));
175                 delimiter = "&";
176             }
177 
178             byte[] arrData = client.UploadData(url, "POST", Encoding.ASCII.GetBytes(values.ToString()));
179             string html = (responseEncoding ?? client.Encoding).GetString(arrData);
180 
181             return html;
182         }
183 
184         /// <summary>
185         /// 向一个URL用POST提交数据,并返回其响应内容
186         /// ZhangQingFeng    2014-12-14    Add
187         /// </summary>
188         /// <param name="url">请求的URL</param>
189         /// <param name="data">要提交的数据</param>
190         /// <param name="encoding">请求和响应所使用的编码</param>
191         /// <returns>响应的内容</returns>
192         public string PostData(string url, NameValueCollection data, Encoding encoding)
193         {
194             return PostData(url, data, encoding, null);
195         }
196 
197         /// <summary>
198         /// 向一个URL用POST提交数据,并返回其响应内容(使用this.Encoding来作请求编码和响应编码)
199         /// ZhangQingFeng    2014-12-14    Add
200         /// </summary>
201         /// <param name="url">请求的URL</param>
202         /// <param name="data">要提交的数据</param>
203         /// <returns>响应的内容</returns>
204         public string PostData(string url, NameValueCollection data)
205         {
206             return PostData(url, data, this.Encoding);
207         }
208         #endregion
209 
210         #region (public) 向一个URL用POST提交数据,并返回其响应内容 PostData
211         /// <summary>
212         /// 向一个URL用POST提交数据,并返回其响应内容
213         /// ZhangQingFeng    2014-12-14    Add
214         /// </summary>
215         /// <param name="url">请求的URL</param>
216         /// <param name="data">要提交的数据</param>
217         /// <param name="encoding">请求和响应内容所使用的编码</param>
218         /// <returns>响应的内容</returns>
219         public string PostData(string url, Dictionary<string, string> data, Encoding encoding, Encoding responseEncoding)
220         {
221             NameValueCollection postData = new NameValueCollection();
222             if (data != null)
223             {
224                 foreach (var item in data)
225                 {
226                     postData.Add(item.Key, item.Value);
227                 }
228             }
229             return PostData(url, postData, encoding, responseEncoding);
230         }
231 
232 
233         /// <summary>
234         /// 向一个URL用POST提交数据,并返回其响应内容
235         /// ZhangQingFeng    2014-12-14    Add
236         /// </summary>
237         /// <param name="url">请求的URL</param>
238         /// <param name="data">要提交的数据</param>
239         /// <param name="encoding">请求和响应所使用的编码</param>
240         /// <returns>响应的内容</returns>
241         public string PostData(string url, Dictionary<string, string> data, Encoding encoding)
242         {
243             return PostData(url, data, encoding, null);
244         }
245 
246         /// <summary>
247         /// 向一个URL用POST提交数据,并返回其响应内容(使用this.Encoding来作请求编码和响应编码)
248         /// ZhangQingFeng    2014-12-14    Add
249         /// </summary>
250         /// <param name="url">请求的URL</param>
251         /// <param name="data">要提交的数据</param>
252         /// <returns>响应的内容</returns>
253         public string PostData(string url, Dictionary<string, string> data)
254         {
255             return PostData(url, data, this.Encoding);
256         }
257         #endregion
258 
259         #region 辅助类
260         /// <summary>
261         /// 浏览器用户标识类
262         /// </summary>
263         public class UserAgentValues
264         {
265             public static readonly string FireFox = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0";
266             public static readonly string Chrome = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36";
267             public static readonly string IE8 = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2;)";
268         }
269         #endregion
270     }
271 
272     /// <summary>
273     /// HttpWebClient对象设置类
274     /// </summary>
275     public class HttpWebClientSetting
276     {
277         private bool m_AllowAutoRedirect = true;
278         /// <summary>
279         /// 当响应内容为重定向时客户端是否自动重定向(如果该属性为true,则取到的响应则为重定向后的内容,否则则为响应原文),默认值为true
280         /// </summary>
281         public bool AllowAutoRedirect
282         {
283             get { return m_AllowAutoRedirect; }
284             set { m_AllowAutoRedirect = value; }
285         }
286     }
287 }
HttpWebClient

在做页面抓取的过程中,发现自带的WebClient不够灵活,因此做了一个实现。

关于在PostData方法中不使用UploadValues()方法的原因:

1.查看微软的源代码实现时发现,无论请求时设置的Encoding是什么(例如GB2312),WebClient的UploadValues()在上传内容时,内部始终使用UTF-8编码进行UrlEncode。因此当服务端按GB2312等非UTF-8编码解析请求时,数据中的中文就会乱码。PostData改为自行按指定编码做UrlEncode后再用UploadData()提交,以规避此问题。

关于HttpWebClientSetting中的AllowAutoRedirect属性:

在WebClient发起请求时,若响应为重定向,WebClient默认会自动跟随重定向,因此该类提供此设置项以控制访问时是否自动重定向。(自动重定向时,访问重定向后的地址会将请求中的Referer头置空;将AllowAutoRedirect设置为false,然后手动从响应头(Response.Headers)中取出Location地址,设置Referer头后再访问,即可更真实地模拟浏览器访问,从而避开一些网站的防抓取设置。)

关于HttpWebClient中的LastWebResponse属性:

当存在多次重定向时,该属性记录的是最后一次返回的响应对象,通过其ResponseUri属性可以取到最终返回响应的页面真实地址,从而为下一次请求设置Referer头作准备。

大约就是如此,后期如有Bug会继续更新。

原文地址:https://www.cnblogs.com/feng8621/p/4905996.html