C# HttpBrowser 跨进程访问,解决内存泄露问题

  1 #undef DEBUG
  2 using Microsoft.Win32;
  3 using Newtonsoft.Json;
  4 using System;
  5 using System.Collections.Generic;
  6 using System.Collections.Specialized;
  7 using System.Diagnostics;
  8 using System.Diagnostics.Contracts;
  9 using System.Drawing;
 10 using System.IO;
 11 using System.Linq;
 12 using System.Net;
 13 using System.Runtime.InteropServices;
 14 using System.Text;
 15 using System.Threading;
 16 using System.Threading.Tasks;
 17 using System.Windows.Forms;
 18 
 19 namespace AnfleCrawler.Common
 20 {
 21     /// <summary>
 22     /// Chromium / CasperJS + PhantomJS
 23     /// http://pinvoke.net/index.aspx
 24     /// </summary>
 25     public sealed partial class HttpBrowser : IHttpClient
 26     {
 27         #region NestedTypes
 28         [Serializable]
 29         public class AjaxBlockEntity
 30         {
 31             internal const string AjaxBlock = "_AjaxBlock";
 32             public string ID { get; set; }
 33             public string Text { get; set; }
 34             public bool IsEvent { get; set; }
 35         }
 36         public class AjaxEventEntity : MarshalByRefObject
 37         {
 38             public string ListenerSelector { get; set; }
 39             public bool EntryCall { get; set; }
 40             public Action<string> FinalCallback { get; set; }
 41         }
 42 
 43         [ComVisible(true)]
 44         public sealed class STAContext : Disposable
 45         {
 46             #region Fields
 47             public volatile bool IsRedirect;
 48             //internal MessageLoopApartment _Apartment;
 49             private SynchronizedCollection<Tuple<HtmlElement, EventHandler>> _releaseSet;
 50             private AutoResetEvent _sendReceiveWaiter;
 51             private CountdownEvent _ajaxWaiter;
 52             private System.Threading.Timer _lazyTimer;
 53 
 54             internal volatile bool DoInvokeHtml;
 55             private volatile string _outerHtml;
 56             #endregion
 57 
 58             #region Properties
 59             public Uri RequestUrl { get; private set; }
 60             public HttpRequestContent RequestContent { get; private set; }
 61             internal AutoResetEvent WaitHandle { get; set; }
 62 
 63             internal AutoResetEvent SendReceiveWaiter
 64             {
 65                 get
 66                 {
 67                     if (_sendReceiveWaiter == null)
 68                     {
 69                         _sendReceiveWaiter = new AutoResetEvent(false);
 70                     }
 71                     return _sendReceiveWaiter;
 72                 }
 73             }
 74             internal AjaxBlockEntity[] AjaxBlocks { get; private set; }
 75             internal CountdownEvent AjaxWaiter
 76             {
 77                 get
 78                 {
 79                     if (_ajaxWaiter == null)
 80                     {
 81                         _ajaxWaiter = new CountdownEvent(1);
 82                     }
 83                     return _ajaxWaiter;
 84                 }
 85             }
 86             internal volatile bool IsProcessEvent;
 87             internal AjaxEventEntity AjaxEvent { get; set; }
 88 
 89             internal string OuterHtml
 90             {
 91                 get
 92                 {
 93                     DoInvokeHtml = true;
 94                     return _outerHtml;
 95                 }
 96                 set
 97                 {
 98                     _outerHtml = value;
 99                 }
100             }
101             #endregion
102 
103             #region Constructor
104             internal STAContext(Uri url, HttpRequestContent content)
105             {
106                 this.RequestUrl = url;
107                 this.RequestContent = content;
108                 string ablock;
109                 if (this.RequestContent != null && this.RequestContent.Form != null)
110                 {
111                     if (!string.IsNullOrEmpty(ablock = this.RequestContent.Form.Get(AjaxBlockEntity.AjaxBlock)))
112                     {
113                         this.AjaxBlocks = JsonConvert.DeserializeObject<AjaxBlockEntity[]>(ablock);
114                         this.RequestContent.Form.Remove(AjaxBlockEntity.AjaxBlock);
115                     }
116                 }
117                 DoInvokeHtml = true;
118             }
119 
120             protected override void DisposeInternal(bool disposing)
121             {
122                 if (disposing)
123                 {
124                     //if (_Apartment != null)
125                     //{
126                     //    _Apartment.Dispose();
127                     //    _Apartment = null;
128                     //}
129                     if (_lazyTimer != null)
130                     {
131                         _lazyTimer.Dispose();
132                         _lazyTimer = null;
133                     }
134                     if (this.WaitHandle != null)
135                     {
136                         this.WaitHandle.Dispose();
137                         this.WaitHandle = null;
138                     }
139 
140                     DisposeObject(_sendReceiveWaiter);
141                     DisposeObject(_ajaxWaiter);
142                 }
143             }
144             #endregion
145 
146             #region Methods
147             public void SetHtml(string html)
148             {
149                 _outerHtml = html;
150                 DoInvokeHtml = false;
151             }
152 
153             internal void RegisterLazyLoad(Action<object> func, object state)
154             {
155                 if (_lazyTimer != null)
156                 {
157                     return;
158                 }
159                 _lazyTimer = new System.Threading.Timer(x => STA_Run(func, x, this), state, 2000, Timeout.Infinite);
160             }
161             /// <summary>
162             /// 另种思路,在每次加载完毕后delay
163             /// </summary>
164             internal void DelayLazyLoad()
165             {
166                 if (_lazyTimer == null)
167                 {
168                     return;
169                 }
170                 _lazyTimer.Change(2000, Timeout.Infinite);
171             }
172 
173             /// <summary>
174             /// STA
175             /// </summary>
176             /// <param name="node"></param>
177             /// <param name="e"></param>
178             internal void AjaxMark(HtmlElement node, EventHandler e)
179             {
180                 if (_releaseSet == null)
181                 {
182                     _releaseSet = new SynchronizedCollection<Tuple<HtmlElement, EventHandler>>();
183                 }
184                 var q = from t in _releaseSet
185                         where t.Item1 == node
186                         select t;
187                 if (q.Any())
188                 {
189                     return;
190                 }
191                 _releaseSet.Add(Tuple.Create(node, e));
192                 node.AttachEventHandler("onpropertychange", e);
193             }
194 
195             /// <summary>
196             /// STA
197             /// </summary>
198             internal void AjaxUnmarks()
199             {
200                 if (_releaseSet.IsNullOrEmpty())
201                 {
202                     return;
203                 }
204                 foreach (var item in _releaseSet)
205                 {
206                     var node = item.Item1;
207                     node.DetachEventHandler("onpropertychange", item.Item2);
208                 }
209                 _releaseSet = null;
210             }
211 
212             internal void _ReleaseMemory()
213             {
214                 return;
215 #if !DEBUG
216                 var proc = Process.GetCurrentProcess();
217                 //128M
218                 if (proc.PrivateMemorySize64 <= 134217728L)
219                 {
220                     return;
221                 }
222                 base.ReleaseMemory();
223 #endif
224             }
225             #endregion
226         }
227         #endregion
228 
229         #region Static
230         public const string Callback_Snapshot = "_xSnapshot";
231 
232         static HttpBrowser()
233         {
234             SetBrowserFeatureControl();
235             //NativeMethods.SetErrorMode(NativeMethods.ErrorModes.SYSTEM_DEFAULT);
236             NativeMethods.SetErrorMode(NativeMethods.ErrorModes.SEM_FAILCRITICALERRORS | NativeMethods.ErrorModes.SEM_NOGPFAULTERRORBOX | NativeMethods.ErrorModes.SEM_NOOPENFILEERRORBOX);
237         }
238 
239         /// <summary>
240         /// http://msdn.microsoft.com/en-us/library/ee330720(v=vs.85).aspx
241         /// </summary>
242         private static void SetBrowserFeatureControl()
243         {
244             // FeatureControl settings are per-process
245             string fileName = Path.GetFileName(Process.GetCurrentProcess().MainModule.FileName);
246             string[] skip = new string[] { "devenv.exe", "XDesProc.exe" };
247             if (skip.Any(p => p.Equals(fileName, StringComparison.OrdinalIgnoreCase)))
248             {
249                 return;
250             }
251 
252             SetBrowserFeatureControlKey("FEATURE_BROWSER_EMULATION", fileName, GetBrowserEmulationMode());
253             SetBrowserFeatureControlKey("FEATURE_MANAGE_SCRIPT_CIRCULAR_REFS", fileName, 1);
254             //SetBrowserFeatureControlKey("FEATURE_GPU_RENDERING ", fileName, 1);
255             //SetBrowserFeatureControlKey("FEATURE_AJAX_CONNECTIONEVENTS", fileName, 1);
256             //SetBrowserFeatureControlKey("FEATURE_ENABLE_CLIPCHILDREN_OPTIMIZATION", fileName, 1);
257             //SetBrowserFeatureControlKey("FEATURE_DOMSTORAGE ", fileName, 1);
258             //SetBrowserFeatureControlKey("FEATURE_IVIEWOBJECTDRAW_DMLT9_WITH_GDI  ", fileName, 0);
259             //SetBrowserFeatureControlKey("FEATURE_NINPUT_LEGACYMODE", fileName, 0);
260             //SetBrowserFeatureControlKey("FEATURE_DISABLE_LEGACY_COMPRESSION", fileName, 1);
261             //SetBrowserFeatureControlKey("FEATURE_LOCALMACHINE_LOCKDOWN", fileName, 0);
262             //SetBrowserFeatureControlKey("FEATURE_BLOCK_LMZ_OBJECT", fileName, 0);
263             //SetBrowserFeatureControlKey("FEATURE_BLOCK_LMZ_SCRIPT", fileName, 0);
264             //SetBrowserFeatureControlKey("FEATURE_DISABLE_NAVIGATION_SOUNDS", fileName, 1);
265             //SetBrowserFeatureControlKey("FEATURE_SCRIPTURL_MITIGATION", fileName, 1);
266             //SetBrowserFeatureControlKey("FEATURE_SPELLCHECKING", fileName, 0);
267             //SetBrowserFeatureControlKey("FEATURE_STATUS_BAR_THROTTLING", fileName, 1);
268             //SetBrowserFeatureControlKey("FEATURE_TABBED_BROWSING", fileName, 1);
269             //SetBrowserFeatureControlKey("FEATURE_VALIDATE_NAVIGATE_URL", fileName, 1);
270             //SetBrowserFeatureControlKey("FEATURE_WEBOC_DOCUMENT_ZOOM", fileName, 1);
271             //SetBrowserFeatureControlKey("FEATURE_WEBOC_POPUPMANAGEMENT", fileName, 0);
272             //SetBrowserFeatureControlKey("FEATURE_WEBOC_MOVESIZECHILD", fileName, 1);
273             //SetBrowserFeatureControlKey("FEATURE_ADDON_MANAGEMENT", fileName, 0);
274             //SetBrowserFeatureControlKey("FEATURE_WEBSOCKET", fileName, 1);
275             //SetBrowserFeatureControlKey("FEATURE_WINDOW_RESTRICTIONS ", fileName, 0);
276             //SetBrowserFeatureControlKey("FEATURE_XMLHTTP", fileName, 1);
277         }
278         /// <summary>
279         /// http://msdn.microsoft.com/en-us/library/ie/ee330730(v=vs.85).aspx
280         /// </summary>
281         /// <returns></returns>
282         private static uint GetBrowserEmulationMode()
283         {
284             int browserVersion;
285             using (var ieKey = Registry.LocalMachine.OpenSubKey(@"SOFTWAREMicrosoftInternet Explorer",
286                 RegistryKeyPermissionCheck.ReadSubTree, System.Security.AccessControl.RegistryRights.QueryValues))
287             {
288                 var version = ieKey.GetValue("svcVersion") ?? ieKey.GetValue("Version");
289                 if (version == null)
290                 {
291                     throw new ApplicationException("Microsoft Internet Explorer is required!");
292                 }
293                 int.TryParse(version.ToString().Split('.')[0], out browserVersion);
294             }
295             if (browserVersion < 8)
296             {
297                 throw new ApplicationException("Microsoft Internet Explorer 8 is required!");
298             }
299             switch (browserVersion)
300             {
301                 case 9:
302                     return 9000;
303                 case 10:
304                     return 10000;
305                 case 11:
306                     return 11000;
307                 default:
308                     return 8000;
309             }
310         }
311         private static void SetBrowserFeatureControlKey(string feature, string appName, uint value)
312         {
313             using (var key = Registry.CurrentUser.CreateSubKey(
314                 String.Concat(@"SoftwareMicrosoftInternet ExplorerMainFeatureControl", feature),
315                 RegistryKeyPermissionCheck.ReadWriteSubTree))
316             {
317                 key.SetValue(appName, value, RegistryValueKind.DWord);
318             }
319         }
320 
321         private static void STA_Run(Action<object> func, object state, STAContext context)
322         {
323             var sta = new Thread(arg =>
324             {
325                 var set = (object[])arg;
326                 try
327                 {
328                     var func2 = (Action<object>)set[0];
329                     func2(set[1]);
330                 }
331                 catch (Exception ex)
332                 {
333                     App.LogError(ex, "STA_Run");
334                 }
335             }, 1024 * 512);   //1024 * 512, 默认1M
336             sta.IsBackground = true;
337             sta.SetApartmentState(ApartmentState.STA);
338             try
339             {
340                 sta.Start(new object[2] { func, state });
341             }
342             catch (OutOfMemoryException ex)
343             {
344                 HandleException(ex);
345             }
346 
347             //context._Apartment.Invoke(func, state);
348         }
349 
350         public static void FillAjaxBlock(NameValueCollection form, AjaxBlockEntity[] set)
351         {
352             Contract.Requires(form != null);
353 
354             form[AjaxBlockEntity.AjaxBlock] = JsonConvert.SerializeObject(set, Formatting.None);
355         }
356         #endregion
357 
358         #region Fields
359         private EndPoint _proxyAddr;
360         private Lazy<IHttpClient> _lazyClient;
361         private CookieContainer _cookieContainer;
362         private Action<STAContext, HtmlDocument> _onLoad;
363         #endregion
364 
365         #region Properties
366         public int SendReceiveTimeout { get; set; }
367         public ushort? RetryCount { get; set; }
368         public TimeSpan? RetryWaitDuration { get; set; }
369         public bool UseCookies { get; set; }
370         public CookieContainer CookieContainer
371         {
372             get { return _cookieContainer; }
373         }
374         public string SaveFileDirectory { get; set; }
375         /// <summary>
376         /// 网页快照大小,Full Screenshot则设置Size.Empty
377         /// </summary>
378         public Size? Snapshot { get; set; }
379         /// <summary>
380         /// 供下载使用
381         /// </summary>
382         internal IHttpClient Client
383         {
384             get
385             {
386                 var client = _lazyClient.Value;
387                 client.SendReceiveTimeout = this.SendReceiveTimeout;
388                 client.RetryCount = this.RetryCount;
389                 client.RetryWaitDuration = this.RetryWaitDuration;
390                 client.UseCookies = this.UseCookies;
391                 client.SaveFileDirectory = this.SaveFileDirectory;
392                 return client;
393             }
394         }
395         #endregion
396 
397         #region Constructors
398         public HttpBrowser()
399         {
400             this.SendReceiveTimeout = -1;
401             _lazyClient = new Lazy<IHttpClient>(() => new HttpClient(), false);
402             _cookieContainer = new CookieContainer();
403             this.UseCookies = true;
404         }
405         /// <summary>
406         /// crossLoad中如有跨域交互,请继承扩展IsolateProxy
407         /// </summary>
408         /// <param name="crossLoad"></param>
409         public HttpBrowser(Action<STAContext, HtmlDocument> crossLoad)
410             : this()
411         {
412             _onLoad = crossLoad;
413         }
414         #endregion
415 
416         #region Methods
417         public void SetProxy(EndPoint address, NetworkCredential credential = null)
418         {
419             if (credential != null)
420             {
421                 throw new NotSupportedException("credential");
422             }
423 
424             if (IsSpawned)
425             {
426                 _proxyAddr = address;
427             }
428             else
429             {
430 #if DEBUG
431                 App.LogInfo("SetProxy HttpBrowser {0}", address);
432 #endif
433                 if (WinInetInterop.SetConnectionProxy(address.ToString()))
434                 {
435                     App.LogInfo("SetProxy HttpBrowser {0} succeed", address);
436                 }
437             }
438         }
439         internal void RestoreSystemProxy()
440         {
441             if (IsSpawned)
442             {
443                 _proxyAddr = null;
444             }
445             else
446             {
447 #if DEBUG
448                 App.LogInfo("RestoreSystemProxy HttpBrowser");
449 #endif
450                 if (WinInetInterop.RestoreSystemProxy())
451                 {
452                     App.LogInfo("RestoreSystemProxy HttpBrowser succeed");
453                 }
454             }
455         }
456 
457         public string GetHtml(Uri requestUrl, HttpRequestContent content = null)
458         {
459             if (IsSpawned)
460             {
461                 return SpawnedStart(_proxyAddr, requestUrl, content);
462             }
463             using (var arg = new STAContext(requestUrl, content))
464             {
465                 arg.WaitHandle = new AutoResetEvent(false);
466                 this.STA_Run(arg);
467                 arg.WaitHandle.WaitOne();
468                 return arg.OuterHtml;
469             }
470         }
471 
472         public string GetHtml(Uri requestUrl, AjaxEventEntity local, HttpRequestContent content = null)
473         {
474             Contract.Requires(requestUrl != null);
475             if (local == null)
476             {
477                 return GetHtml(requestUrl, content);
478             }
479 
480             using (var arg = new STAContext(requestUrl, content))
481             {
482                 arg.AjaxEvent = local;
483                 arg.WaitHandle = new AutoResetEvent(false);
484                 this.STA_Run(arg);
485                 arg.WaitHandle.WaitOne();
486                 return arg.OuterHtml;
487             }
488         }
489 
490         public Stream GetStream(Uri requestUrl, HttpRequestContent content = null)
491         {
492             return this.Client.GetStream(requestUrl, content);
493         }
494 
495         public void DownloadFile(Uri fileUrl, out string fileName)
496         {
497             this.Client.DownloadFile(fileUrl, out fileName);
498         }
499         #endregion
500 
501         #region Hepler
502         /// <summary>
503         /// 注入Script
504         /// </summary>
505         /// <param name="document"></param>
506         /// <param name="js"></param>
507         public void InjectScript(HtmlDocument document, string js)
508         {
509             Contract.Requires(document != null);
510 
511             if (!CheckDocument(document.Url))
512             {
513                 App.LogInfo("HttpBrowser InjectScript Cancel");
514                 return;
515             }
516             var head = document.GetElementsByTagName("head")[0];
517             var script = document.CreateElement("script");
518             script.SetAttribute("type", "text/javascript");
519             script.SetAttribute("text", js);
520             head.AppendChild(script);
521         }
522         private bool CheckDocument(Uri documentUrl)
523         {
524             if (documentUrl != null && documentUrl.OriginalString.StartsWith("res://ieframe.dll", StringComparison.OrdinalIgnoreCase))
525             {
526                 App.LogInfo("CheckDocument {0}", documentUrl);
527                 return false;
528             }
529             return true;
530         }
531 
532         /// <summary>
533         /// 设置ajax参数
534         /// </summary>
535         /// <param name="browser"></param>
536         private void SetAjax(WebBrowser browser, bool isEvent)
537         {
538             var arg = (STAContext)browser.ObjectForScripting;
539             if (arg.AjaxBlocks.IsNullOrEmpty())
540             {
541                 return;
542             }
543             foreach (var block in arg.AjaxBlocks.Where(p => p.IsEvent == isEvent))
544             {
545                 var node = browser.Document.GetElementById(block.ID);
546                 if (node == null)
547                 {
548                     continue;
549                 }
550                 arg.AjaxWaiter.AddCount();
551                 arg.AjaxMark(node, (sender, e) =>
552                 {
553                     node = browser.Document.GetElementById(block.ID);
554                     if (node == null || block.Text == null
555                         || (!block.Text.Equals(node.InnerText, StringComparison.OrdinalIgnoreCase)))
556                     {
557                         // bug 如果先Signal再AddCount就会出错
558                         arg.AjaxWaiter.Signal();
559                     }
560                 });
561             }
562             arg.AjaxWaiter.Signal();
563         }
564         /// <summary>
565         /// 等待ajax执行
566         /// </summary>
567         /// <param name="arg"></param>
568         private bool WaitAjax(STAContext arg)
569         {
570             if (arg.AjaxBlocks.IsNullOrEmpty())
571             {
572                 return false;
573             }
574             int aTimeout = this.SendReceiveTimeout;
575             if (aTimeout <= 0)
576             {
577                 aTimeout = (int)TimeSpan.FromSeconds(60d).TotalMilliseconds;
578             }
579             if (!arg.AjaxWaiter.Wait(aTimeout))
580             {
581                 App.LogInfo("HttpBrowser Ajax Timeout {0}", arg.RequestUrl);
582                 return false;
583             }
584             return true;
585         }
586 
587         private void ProcessAjaxEvent(WebBrowser browser)
588         {
589             var arg = (STAContext)browser.ObjectForScripting;
590             if (arg.AjaxEvent == null || string.IsNullOrEmpty(arg.AjaxEvent.ListenerSelector))
591             {
592                 return;
593             }
594 
595             arg.IsProcessEvent = true;
596             if (arg.AjaxEvent.EntryCall && arg.AjaxEvent.FinalCallback != null)
597             {
598                 InvokeHtml(browser);
599                 arg.AjaxEvent.FinalCallback(arg.OuterHtml);
600             }
601             object val = browser.Document.InvokeScript("Soubiscbot", new object[] { 0, arg.AjaxEvent.ListenerSelector });
602             var set = val.ToString().Split(',');
603             foreach (string id in set)
604             {
605                 var node = browser.Document.GetElementById(id);
606                 if (node == null)
607                 {
608                     continue;
609                 }
610                 arg.AjaxWaiter.Reset();
611                 SetAjax(browser, true);
612                 node.InvokeMember("click");
613                 bool isSet = WaitAjax(arg);
614                 Console.WriteLine("ProcessAjaxEvent isSet={0}", isSet);
615                 if (arg.AjaxEvent.FinalCallback != null)
616                 {
617                     InvokeHtml(browser);
618                     arg.AjaxEvent.FinalCallback(arg.OuterHtml);
619                 }
620             }
621             arg.IsProcessEvent = false;
622         }
623 
624         /// <summary>
625         /// 读取页面OuterHtml
626         /// </summary>
627         /// <param name="browser"></param>
628         /// <returns></returns>
629         private void InvokeHtml(WebBrowser browser)
630         {
631             var scripting = (STAContext)browser.ObjectForScripting;
632             if (scripting == null)
633             {
634                 throw new InvalidOperationException("InvokeHtml");
635             }
636             if (!scripting.DoInvokeHtml)
637             {
638                 return;
639             }
640             scripting.OuterHtml = (string)browser.Document.InvokeScript("Soubiscbot");
641         }
642         #endregion
643 
644         #region STAThread
645         private void STA_Run(STAContext context)
646         {
647             context._ReleaseMemory();
648             //context._Apartment = new MessageLoopApartment();
649             STA_Run(state =>
650             {
651                 var browser = new WebBrowser()
652                 {
653                     ScriptErrorsSuppressed = true,
654                     IsWebBrowserContextMenuEnabled = false,
655                     ObjectForScripting = state
656                 };
657                 browser.Navigating += browser_Navigating;
658                 browser.DocumentCompleted += browser_DocumentCompleted;
659                 browser.NewWindow += browser_NewWindow;
660                 if (this.Snapshot.HasValue)
661                 {
662                     browser.ScrollBarsEnabled = false;
663                     browser.Size = new Size(Screen.PrimaryScreen.WorkingArea.Width, 10240);
664                     browser.Show();
665                 }
666                 else
667                 {
668                     browser.Hide();
669                 }
670                 var arg = (STAContext)state;
671                 byte[] postData = null;
672                 string headers = null;
673                 if (arg.RequestContent != null)
674                 {
675                     if (this.UseCookies)
676                     {
677                         if (arg.RequestContent.HasCookie)
678                         {
679                             _cookieContainer.Add(arg.RequestUrl, arg.RequestContent.Cookies);
680                         }
681                         string cookieHeader = arg.RequestContent.Headers[HttpRequestHeader.Cookie];
682                         if (!string.IsNullOrEmpty(cookieHeader))
683                         {
684                             _cookieContainer.SetCookies(arg.RequestUrl, cookieHeader.Replace(';', ','));
685                             arg.RequestContent.Headers.Remove(HttpRequestHeader.Cookie);
686                         }
687                         cookieHeader = _cookieContainer.GetCookieHeader(arg.RequestUrl);
688                         if (cookieHeader.Length > 0)
689                         {
690                             arg.RequestContent.Headers[HttpRequestHeader.Cookie] = cookieHeader.Replace(',', ';');
691                         }
692                         //WinInetInterop.SaveCookies(_cookieContainer, absoluteUri);
693                     }
694                     else
695                     {
696                         arg.RequestContent.Headers[HttpRequestHeader.Cookie] = string.Empty;
697                         //WinInetInterop.DeleteCache(WinInetInterop.CacheKind.Cookies);
698                     }
699                     if (arg.RequestContent.HasBody)
700                     {
701                         arg.RequestContent.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";
702                         postData = Encoding.UTF8.GetBytes(arg.RequestContent.GetFormString());
703                     }
704                     headers = arg.RequestContent.GetHeadersString();
705                 }
706                 browser.Navigate(arg.RequestUrl, "_self", postData, headers);
707 
708                 STA_Run(STA_Wait, browser, arg);
709                 //会阻塞当前线程
710                 Application.Run();
711             }, context, context);
712         }
713         private void STA_Wait(object state)
714         {
715             var browser = (WebBrowser)state;
716 #if DEBUG
717             App.LogInfo("STA_Wait {0}", browser.Url);
718 #endif
719             var arg = (STAContext)browser.ObjectForScripting;
720             try
721             {
722                 int srTimeout = this.SendReceiveTimeout;
723                 if (srTimeout > -1 && !arg.SendReceiveWaiter.WaitOne(srTimeout))
724                 {
725                     //请求超时
726                     browser.Invoke((Action)(() =>
727                     {
728                         if (browser.ReadyState != WebBrowserReadyState.Complete)
729                         {
730                             browser.Stop();
731                             App.LogInfo("HttpBrowser SendReceive Timeout {0}", arg.RequestUrl);
732                         }
733                     }));
734                 }
735                 WaitAjax(arg);
736             }
737             catch (Exception ex)
738             {
739                 App.LogError(ex, "HttpBrowser STA_Wait {0}", arg.RequestUrl);
740                 HandleException(ex);
741             }
742         }
743 
744         private void browser_NewWindow(object sender, System.ComponentModel.CancelEventArgs e)
745         {
746             var browser = (WebBrowser)sender;
747             var node = browser.Document.ActiveElement;
748             string link;
749             if (node != null && !string.IsNullOrEmpty(link = node.GetAttribute("href")))
750             {
751                 e.Cancel = true;
752                 browser.Navigate(link);
753             }
754         }
755         private void browser_Navigating(object sender, WebBrowserNavigatingEventArgs e)
756         {
757             var browser = (WebBrowser)sender;
758 #if DEBUG
759             App.LogInfo("browser_Navigating {0}", browser.Url);
760 #endif
761             var arg = (STAContext)browser.ObjectForScripting;
762             arg.DelayLazyLoad();
763         }
764         private void browser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
765         {
766             var browser = (WebBrowser)sender;
767 #if DEBUG
768             App.LogInfo("browser_DocumentCompleted {0}", browser.Url);
769 #endif
770             var arg = (STAContext)browser.ObjectForScripting;
771             try
772             {
773                 //e.Url不会变res:// 
774                 if (!CheckDocument(browser.Url))
775                 {
776                     App.LogInfo("HttpBrowser DocumentCompleted Cancel {0}", browser.Url);
777                     return;
778                 }
779                 if (browser.ReadyState != WebBrowserReadyState.Complete)
780                 {
781                     return;
782                 }
783 
784                 //发生redirect或iframe load
785                 if (browser.Url != e.Url)
786                 {
787                     App.LogInfo("HttpBrowser Redirect {0} to {1}", arg.RequestUrl, e.Url);
788                 }
789                 if (this.UseCookies)
790                 {
791                     WinInetInterop.LoadCookies(_cookieContainer, browser.Document.Url);
792                 }
793                 InjectScript(browser.Document, @"if (typeof ($) == 'undefined') {
794             var script = document.createElement('script');
795             script.src = 'http://libs.baidu.com/jquery/1.9.0/jquery.js';
796             document.getElementsByTagName('head')[0].appendChild(script);
797         }
798         function Soubiscbot(kind) {
799             switch (kind) {
800                 case 0:
801                     var set = [];
802                     $(arguments[1]).each(function (i, o) {
803                         var me = $(o);
804                         var id = me.attr('id');
805                         if (!id) {
806                             id = Math.random();
807                             me.attr('id', id);
808                         }
809                         set[i] = id;
810                     });
811                     return set.toString();
812                     break;
813                 case 1:
814                     try {
815                         return arguments[1]();
816                     }
817                     catch (ex) {
818                         return ex.toString();
819                     }
820                     break;
821                 default:
822                     return document.documentElement.outerHTML;
823                     break;
824             }
825         }");
826 
827                 if (this.SendReceiveTimeout > -1)
828                 {
829                     arg.SendReceiveWaiter.Set();
830                 }
831                 SetAjax(browser, false);
832                 if (_onLoad != null)
833                 {
834                     _onLoad(arg, browser.Document);
835                 }
836                 if (arg.IsRedirect)
837                 {
838                     STA_Run(STA_Wait, browser, arg);
839                 }
840                 else
841                 {
842                     arg.RegisterLazyLoad(x =>
843                     {
844                         var b = (WebBrowser)x;
845                         if (b.IsDisposed)
846                         {
847                             return;
848                         }
849                         b.Invoke((Action<WebBrowser>)ProcessAjaxEvent, b);
850                         b.Invoke((Action<object>)Callback, b);
851                     }, browser);
852                 }
853             }
854             catch (Exception ex)
855             {
856                 App.LogError(ex, "HttpBrowser DocumentCompleted RequestUrl={0} BrowserUrl={1}", arg.RequestUrl, browser.Url);
857                 HandleException(ex);
858             }
859         }
860 
861         private static void HandleException(Exception ex)
862         {
863             if (ex is OutOfMemoryException || ex is AccessViolationException)
864             {
865                 App.LogInfo("HttpBrowser auto exit {0}", ex.HResult);
866                 Environment.Exit(ex.HResult);
867             }
868         }
869         #endregion
870 
871         #region Callback
872         private void Callback(object state)
873         {
874             var browser = (WebBrowser)state;
875 #if DEBUG
876             App.LogInfo("Callback {0}", browser.Url);
877 #endif
878             var arg = (STAContext)browser.ObjectForScripting;
879             if (!Monitor.TryEnter(arg))
880             {
881                 return;
882             }
883             try
884             {
885 #warning HACK
886                 if (this.Snapshot.HasValue)
887                 {
888                     Thread.Sleep(4000);
889                 }
890                 browser.Invoke((Action)(() =>
891                 {
892                     if (this.Snapshot.HasValue)
893                     {
894                         //Guid fileID = CryptoManaged.MD5Hash(browser.Url.OriginalString);//browser.Url为ResponseUrl
895                         Guid fileID = Guid.NewGuid();
896                         var js = new StringBuilder();
897                         js.AppendFormat("document.body.setAttribute('{0}', '{1}');", Callback_Snapshot, fileID);
898                         js.Append(@"    window.addEventListener('load', function () {
899         window.scrollTo(0, document.documentElement.offsetHeight);
900     });
901 ");
902                         browser.Document.InvokeScript("eval", new object[] { js.ToString() });
903                         string savePath = Path.Combine(this.SaveFileDirectory, string.Format("{0}.png", fileID));
904                         try
905                         {
906                             var shotSize = this.Snapshot.Value == Size.Empty ? browser.Document.Body.ScrollRectangle.Size : this.Snapshot.Value;
907                             browser.Size = shotSize;
908                             using (var img = new Bitmap(browser.Width, browser.Height))
909                             {
910                                 //browser.DrawToBitmap(img, new Rectangle(Point.Empty, img.Size));
911                                 NativeMethods.DrawTo(browser.ActiveXInstance, img, Color.White);
912                                 img.Save(savePath, System.Drawing.Imaging.ImageFormat.Png);
913                                 App.LogInfo("xSnapshot {0} {1}", browser.Url, savePath);
914                             }
915                         }
916                         catch (Exception ex)
917                         {
918                             App.LogError(ex, "xSnapshot {0} {1}", browser.Url, savePath);
919                         }
920                     }
921                     InvokeHtml(browser);
922                 }));
923             }
924             catch (Exception ex)
925             {
926                 App.LogError(ex, "HttpBrowser Callback {0}", arg.RequestUrl);
927                 HandleException(ex);
928             }
929             finally
930             {
931                 Monitor.Exit(arg);
932                 STA_Exit(browser);
933             }
934         }
935 
936         /// <summary>
937         /// !重要! 退出STAUI线程
938         /// </summary>
939         private void STA_Exit(WebBrowser browser)
940         {
941 #if DEBUG
942             App.LogInfo("STA_Exit {0}", browser.Url);
943 #endif
944             RestoreSystemProxy();
945             var arg = (STAContext)browser.ObjectForScripting;
946             if (arg.WaitHandle != null)
947             {
948                 arg.WaitHandle.Set();
949             }
950             try
951             {
952                 browser.Stop();
953                 arg.AjaxUnmarks();
954                 //arg._Apartment.Dispose();
955                 browser.Invoke((Action)(() => Application.ExitThread()));
956                 browser.Dispose();
957             }
958             catch (SystemException ex)
959             {
960                 //AccessViolationException
961                 //InvalidComObjectException
962                 App.LogError(ex, "HttpBrowser STA_Exit {0}", arg.RequestUrl);
963             }
964         }
965         #endregion
966     }
967 }
HttpBrowser
#region Spawned Process
        /// <summary>
        /// Gets or sets the "spawned" flag of this instance.
        /// NOTE(review): presumably selects the child-process request path; the code that reads
        /// this property is not visible here — confirm against the callers.
        /// </summary>
        public bool IsSpawned { get; set; }

        /// <summary>
        /// Runs the request in a child copy of the current executable and returns everything the
        /// child writes to its standard output.
        /// </summary>
        /// <remarks>
        /// The proxy, URL, headers and form are serialized, Base64-encoded and passed to the child as
        /// a single <c>x#...</c> command-line argument. Standard output is read asynchronously so
        /// that the exit timeout is actually enforced: a synchronous <c>ReadToEnd</c> would block
        /// until the child closes its output, which a hung child never does.
        /// </remarks>
        /// <param name="proxy">Proxy end point handed to the child; may be null.</param>
        /// <param name="requestUrl">URL the child should request.</param>
        /// <param name="content">Optional request content; its Headers and Form are forwarded when not null.</param>
        /// <returns>
        /// The text the child wrote to standard output. If the child did not exit within the timeout
        /// it is killed and the output captured up to that point is returned.
        /// </returns>
        internal string SpawnedStart(EndPoint proxy, Uri requestUrl, HttpRequestContent content)
        {
#if DEBUG
            App.LogInfo("SpawnedStart: Proxy={0}\tUrl={1}", proxy, requestUrl);
#endif
            // Upper bound for the whole child run, in milliseconds.
            const int exitTimeoutMs = 120 * 1000;

            bool hasValue = content != null;
            var stream = Serializer.Serialize(Tuple.Create(proxy, requestUrl,
                hasValue ? content.Headers : null,
                hasValue ? content.Form : null));
            RestoreSystemProxy();
            string[] args = Environment.GetCommandLineArgs();
            string arg = string.Format("x#{0}", Convert.ToBase64String(stream.ToArray()));
            var startInfo = new ProcessStartInfo(args[0], arg)
            {
                RedirectStandardOutput = true,
                UseShellExecute = false,
            };

            // 'using' releases the process handle and the redirected pipe even when an exception escapes.
            using (var proc = Process.Start(startInfo))
            {
                // Start draining stdout right away: the child can never block on a full pipe,
                // and this thread stays free to enforce the timeout below.
                Task<string> pendingOutput = proc.StandardOutput.ReadToEndAsync();

                if (!proc.WaitForExit(exitTimeoutMs))
                {
                    try
                    {
                        proc.Kill();
                    }
                    catch (InvalidOperationException)
                    {
                        // The child exited between the timeout and the Kill call; nothing left to do.
                    }
                }

                // The read completes once the child's end of the pipe is closed (normal exit or kill).
                // GetAwaiter().GetResult() rethrows the original exception instead of an AggregateException.
                return pendingOutput.GetAwaiter().GetResult();
            }
        }

        /// <summary>
        /// Entry point for the child-process mode. When the first command-line argument starts
        /// with the <c>x#</c> marker, the Base64 payload after it is deserialized into
        /// (proxy, URL, headers, form), the request is executed with a new <see cref="HttpBrowser"/>
        /// and the resulting HTML is written to standard output.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the process was started in child mode and the request was handled;
        /// <c>false</c> when the marker argument is absent.
        /// </returns>
        public static bool SpawnedMain()
        {
            string[] args = Environment.GetCommandLineArgs();
            // The marker is a protocol token, so compare ordinally rather than with the current culture.
            if (args.Length <= 1 || !args[1].StartsWith("x#", StringComparison.Ordinal))
            {
                return false;
            }

            Tuple<EndPoint, Uri, WebHeaderCollection, NameValueCollection> arg;
            // NOTE(review): the payload comes straight from the command line; confirm that
            // Serializer.Deserialize is safe for whatever can reach this argument.
            using (var stream = new MemoryStream(Convert.FromBase64String(args[1].Substring(2))))
            {
                arg = (Tuple<EndPoint, Uri, WebHeaderCollection, NameValueCollection>)Serializer.Deserialize(stream);
            }

            var client = (IHttpClient)new HttpBrowser();
            if (arg.Item1 != null)
            {
                client.SetProxy(arg.Item1);
            }
            string html = client.GetHtml(arg.Item2, new HttpRequestContent()
            {
                Headers = arg.Item3,
                Form = arg.Item4
            });
            Console.WriteLine(html);
            return true;
        }
        #endregion
原文地址:https://www.cnblogs.com/Googler/p/3850443.html