UWP获取任意网页加载完成后的HTML

主要思想:通过后台WebView载入指定网页,再提取出WebView中的内容

关键代码:

// Evaluate a script inside the WebView that yields the outer HTML of the loaded document's root element.
var html = await webView.InvokeScriptAsync("eval", new string[] { "document.documentElement.outerHTML;" });

有一个很简单的思路,

订阅WebView的NavigationCompleted事件,然后让WebView导航(Navigate)到指定的网址,事件触发时执行上面这行代码

除此之外,这里还有一个异步的方法,用到了TaskCompletionSource这个东西

首先,创建一个TaskCompletionSource:

// The result type is string because the value handed back is the page's HTML.
TaskCompletionSource<string> completionSource = new TaskCompletionSource<string>();

因为返回的东西是string(html),所以泛型T设置成string

然后使用lambda的形式订阅NavigationCompleted事件:

 1 webView.NavigationCompleted += async (sender, args) =>
 2 {
 3     if (args.Uri != uri) // ignore completions reported for any other address
 4         return;
 5     await Task.Delay(200); // wait 200 ms before reading the page
 6     var html = await sender.InvokeScriptAsync("eval", new string[] { "document.documentElement.outerHTML;" }); // read the document's outer HTML via script
 7     webView.NavigateToString(""); // replace the loaded page with empty content
 8     webView = null; // drop the reference to the WebView
 9     completionSource.SetResult(html); // complete the task with the HTML
10 };

Line5延迟200ms,是为了在Navigation完成之后,再给页面里的其他内容(比如一些js脚本)留出一点加载时间(按理说,事件订阅版本的处理函数里也应该加上这个延迟)

Line7的导航到空是为了防止WebView里的东西继续运行从而导致一些灵异事件(尤其是一些带视频的网页,咳咳)

Line9,给Task设置个Result,await就会结束

最后:

1 return completionSource.Task; // hand the task to the caller; it finishes once a result is set on completionSource

封装成类:

    /// <summary>
    /// Loads a web page in an off-screen <see cref="WebView"/> and hands back the HTML of the
    /// document after it has been loaded. Two usage styles are offered: create an instance and
    /// subscribe to <see cref="WebLoaded"/>, or await one of the static
    /// <c>LoadWebAsync</c> overloads.
    /// </summary>
    public class WebHelper
    {
        /// <summary>
        /// Event data for <see cref="WebHelper.WebLoaded"/>.
        /// </summary>
        public class WebLoadedArgs : EventArgs
        {
            /// <summary>True when the HTML was retrieved; false when loading failed.</summary>
            public bool Success { get; private set; }

            /// <summary>Status reported by the WebView for the navigation.</summary>
            public WebErrorStatus WebErrorStatus { get; private set; }

            /// <summary>The page HTML; only set when <see cref="Success"/> is true.</summary>
            public string Html { get; private set; }

            /// <summary>Creates event data describing a failed load.</summary>
            /// <param name="webErrorStatus">The error status of the failed navigation.</param>
            public WebLoadedArgs(WebErrorStatus webErrorStatus)
            {
                WebErrorStatus = webErrorStatus;
                Success = false;
            }

            /// <summary>Creates event data describing a successful load.</summary>
            /// <param name="Html">The HTML read from the page.</param>
            /// <param name="webErrorStatus">The status reported together with the completed navigation.</param>
            public WebLoadedArgs(string Html, WebErrorStatus webErrorStatus)
            {
                this.Html = Html;
                WebErrorStatus = webErrorStatus;
                Success = true;
            }
        }

        /// <summary>The address this helper was asked to load.</summary>
        public string Url { get; private set; }

        /// <summary>
        /// Raised at most once per helper, when the page has been loaded (with its HTML) or
        /// when loading has failed.
        /// </summary>
        public event EventHandler<WebLoadedArgs> WebLoaded;

        // Set to null once the outcome has been reported; doubles as the "already finished" flag.
        private WebView webView;

        /// <summary>
        /// Creates the helper and immediately starts navigating to <paramref name="Url"/>.
        /// </summary>
        /// <param name="Url">Absolute address of the page to load.</param>
        public WebHelper(string Url)
        {
            this.Url = Url;
            webView = new WebView(WebViewExecutionMode.SeparateThread);
            // Attach the handlers before navigating so that no notification can be missed.
            webView.NavigationCompleted += WebView_NavigationCompleted;
            webView.NavigationFailed += WebView_NavigationFailed;
            webView.Navigate(new Uri(Url));
        }

        private void WebView_NavigationFailed(object sender, WebViewNavigationFailedEventArgs e)
        {
            RaiseWebLoaded(new WebLoadedArgs(e.WebErrorStatus));
        }

        private async void WebView_NavigationCompleted(WebView sender, WebViewNavigationCompletedEventArgs args)
        {
            if (webView == null)
            {
                // The outcome has already been reported.
                return;
            }

            if (!args.IsSuccess)
            {
                // A completed-but-failed navigation must not be reported as a success.
                RaiseWebLoaded(new WebLoadedArgs(args.WebErrorStatus));
                return;
            }

            string html;
            try
            {
                html = await sender.InvokeScriptAsync("eval", new string[] { "document.documentElement.outerHTML;" });
            }
            catch (Exception)
            {
                // This is an async void handler: an escaping exception would be unobservable
                // by the caller, so a script failure is reported through the event instead.
                RaiseWebLoaded(new WebLoadedArgs(WebErrorStatus.Unknown));
                return;
            }

            RaiseWebLoaded(new WebLoadedArgs(html, args.WebErrorStatus));
        }

        // Reports the outcome exactly once, detaches from the WebView and tolerates the case
        // where nobody has subscribed to WebLoaded.
        private void RaiseWebLoaded(WebLoadedArgs e)
        {
            WebView view = webView;
            if (view == null)
            {
                return;
            }

            webView = null;
            view.NavigationCompleted -= WebView_NavigationCompleted;
            view.NavigationFailed -= WebView_NavigationFailed;

            EventHandler<WebLoadedArgs> handler = WebLoaded;
            if (handler != null)
            {
                handler(this, e);
            }
        }

        /// <summary>
        /// Asynchronously loads a page and returns its HTML.
        /// </summary>
        /// <param name="Url">Absolute address of the page to load.</param>
        /// <param name="Timeout">Timeout in seconds.</param>
        /// <returns>A task that yields the HTML of the loaded page.</returns>
        public static Task<string> LoadWebAsync(string Url,int Timeout)
        {
            return LoadWebAsync(Url, "", Timeout);
        }


        /// <summary>
        /// Asynchronously loads a page and returns its HTML.
        /// </summary>
        /// <param name="Url">Absolute address of the page to load.</param>
        /// <param name="Referer">
        /// Value for the Referer request header, used to get past hot-link checks.
        /// No header is sent when this is null or empty.
        /// </param>
        /// <param name="TimeOut">Timeout in seconds.</param>
        /// <returns>
        /// A task that yields the HTML of the loaded page. The task faults with
        /// <see cref="WebException"/> when navigation fails and with
        /// <see cref="TimeoutException"/> when the timeout elapses first.
        /// </returns>
        public static Task<string> LoadWebAsync(string Url,string Referer, int TimeOut)
        {
            Uri uri = new Uri(Url);
            HttpRequestMessage requestMessage = new HttpRequestMessage(HttpMethod.Get, uri);
            if (!string.IsNullOrEmpty(Referer))
            {
                requestMessage.Headers.Add("Referer", Referer);
            }

            WebView webView = new WebView(WebViewExecutionMode.SeparateThread);
            TaskCompletionSource<string> completionSource = new TaskCompletionSource<string>();
            DispatcherTimer timer = new DispatcherTimer();
            timer.Interval = TimeSpan.FromSeconds(TimeOut);

            // Shared teardown for every outcome. The timer has to be stopped explicitly:
            // otherwise it keeps ticking after the task has finished.
            Action release = () =>
            {
                timer.Stop();
                WebView view = webView;
                if (view != null)
                {
                    webView = null;
                    // Navigate to empty content so the loaded page (scripts, media) stops running.
                    view.NavigateToString("");
                }
            };

            // The numeric value of the WebErrorStatus is carried in WebException.Status
            // (as the original code did); the message names the actual status.
            Action<WebErrorStatus> fail = status =>
            {
                release();
                completionSource.TrySetException(
                    new WebException("Navigation failed: " + status, (WebExceptionStatus)status));
            };

            webView.NavigationCompleted += async (sender, args) =>
            {
                if (completionSource.Task.IsCompleted || args.Uri != uri)
                {
                    return;
                }

                if (!args.IsSuccess)
                {
                    fail(args.WebErrorStatus);
                    return;
                }

                // Give the page a little time after navigation completes before reading it.
                await Task.Delay(200);
                if (completionSource.Task.IsCompleted)
                {
                    // Timed out or failed while waiting.
                    return;
                }

                try
                {
                    var html = await sender.InvokeScriptAsync("eval", new string[] { "document.documentElement.outerHTML;" });
                    release();
                    completionSource.TrySetResult(html);
                }
                catch (Exception ex)
                {
                    // Surface script failures through the task instead of letting them
                    // escape from this async event handler.
                    release();
                    completionSource.TrySetException(ex);
                }
            };
            webView.NavigationFailed += (sender, args) =>
            {
                fail(args.WebErrorStatus);
            };
            timer.Tick += (sender, args) =>
            {
                release();
                completionSource.TrySetException(new TimeoutException());
            };

            // Start only after every handler is attached so no notification can be missed.
            timer.Start();
            webView.NavigateWithHttpRequestMessage(requestMessage);

            return completionSource.Task;
        }
    }

使用方法:

(事件订阅的方式)

        // Constructing the helper starts navigating to the given address right away.
        WebHelper webHelper = new WebHelper("http://www.baidu.com/");
        // Subscribe to be notified of the outcome.
        webHelper.WebLoaded += WebHelper_WebLoaded;

        // Handler for WebHelper.WebLoaded; Html is only populated when Success is true.
        private void WebHelper_WebLoaded(object sender, WebHelper.WebLoadedArgs e)
        {
            if(e.Success)
            {
                var html = e.Html;
            }
        }

(异步的方式)

// The second argument is the timeout in seconds (120 here).
var html = await WebHelper.LoadWebAsync("http://www.baidu.com", 120);
原文地址:https://www.cnblogs.com/loyieking/p/9209476.html