Rendered/interactive javascript with gtk/webkit/jswebkit

Scrapy snippets: Rendered/interactive javascript with gtk/webkit/jswebkit

Rendered/interactive javascript with gtk/webkit/jswebkit

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
from scrapy.http import Request, FormRequest, HtmlResponse

import gtk
import webkit
import jswebkit

class WebkitDownloader( object ):
    def process_request( self, request, spider ):
        if( type(request) is not FormRequest ):
            webview = webkit.WebView()
            webview.connect( 'load-finished', lambda v,f: gtk.main_quit() )
            webview.load_uri( request.url )
            gtk.main()
            js = jswebkit.JSContext( webview.get_main_frame().get_global_context() )
            renderedBody = str( js.EvaluateScript( 'document.documentElement.innerHTML' ) )
            return HtmlResponse( request.url, body=renderedBody )
原文地址:https://www.cnblogs.com/lexus/p/2374909.html