Tkinter 之爬虫框架项目实战

一、效果图

 

 

 

 

 

 二、源码

    ''' 测试内容页爬取'''
    def test_content_url(self):
        """Run every content-page extraction rule against the test URL.

        Reads the URL from ``self.test_url_var``, downloads it through
        ``self.get_html`` and walks the top-level rows of
        ``self.content_tree``. Each extracted field is appended to
        ``self.test_text`` as ``name: value`` and collected into a dict that
        is printed once all rows are processed.

        Row columns as used below:
            value[0]  field name (dict key and label in the output)
            value[1], value[2]  arguments forwarded to ``deal_with_sustring``
                      (presumably start/end markers - TODO confirm)
            value[3]  regular expression for ``re.findall``
            value[4]  0 -> handled here; anything else -> skipped as a
                      list-page field
            value[5]  0 -> substring extraction; anything else -> regex
            value[6]  if truthy, forwarded to ``request_again`` together with
                      the url and the extracted value
            value[7]  if truthy, a ``str.format`` template that receives the
                      extracted value and is handed to ``deal_with_python``

        Returns:
            None. Any exception is printed and reported in ``self.test_text``
            instead of propagating, so a bad rule cannot crash the GUI.
        """
        try:
            url = self.test_url_var.get().strip()
            items = self.content_tree.get_children('')
            content = self.get_html(url)
            content_dict = {}
            # Clear the previous test output before writing new results.
            self.test_text.delete(1.0, END)
            for item in items:
                value = self.content_tree.item(item).get('values')
                if value[4] != 0:
                    print('%s在列表页提取' % value[0])
                    continue
                print(value)
                if value[5] == 0:
                    # substring
                    return_value = self.deal_with_sustring(content, value[1], value[2])
                else:
                    # re: keep the first match, or '' when nothing matched
                    matches = re.findall(value[3], content, re.I | re.M)
                    return_value = matches[0] if matches else ''
                # The follow-up request and post-processing steps are shared
                # by both extraction modes, so the processed value is what
                # gets displayed and stored in either case.
                if value[6]:
                    return_value = self.request_again(url, return_value, value[6])
                if value[7]:
                    # NOTE(review): the template comes from the rule table and
                    # is passed to deal_with_python, which presumably executes
                    # it - only use rule sets from a trusted source.
                    exec_content = value[7].format(return_value)
                    return_value = self.deal_with_python(exec_content)
                content_dict[value[0]] = return_value
                # %-formatting instead of '+' so non-str cells (Treeview may
                # hand back ints) do not raise TypeError.
                self.test_text.insert(END, '%s: %s\n' % (value[0], return_value))
            print(content_dict)
        except Exception as e:
            # GUI boundary: surface the error to the user rather than crash.
            print(e)
            self.test_text.insert(END, '错误信息:' + str(e))

  

 有需要源码的可以评论哦~

原文地址:https://www.cnblogs.com/yang-2018/p/11983715.html