获取全球dns统计信息

# -*- coding:UTF-8 -*-
import json
import time

import requests
from bs4 import BeautifulSoup as bp

 6 t3 = time.time()
 7 ths = []  # 存放线程
 8 
 9 
def get(num, timeout=30):
    """Scrape the AAAA statistics table and append full rows to ``dns_status.txt``.

    The page is requested with ``requests.post`` (no body and no custom
    headers are sent), parsed with BeautifulSoup's ``lxml`` parser, and every
    ``<tr>`` is flattened into a list of cell values.  Rows containing the
    ``'(large run)'`` marker and rows that do not end up with exactly 16
    values are skipped.  Each remaining row is serialised as an indented JSON
    object and appended to ``dns_status.txt`` in the current working
    directory; objects are written back to back with no separator.

    Args:
        num: Unused; kept so existing ``get(1)``-style calls keep working.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            exceeds ``timeout``.
    """
    # Keys of the JSON object, in the order the 16 row values are stored.
    field_names = (
        'date',
        'total_checked',
        'with_A_records_count',
        'with_A_records_rate',
        'with_AAAA_records_count',
        'with_AAAA_records_rate',
        'AAAA_with_IPv4-mapped_count',
        'AAAA_with_IPv4-mapped_rate',
        'AAAA_with_loopback_count',
        'AAAA_with_loopback_rate',
        'valid_AAAA_records_count',
        'valid_AAAA_records_rate',
        'IPv6_connection_ok_count',
        'IPv6_connection_ok_rate',
        'IPv6_connection_failed_count',
        'IPv6_connection_failed_rate',
    )

    # Without a timeout a stalled server would block this call forever.
    page = requests.post('http://www.employees.org/~dwing/aaaa-stats/',
                         timeout=timeout)

    # 'lxml' selects the HTML parser; 'xml' would be the choice for XML input.
    soup = bp(page.text, 'lxml')

    for row in soup.find_all('tr'):
        if not row:
            continue

        one_row = []
        for cell in row.find_all('td'):
            if not cell:
                continue
            for child in cell:
                if not child:
                    continue
                try:
                    text = child.text
                except AttributeError:
                    # Children without ``.text`` are stored untouched; the
                    # original author noted these are the percentage strings.
                    # NOTE(review): newer bs4 releases appear to expose
                    # ``.text`` on plain text nodes as well, in which case
                    # this branch is not taken - confirm the installed version.
                    one_row.append(child)
                    continue
                if not text:
                    continue
                text = text.strip()
                if len(text) == 12:
                    # A 12-character value is stored as a 5-character
                    # total_checked part and a 7-character with-A-record part.
                    # NOTE(review): presumably two numbers that the markup
                    # fuses into one string - confirm against the page.
                    one_row.append(text[0:5])
                    one_row.append(text[5:])
                else:
                    one_row.append(text)

        # Drop the first bare newline and the first 'diffs' entry, if present.
        if '\n' in one_row:
            one_row.remove('\n')
        if 'diffs' in one_row:
            one_row.remove('diffs')
        if '(large run)' in one_row:
            continue
        if len(one_row) != 16:
            continue

        all_data = dict(zip(field_names, one_row))
        # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes,
        # so the file is opened as UTF-8 rather than the platform default.
        ddd = json.dumps(all_data, indent=2, ensure_ascii=False)
        with open('dns_status.txt', 'a+', encoding='utf-8') as f:
            f.write(ddd)


get(1)

t4 = time.time()
tt = t4 - t3  # elapsed wall-clock seconds since t3 was taken at the top of the script
print(tt)

结果见github

原文地址:https://www.cnblogs.com/wt11/p/7765293.html