# 91家纺网 (91jf.com) scraper, second revision

"""

Pipeline stages:

1. Fetch (抓取)

2. Parse (解析)

3. Store (存储)

"""

import re

#import ast

from urllib import parse

from datetime import datetime

import requests

from scrapy import Selector

# Root URL of the 91jf.com site. NOTE(review): not referenced by the visible
# code (get_nodes_json hard-codes the same URL); presumably used elsewhere
# to build absolute links -- confirm.
domain = "http://www.91jf.com/"

# Helper that writes a test text file (disabled: kept inside the string literal below).

'''

def write_txt():

f = open("a.txt", 'w')

f.write("hello world")

f.close()

'''

def get_nodes_json(timeout=10):
    """Fetch the 91jf.com home page and return the matched menu anchors.

    Downloads ``http://www.91jf.com/``, selects every ``<a>`` element that
    has an ``href`` attribute and sits inside a
    ``<div class="class_child_li">``, and returns each match as its
    serialized HTML string. Every matched string is also printed to stdout.

    Despite the function name, the result is a list of HTML fragments,
    not JSON.

    Args:
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises. Pass ``None`` to wait indefinitely, which was
            the behavior before this parameter existed.

    Returns:
        A list of HTML strings, one per matching anchor element; an empty
        list when nothing matches.
    """
    # Without a timeout, requests.get can block forever on a stalled server.
    home_page_html = requests.get("http://www.91jf.com/", timeout=timeout).text

    anchors = Selector(text=home_page_html).xpath(
        "//div[@class='class_child_li']//a[@href]"
    ).extract()

    # Echo each match for interactive inspection, as the script always did.
    for anchor_html in anchors:
        print(anchor_html)

    # An empty match naturally yields an empty list, so no special case is
    # needed; copy so the caller owns an independent list.
    return list(anchors)

# Script entry point: run the scraper once when executed directly
# (does nothing when the module is merely imported).
if __name__ == "__main__":
    get_nodes_json()