初练pandas实现数据处理

import urllib.request;
from pandas import DataFrame;
from pandas import Series;
from bs4 import BeautifulSoup;

response = urllib.request.urlopen('file:///F:/python/untitled1/core/do_data/2month.html');
html = response.read();
soup = BeautifulSoup(html,"html.parser")
trs = soup.find_all('tr')
ths = trs[0].find_all('th');

index_d = []
for th in ths:
    index_d.append(th.getText())
data = DataFrame(columns=index_d)
print(index_d)

for tr in trs :
    tds = tr.find_all('td')
    td_datas = []
    for td in tds:
        td_datas.append(td.getText())
    if len(td_datas) != 0:
        data=data.append(
            Series(
                td_datas,
                index=index_d
            ), ignore_index=True
        )

print(len(data))

str2s = []

for i in range(len(data["股票全码"])):
    str2 =str(data["股票全码"][i])
    str2 = str2.replace("SZ","0|")
    str2 = str2.replace("SH","1|")
    str2 = str2 + "|" + data["涨停时间"][i] +" "+ data["历史涨停原因"][i] +" "+ data["涨停选原因"][i]
    str2s.append(str2)

data["new"] = str2s
data=data.drop_duplicates(subset=['股票代码'],keep='last',inplace=False)
print(len(data))
df2 = data["new"].values
#print(type(df2))

file = open('data.txt', 'w')
file.writelines("
".join(df2));
file.close()
原文地址:https://www.cnblogs.com/rongye/p/12466584.html