URL去重

import socket

# Module-level result table filled by SysDNS() and read by showDict():
# resolved address -> host name recorded for that address.
dictlist = {}

def ReadHost(path='d:/sss.txt'):
    """Read host names, one per line, from a text file.

    Args:
        path: File to read. Defaults to the original hard-coded location,
            ``d:/sss.txt``.

    Returns:
        A list of host names in file order. Surrounding whitespace
        (including any line ending) is removed and blank lines are skipped.
    """
    # Text mode yields ``str`` lines on both Python 2 and Python 3, and the
    # ``with`` block closes the file even if reading fails part-way through.
    with open(path, 'r') as source:
        stripped = (line.strip() for line in source)
        # A blank line is not a host name; drop it instead of resolving ''.
        return [host for host in stripped if host]

def SysDNS():
    """Resolve every host from ReadHost() and record one host per address.

    For each host, the first address returned by ``socket.getaddrinfo`` is
    used as the key. The first host seen for an address is stored in the
    module-level ``dictlist``; later hosts that resolve to the same address
    are ignored. Hosts that cannot be resolved are skipped.
    """
    for host in ReadHost():
        try:
            results = socket.getaddrinfo(host, None)
        except (socket.herror, socket.gaierror, UnicodeError):
            # Best-effort run: a name that does not resolve (or, on
            # Python 3, cannot be IDNA-encoded) is skipped, not fatal.
            continue
        if not results:
            continue
        # Each entry is (family, type, proto, canonname, sockaddr) and
        # sockaddr[0] is the address string. Only the first entry is used.
        address = results[0][4][0]
        # setdefault keeps the host already recorded for this address.
        dictlist.setdefault(address, host)



def showDict(path="d:/out.txt", mapping=None):
    """Write the de-duplicated host names to a text file, one per line.

    Args:
        path: Output file. Defaults to the original hard-coded location,
            ``d:/out.txt``.
        mapping: Address-to-host dict whose values are written. When
            omitted, the module-level ``dictlist`` is used.
    """
    if mapping is None:
        mapping = dictlist
    # Text mode accepts ``str`` on both Python 2 and Python 3; ``with``
    # guarantees the file is flushed and closed even if a write fails.
    with open(path, "w") as sink:
        # Terminate every host with a newline so entries do not run together.
        sink.writelines(host + "\n" for host in mapping.values())

if __name__ == "__main__":
    # Script entry point: fill the address table first, then dump the
    # recorded host names to the output file.
    SysDNS()
    showDict()

  

原文地址:https://www.cnblogs.com/xiaobaichuangtianxia/p/3794453.html