词表数据转换

将 adx-campid 格式的数据转为 adx-campids 格式的数据

# Convert the campaign blacklist in black_campaign_list.txt into one list of
# campaign ids per adx.
#
# Input  (black_campaign_list.txt), one record per line:
#   <campid>\t<adx>[,<adx>...]
# Output (blackCampagin.txt), one record per adx:
#   <adx>\t<campid>[,<campid>...]

# adx => campids collected for that adx.
dicts = Hash.new { |hash, key| hash[key] = [] }

# File.foreach closes the input file once iteration finishes.
File.foreach("black_campaign_list.txt") do |line|
  fields = line.split("\t")
  next if fields.length < 2 # not a "<campid>\t<adxs>" record

  campid = fields[0]
  if campid.length > 100
    puts "<====> unlegal campid #{campid}"
    next
  end
  # Ids longer than 20 characters are dropped without a message. The check
  # does not depend on the adx, so it runs once per line.
  next if campid.length > 20

  # chomp removes both "\n" and "\r\n", so CRLF input does not leave a stray
  # "\r" glued to the last adx name.
  fields[1].chomp.split(",").each { |adx| dicts[adx] << campid }
end

# De-duplicate once, in place, so the summary and the output file agree.
dicts.each_value(&:uniq!)

# Summary on stdout: the adx names, then each adx followed by its campid count.
puts " dicts.keys adx is: ", dicts.keys
dicts.each { |adx, campids| puts adx, campids.length }

begin
  # The block form closes the output file even if a write raises.
  File.open("blackCampagin.txt", "w") do |file|
    dicts.each { |adx, campids| file.puts("#{adx}\t#{campids.join(',')}") }
  end
rescue SystemCallError, IOError => e
  # SystemCallError covers the Errno::* family (e.g. directory not writable).
  # Report the failure on stderr instead of swallowing it.
  warn "failed to write blackCampagin.txt: #{e.message}"
  exit 1
end
原文地址:https://www.cnblogs.com/lavin/p/8405091.html