代码12

此代码需要结合代码11一起使用。

需要将正例（即标签为1的query）的数量减少约一半。

# Down-sample the positive examples (label "1") of the SICK training split.
#
# Reads three line-aligned files (sentence A, sentence B, label), drops up to
# a fixed number of positive examples found among the leading records, and
# appends the surviving records to three line-aligned output files.


def drop_positive_examples(records, max_removed=2001, scan_limit=4500):
    """Return ``records`` with some leading positive examples removed.

    Args:
        records: sequence of ``(a_line, b_line, label_line)`` tuples; each
            element is a raw line as read from its file (newline included).
        max_removed: maximum number of positive records to drop.
        scan_limit: only records whose index is below this value are
            candidates for removal; later records are always kept.

    Returns:
        A new list holding the kept records in their original order.
        ``records`` itself is not modified.
    """
    kept = []
    removed = 0
    for index, record in enumerate(records):
        # Compare without the line terminator so that a final line lacking
        # a trailing newline (or a file with CRLF endings) still matches.
        is_positive = record[2].rstrip('\r\n') == '1'
        if is_positive and index < scan_limit and removed < max_removed:
            # Build a new list instead of popping from the one being
            # scanned: popping shifts later items down by one, which makes
            # an index-based loop skip the record after every removal.
            removed += 1
            continue
        kept.append(record)
    return kept


def main():
    """Filter the SICK training files and append the result to the Desktop files."""
    with open('/home/xbwang/croschangetrain/data/sick/train/a.toks', 'r') as f0:
        a = f0.readlines()
    with open('/home/xbwang/croschangetrain/data/sick/train/b.toks', 'r') as f1:
        b = f1.readlines()
    with open('/home/xbwang/croschangetrain/data/sick/train/sim.txt', 'r') as f2:
        c = f2.readlines()

    # The three files are parallel: line i of each describes the same pair.
    if not (len(a) == len(b) == len(c)):
        raise ValueError(
            'input files are not line-aligned: %d / %d / %d lines'
            % (len(a), len(b), len(c))
        )

    # Keep each record as a tuple rather than joining with '===' and
    # splitting again, which would break on a sentence containing '==='.
    kept = drop_positive_examples(list(zip(a, b, c)))

    # Mode 'a' is kept from the original script: running it again appends
    # to the existing output rather than overwriting it.
    with open('/home/xbwang/Desktop/a', 'a') as f3:
        f3.writelines(record[0] for record in kept)
    with open('/home/xbwang/Desktop/b', 'a') as f4:
        f4.writelines(record[1] for record in kept)
    with open('/home/xbwang/Desktop/c', 'a') as f5:
        f5.writelines(record[2] for record in kept)


if __name__ == '__main__':
    main()
原文地址:https://www.cnblogs.com/ymjyqsx/p/6402321.html