# -*- coding: utf-8 -*-
"""Score candidate word segmentations of an unspaced character string.

A segmentation is a string of '0'/'1' flags; a '1' at index ``i`` marks a
word boundary immediately after ``text[i]``.  ``evaluate`` returns a cost
(number of word tokens plus the size of the distinct-word lexicon), printing
its intermediate values along the way.
"""


def segment(text, segs):
    """Split ``text`` into words according to the boundary flags in ``segs``.

    Args:
        text: The unsegmented string.
        segs: A string of flags; ``segs[i] == '1'`` means a word ends after
            ``text[i]``.  Any other character means "no boundary here".
            In the sample data below ``segs`` is one character shorter than
            ``text`` (there is no flag after the final character).

    Returns:
        A list of words.  The remainder of ``text`` after the last flagged
        boundary is always appended as the final word, so the list is never
        empty (``segment("", "")`` returns ``[""]``).
    """
    words = []
    last = 0  # index in ``text`` where the current word starts
    for i, flag in enumerate(segs):
        if flag == '1':
            words.append(text[last:i + 1])
            last = i + 1
    # Whatever follows the final boundary forms the last word.
    words.append(text[last:])
    return words


def evaluate(text, segs):
    """Return the cost of segmenting ``text`` with ``segs``.

    The cost is ``text_size + lexicon_size`` where

    * ``text_size`` is the number of word tokens produced by ``segment``;
    * ``lexicon_size`` is the length of the distinct words joined by single
      spaces (total characters of the distinct words plus one separator
      between each adjacent pair).

    Every intermediate value is printed, in the order: word list, text size,
    distinct-word set, that set as a list, the space-joined lexicon, lexicon
    size, and the final cost.

    Args:
        text: The unsegmented string.
        segs: Boundary flags, as accepted by ``segment``.

    Returns:
        The integer cost; smaller means fewer tokens and/or a smaller lexicon.
    """
    # NOTE: each print uses the single-argument parenthesised form, which
    # produces the same output under Python 2's print statement and also
    # parses as a function call under Python 3.
    words = segment(text, segs)
    print(words)

    text_size = len(words)
    print(text_size)

    # Build the lexicon once and reuse it for every derived value.
    lexicon = set(words)
    lexicon_text = ' '.join(lexicon)
    lexicon_size = len(lexicon_text)
    print(lexicon)
    print(list(lexicon))
    print(lexicon_text)
    print(lexicon_size)

    size = text_size + lexicon_size
    print(size)
    return size


# Sample utterances with all spaces removed, and three candidate
# segmentations of them.
text = "doyouseethekittyseethedoggydoyoulikethekittylikethedoggy"
seg1 = "0000000000000001000000000010000000000000000100000000000"
seg2 = "0100100100100001001001000010100100010010000100010010000"
seg3 = "0000100100000011001000000110000100010000001100010000001"

# Only the seg2 run is enabled; uncomment the others to compare costs.
#evaluate(text, seg1)
evaluate(text, seg2)
#evaluate(text, seg3)