手动实现DPA算法——添加随机森林

在做了词对齐之后，就开始尝试着做依存的映射，总的情况归纳为以下6种：

以源语言英语作为观测对象，有以下四种情况。其中 align_item(m,n) 中的 m 表示孩子结点对齐的单词个数，n 表示父亲结点对齐的单词个数，二者顺序可以颠倒：

（1）一对一（依存关系组中的孩子结点和父亲结点各自只对应一个目标语言单词）。即（1,1）

（2）对空现象，有一个结点找不到目标语言单词与之对应（0,1）

（3）一对多，有一个结点对应的目标语言单词总数大于1，即（n，1）

（4）多对多，两个结点各自对应的目标语言单词总数均大于1，即（n，n）。这种情况尚未解决，目前并不知道如何拆分。

以目标语言汉语作为观测对象，有以下两种情况:

(1)一对多，即一个汉语单词对应多个源语言英语单词，即（n，1）

(2)对空，即某个汉语单词没有对应任何的源语言英文单词，即（0），添加成依存森林树。

输入为词对齐矩阵，输出为依存关系元组。以下是代码的实现过程：

# -*- coding: utf-8 -*-
import codecs
def make_dpa():
    """Project source-side dependencies through the word-alignment matrices.

    Reads the alignment matrices from ``align.get_matrix()`` and, for every
    sentence matrix, makes two passes:

    1. Source side: for each column whose header cell is not ``'0'``, count
       the target rows aligned (``'1'``) to the child column and to the
       parent column, then hand the counts to ``source_makechoice``.
    2. Target side: for each row whose first cell is not ``'0'``, count the
       source columns aligned to it and hand the count to
       ``target_makechoice``.

    Items that ``source_makechoice`` appends to the shared list are written
    to the file ``projectedItem``, one per line.

    NOTE(review): the matrix layout is inferred from how the cells are
    indexed, not from a visible definition -- confirm against ``align``:
      * row 0 is assumed to be the header row; a non-'0' header cell is
        split on whitespace and its second field is used as the column
        index of the parent node;
      * column 0 of every other row is read as the target-language word.
    NOTE(review): writing the collected items to ``projectedItem`` is an
    assumption; the file was opened for writing but never written to.
    """
    matrix = align.get_matrix()
    # Source-language dependency items collected across all sentences.
    sDependcy_list = []

    for sentence in matrix:
        if not sentence:
            continue
        header = sentence[0]
        body = sentence[1:]

        # Pass 1: take the source language as the object of study.
        for col, head_cell in enumerate(header):
            if head_cell == '0':
                continue
            pair = head_cell.strip().split()
            parent_col = int(pair[1])
            # Child node: remember the aligned target words (column 0).
            child_list = [cells[0] for cells in body if cells[col] == '1']
            # Parent node: only the row numbers need to be stored.
            parent_list = [
                row
                for row, cells in enumerate(body, start=1)
                if cells[parent_col] == '1'
            ]
            # Classify the alignment (one-to-one, null, one-to-many, ...).
            source_makechoice(
                len(child_list),
                len(parent_list),
                child_list,
                parent_list,
                sDependcy_list,
            )

        # Pass 2: take the target language as the object of study.
        for cells in sentence:
            if cells and cells[0] != '0':
                # The count is per target word, so it starts fresh each row.
                count = sum(1 for cell in cells[1:] if cell == '1')
                target_makechoice(count)

    # The context manager guarantees the output file is closed.
    with codecs.open('projectedItem', 'w', encoding='utf-8') as output_file:
        for item in sDependcy_list:
            output_file.write(item + '\n')


def source_makechoice(input1, input2, clist1, plist2, dlist3):
    """Build the projected dependency item for one source dependency pair.

    Args:
        input1: number of target words aligned to the child node.
        input2: number of target words aligned to the parent node.
        clist1: target words aligned to the child node.
        plist2: identifiers of the target words aligned to the parent node
            (converted with ``str`` when the item is built, so ints are
            accepted).
        dlist3: list that collects the generated items; may be ``None``.

    Returns:
        The item string ``"(child,parent)"`` for the one-to-one case and the
        two null-alignment cases, where the missing side is written as
        ``null``. Returns ``None`` for every other combination; the
        one-to-many and many-to-many cases print a "pending" notice because
        they are not handled yet.
    """
    if input1 == 1 and input2 == 1:
        depencyItem = '(' + clist1[0] + ',' + str(plist2[0]) + ')'
    elif input1 == 0 and input2 == 1:
        depencyItem = '(' + 'null' + ',' + str(plist2[0]) + ')'
    elif input1 == 1 and input2 == 0:
        depencyItem = '(' + clist1[0] + ',' + 'null' + ')'
    elif (input1 > 1 and input2 >= 1) or (input1 == 1 and input2 > 1):
        # "n" in the case list means "more than one aligned word".
        print('待解决')
        return None
    else:
        # Combinations outside the case list (e.g. both sides unaligned)
        # produce nothing.
        return None

    if dlist3 is not None:
        dlist3.append(depencyItem)
    return depencyItem


def target_makechoice(count):