【转载】计算汉字相似程度

package com.thunisoft.in4.cm.associate.util;

import java.util.List;
import java.util.Map;

/**
 * 编辑距离算法
 * 
 * @author fanxf
 * 
 */
public class HanziParse {
    
    /**
     * 计算矢量距离 Levenshtein Distance(LD)
     * 
     * @param str1
     *            str1
     * @param str2
     *            str2
     * @return ld; 0:完全相等,非零代表相似距離
     */
    private static int distance(String str1, String str2) {

        int n = str1.length();
        int m = str2.length();
        if (n == 0) {
            return m;
        }
        if (m == 0) {
            return n;
        }
        /** 二维数组,存放中间距离 */
        int[][] distance = new int[n + 1][m + 1];
        /** 初始化数据 */
        int i = 0;
        for (i = 0; i <= n; i++) {
            distance[i][0] = i;
        }
        int j = 0;
        for (j = 0; j <= m; j++) {
            distance[0][j] = j;
        }
        /** 计算中间数据 */
        int cost = 0;
        for (i = 1; i <= n; i++) {
            char ch1 = str1.charAt(i - 1);
            for (j = 1; j <= m; j++) {
                char ch2 = str2.charAt(j - 1);
                if (ch1 == ch2) {
                    cost = 0;
                } else {
                    cost = 1;
                }
                /** 获取最小值 */
                distance[i][j] = min(distance[i - 1][j] + 1,
                        distance[i][j - 1] + 1, distance[i - 1][j - 1] + cost);
            }
        }
        return distance[n][m];
    }

    /**
     * 计算最小值
     * 
     * @param one
     * @param two
     * @param three
     * @return 返回最小值
     */
    private static int min(int one, int two, int three) {
        int min = one;
        if (two < min) {
            min = two;
        }
        if (three < min) {
            min = three;
        }
        return min;
    }

    /**
     * 计算相似度
     * 
     * @param str1
     *            str1 待匹配数据
     * @param str2
     *            str2 待匹配数据
     * @return sim,匹配相似度
     */
    public static float similarity(String str1, String str2) {
        int ld = distance(str1, str2);
        return 1 - (float) ld / Math.max(str1.length(), str2.length());
    }


    /**
     * 测试
     * 
     * @param args
     */
    public static void main(String[] args) {
         HanziParse ld = new HanziParse();
         float num = ld.similarity("vvv", "vvv");
         System.out.println(num);

    }
}
原文地址:https://www.cnblogs.com/zhangxsh/p/3494535.html