package com.thunisoft.in4.cm.associate.util; import java.util.List; import java.util.Map; /** * 编辑距离算法 * * @author fanxf * */ public class HanziParse { /** * 计算矢量距离 Levenshtein Distance(LD) * * @param str1 * str1 * @param str2 * str2 * @return ld; 0:完全相等,非零代表相似距離 */ private static int distance(String str1, String str2) { int n = str1.length(); int m = str2.length(); if (n == 0) { return m; } if (m == 0) { return n; } /** 二维数组,存放中间距离 */ int[][] distance = new int[n + 1][m + 1]; /** 初始化数据 */ int i = 0; for (i = 0; i <= n; i++) { distance[i][0] = i; } int j = 0; for (j = 0; j <= m; j++) { distance[0][j] = j; } /** 计算中间数据 */ int cost = 0; for (i = 1; i <= n; i++) { char ch1 = str1.charAt(i - 1); for (j = 1; j <= m; j++) { char ch2 = str2.charAt(j - 1); if (ch1 == ch2) { cost = 0; } else { cost = 1; } /** 获取最小值 */ distance[i][j] = min(distance[i - 1][j] + 1, distance[i][j - 1] + 1, distance[i - 1][j - 1] + cost); } } return distance[n][m]; } /** * 计算最小值 * * @param one * @param two * @param three * @return 返回最小值 */ private static int min(int one, int two, int three) { int min = one; if (two < min) { min = two; } if (three < min) { min = three; } return min; } /** * 计算相似度 * * @param str1 * str1 待匹配数据 * @param str2 * str2 待匹配数据 * @return sim,匹配相似度 */ public static float similarity(String str1, String str2) { int ld = distance(str1, str2); return 1 - (float) ld / Math.max(str1.length(), str2.length()); } /** * 测试 * * @param args */ public static void main(String[] args) { HanziParse ld = new HanziParse(); float num = ld.similarity("vvv", "vvv"); System.out.println(num); } }