similarity 字符串编辑距离相似度匹配

Step Description
1 Set n to be the length of s.
  Set m to be the length of t.
  If n = 0, return m and exit.
  If m = 0, return n and exit.
  Construct a matrix containing 0 .. m rows and 0 .. n columns.
2 Initialize the first row to 0 .. n.
  Initialize the first column to 0 .. m.
3 Examine each character of s (i from 1 to n).
4 Examine each character of t (j from 1 to m).
5 If s [i] equals t [j], the cost is 0.
  If s [i] doesn't equal t [j], the cost is 1.
6 Set cell d [i, j] of the matrix equal to the minimum of:
  a. The cell immediately above plus 1: d [i-1, j] + 1.
  b. The cell immediately to the left plus 1: d [i, j-1] + 1.
  c. The cell diagonally above and to the left plus the cost: d [i-1, j-1] + cost.
7 After the iteration steps (3, 4, 5, 6) are complete, the distance is found in cell d [n, m].





/// <summary>  
/// 编辑距离(Levenshtein Distance)  
/// </summary>  
/// <param name="source">源串</param>  
/// <param name="target">目标串</param>  
/// <param name="similarity">输出:相似度,值在0~1</param>  
/// <param name="isCaseSensitive">是否大小写敏感</param>  
/// <returns>源串和目标串之间的编辑距离</returns>  
public static Int32 LevenshteinDistance(String source, String target, out Double similarity, Boolean isCaseSensitive = false)  
{  
    if (String.IsNullOrEmpty(source))  
    {  
        if (String.IsNullOrEmpty(target))  
        {  
            similarity = 1;  
            return 0;  
        }  
        else  
        {  
            similarity = 0;  
            return target.Length;  
        }  
    }  
    else if (String.IsNullOrEmpty(target))  
    {  
        similarity = 0;  
        return source.Length;  
    }  
  
    String From, To;  
    if (isCaseSensitive)  
    {   // 大小写敏感  
        From = source;  
        To = target;  
    }  
    else  
    {   // 大小写无关  
        From = source.ToLower();  
        To = target.ToLower();  
    }  
  
    // 初始化  
    Int32 m = From.Length;  
    Int32 n = To.Length;  
    Int32[,] H = new Int32[m + 1, n + 1];  
    for (Int32 i = 0; i <= m; i++) H[i, 0] = i;  // 注意:初始化[0,0]  
    for (Int32 j = 1; j <= n; j++) H[0, j] = j;  
  
    // 迭代  
    for (Int32 i = 1; i <= m; i++)  
    {  
        Char SI = From[i - 1];  
        for (Int32 j = 1; j <= n; j++)  
        {   // 删除(deletion) 插入(insertion) 替换(substitution)  
            if (SI == To[j - 1])  
                H[i, j] = H[i-1, j-1];                      
            else  
                H[i, j] = Math.Min(H[i-1, j-1], Math.Min(H[i-1, j], H[i, j-1])) + 1;                      
        }  
    }  
  
    // 计算相似度  
    Int32 MaxLength = Math.Max(m, n);   // 两字符串的最大长度  
    similarity = ((Double)(MaxLength - H[m, n])) / MaxLength;  
  
    return H[m, n];    // 编辑距离  
}  
 
  1. public static void CheckEditDistance()  
  2. {  
  3.     Int32 Distance;  
  4.     Double Similarity;  
  5.   
  6.     while (true)  
  7.     {  
  8.         Console.WriteLine("------------------------------------");  
  9.         Console.Write("源串 = ");  
  10.         String Source = Console.ReadLine();  
  11.         if (Source == "q") break;  
  12.   
  13.         Console.Write("目标串 = ");  
  14.         String Target = Console.ReadLine();  
  15.   
  16.         Distance = LevenshteinDistance(Source, Target, out Similarity, true);  
  17.         Console.WriteLine("编辑距离 = " + Distance.ToString());  
  18.         Console.WriteLine("相似度 = " + Similarity.ToString("0.####"));  
  19.     }  
  20. }  



 
改变自己,成长自己
原文地址:https://www.cnblogs.com/xxh-2014/p/8397914.html