...

Source file src/github.com/agnivade/levenshtein/levenshtein.go

Documentation: github.com/agnivade/levenshtein

     1  // Package levenshtein is a Go implementation to calculate Levenshtein Distance.
     2  //
     3  // Implementation taken from
     4  // https://gist.github.com/andrei-m/982927#gistcomment-1931258
     5  package levenshtein
     6  
     7  import "unicode/utf8"
     8  
     9  // minLengthThreshold is the length of the string beyond which
    10  // an allocation will be made. Strings smaller than this will be
    11  // zero alloc.
    12  const minLengthThreshold = 32
    13  
    14  // ComputeDistance computes the levenshtein distance between the two
    15  // strings passed as an argument. The return value is the levenshtein distance
    16  //
    17  // Works on runes (Unicode code points) but does not normalize
    18  // the input strings. See https://blog.golang.org/normalization
    19  // and the golang.org/x/text/unicode/norm package.
    20  func ComputeDistance(a, b string) int {
    21  	if len(a) == 0 {
    22  		return utf8.RuneCountInString(b)
    23  	}
    24  
    25  	if len(b) == 0 {
    26  		return utf8.RuneCountInString(a)
    27  	}
    28  
    29  	if a == b {
    30  		return 0
    31  	}
    32  
    33  	// We need to convert to []rune if the strings are non-ASCII.
    34  	// This could be avoided by using utf8.RuneCountInString
    35  	// and then doing some juggling with rune indices,
    36  	// but leads to far more bounds checks. It is a reasonable trade-off.
    37  	s1 := []rune(a)
    38  	s2 := []rune(b)
    39  
    40  	// swap to save some memory O(min(a,b)) instead of O(a)
    41  	if len(s1) > len(s2) {
    42  		s1, s2 = s2, s1
    43  	}
    44  	lenS1 := len(s1)
    45  	lenS2 := len(s2)
    46  
    47  	// Init the row.
    48  	var x []uint16
    49  	if lenS1+1 > minLengthThreshold {
    50  		x = make([]uint16, lenS1+1)
    51  	} else {
    52  		// We make a small optimization here for small strings.
    53  		// Because a slice of constant length is effectively an array,
    54  		// it does not allocate. So we can re-slice it to the right length
    55  		// as long as it is below a desired threshold.
    56  		x = make([]uint16, minLengthThreshold)
    57  		x = x[:lenS1+1]
    58  	}
    59  
    60  	// we start from 1 because index 0 is already 0.
    61  	for i := 1; i < len(x); i++ {
    62  		x[i] = uint16(i)
    63  	}
    64  
    65  	// make a dummy bounds check to prevent the 2 bounds check down below.
    66  	// The one inside the loop is particularly costly.
    67  	_ = x[lenS1]
    68  	// fill in the rest
    69  	for i := 1; i <= lenS2; i++ {
    70  		prev := uint16(i)
    71  		for j := 1; j <= lenS1; j++ {
    72  			current := x[j-1] // match
    73  			if s2[i-1] != s1[j-1] {
    74  				current = min(min(x[j-1]+1, prev+1), x[j]+1)
    75  			}
    76  			x[j-1] = prev
    77  			prev = current
    78  		}
    79  		x[lenS1] = prev
    80  	}
    81  	return int(x[lenS1])
    82  }
    83  
    84  func min(a, b uint16) uint16 {
    85  	if a < b {
    86  		return a
    87  	}
    88  	return b
    89  }
    90  

View as plain text