1 -- module Data.Gargantext.Ngrams.Utils where
4 -- calculate levenshtein distance between two strings
5 levenshtein::[Char] -> [Char] -> Int
7 levenshtein "" s2 = length s2
8 levenshtein s1 "" = length s1
10 | last s1 == last s2 = levenshtein (init s1) (init s2)
11 | otherwise = minimum [
12 1 + levenshtein (init s1) s2,
13 1 + levenshtein s1 (init s2),
14 1 + levenshtein (init s1) (init s2)
18 -- calculate levenshtein distance between two strings
19 levenshtein::[Char] -> [Char] -> Int
20 -- this part is mostly a speed optimiziation
22 | length s1 > length s2 = levenshtein s2 s1
23 | length s1 < length s2 =
24 let d = length s2 - length s1
25 in d + levenshtein s1 (take (length s2 - d) s2)
26 -- the meat of the algorithm
27 levenshtein' "" "" = 0
29 | last s1 == last s2 = levenshtein (init s1) (init s2)
30 | otherwise = minimum [1 + levenshtein (init s1) s2,
31 1 + levenshtein s1 (init s2),
32 1 + levenshtein (init s1) (init s2)]