2 Module : Gargantext.Text.Metrics
3 Description : Short description
4 Copyright : (c) Some Guy, 2013
7 Maintainer : sample@email.com
8 Stability : experimental
11 Mainly reexport functions in @Data.Text.Metrics@
14 {-# LANGUAGE NoImplicitPrelude #-}
16 module Gargantext.Text.Metrics (levenshtein
19 , damerauLevenshteinNorm
25 import Gargantext.Prelude
27 import Data.Text (Text)
28 import GHC.Real (Ratio)
29 import qualified Data.Text.Metrics as DTM
-- | This module provides metrics to compare 'Text' values.
-- It starts as an API re-exporting the main functions of the great
-- text-metrics library by Mark Karpov.
-- | Levenshtein distance between two 'Text' values.
--
-- In information theory, linguistics and computer science, the
-- Levenshtein distance is a string metric for measuring the difference
-- between two sequences.
--
-- This simply forwards both arguments to @DTM.levenshtein@.
--
-- See: <https://en.wikipedia.org/wiki/Levenshtein_distance>
levenshtein :: Text -> Text -> Int
levenshtein source target = DTM.levenshtein source target
-- | Normalized Levenshtein distance between two 'Text' values.
--
-- The result is a non-negative rational number (represented as
-- @'Ratio' 'Int'@), where 0 signifies no similarity between the
-- strings, while 1 means exact match.
--
-- This simply forwards both arguments to @DTM.levenshteinNorm@.
levenshteinNorm :: Text -> Text -> Ratio Int
levenshteinNorm source target = DTM.levenshteinNorm source target
-- | Damerau-Levenshtein distance between two 'Text' values.
--
-- Works like 'levenshtein', except that the collection of allowed
-- operations also includes transposition of two /adjacent/ characters.
--
-- This simply forwards both arguments to @DTM.damerauLevenshtein@.
--
-- See also:
-- <https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance>
damerauLevenshtein :: Text -> Text -> Int
damerauLevenshtein source target = DTM.damerauLevenshtein source target
-- | Normalized Damerau-Levenshtein distance between two 'Text' values,
-- returned as a @'Ratio' 'Int'@.
--
-- This simply forwards both arguments to @DTM.damerauLevenshteinNorm@.
--
-- NOTE(review): presumably uses the same 0 (no similarity) to 1 (exact
-- match) scale as 'levenshteinNorm' -- confirm against the text-metrics
-- documentation.
damerauLevenshteinNorm :: Text -> Text -> Ratio Int
damerauLevenshteinNorm source target = DTM.damerauLevenshteinNorm source target
69 -- Treating inputs like sets
71 -- | Return overlap coefficient for two 'Text' values. Returned value
72 -- is in the range from 0 (no similarity) to 1 (exact match). Return 1
73 -- if both 'Text' values are empty.
75 -- See also: <https://en.wikipedia.org/wiki/Overlap_coefficient>.
76 overlap :: Text -> Text -> Ratio Int
81 -- measures dissimilarity between sample sets
82 jaccard :: Text -> Text -> Ratio Int
86 -- In information theory, the Hamming distance between two strings of
87 -- equal length is the number of positions at which the corresponding
88 -- symbols are different. In other words, it measures the minimum number of
89 -- substitutions required to change one string into the other
90 -- See: https://en.wikipedia.org/wiki/Hamming_distance
92 hamming :: Text -> Text -> Maybe Int