2 Module : Ngrams.Metrics
4 Copyright : Ngrams.Metrics (c)
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
18 module Ngrams.Metrics (main) where
20 import Data.Text (Text)
21 import qualified Data.Text as T
25 import Test.QuickCheck
27 import Gargantext.Prelude
28 import Gargantext.Text.Metrics
30 #if !MIN_VERSION_base(4,8,0)
31 import Control.Applicative
34 instance Arbitrary Text where
35 arbitrary = T.pack <$> arbitrary
42 describe "levenshtein" $ do
44 context "with concrete examples" $ do
45 testPair levenshtein "kitten" "sitting" 3
46 testPair levenshtein "cake" "drake" 2
47 testPair levenshtein "saturday" "sunday" 3
48 testPair levenshtein "red" "wax" 3
49 #if __GLASGOW_HASKELL__ >= 710
50 testPair levenshtein "a😀c" "abc" 1
52 testPair levenshtein "lucky" "lucky" 0
53 testPair levenshtein "" "" 0
54 describe "levenshteinNorm" $ do
55 testSwap levenshteinNorm
56 testPair levenshteinNorm "kitten" "sitting" (4 % 7)
57 testPair levenshteinNorm "cake" "drake" (3 % 5)
58 testPair levenshteinNorm "saturday" "sunday" (5 % 8)
59 testPair levenshteinNorm "red" "wax" (0 % 1)
60 #if __GLASGOW_HASKELL__ >= 710
61 testPair levenshteinNorm "a😀c" "abc" (2 % 3)
63 testPair levenshteinNorm "lucky" "lucky" (1 % 1)
64 testPair levenshteinNorm "" "" (1 % 1)
65 describe "damerauLevenshtein" $ do
66 testSwap damerauLevenshtein
67 testPair damerauLevenshtein "veryvery long" "very long" 4
68 testPair damerauLevenshtein "thing" "think" 1
69 testPair damerauLevenshtein "nose" "ones" 2
70 testPair damerauLevenshtein "thing" "sign" 3
71 testPair damerauLevenshtein "red" "wax" 3
72 #if __GLASGOW_HASKELL__ >= 710
73 testPair damerauLevenshtein "a😀c" "abc" 1
75 testPair damerauLevenshtein "lucky" "lucky" 0
76 testPair damerauLevenshtein "" "" 0
77 describe "damerauLevenshteinNorm" $ do
78 testSwap damerauLevenshteinNorm
79 testPair damerauLevenshteinNorm "veryvery long" "very long" (9 % 13)
80 testPair damerauLevenshteinNorm "thing" "think" (4 % 5)
81 testPair damerauLevenshteinNorm "nose" "ones" (1 % 2)
82 testPair damerauLevenshteinNorm "thing" "sign" (2 % 5)
83 testPair damerauLevenshteinNorm "red" "wax" (0 % 1)
84 #if __GLASGOW_HASKELL__ >= 710
85 testPair damerauLevenshteinNorm "a😀c" "abc" (2 % 3)
87 testPair damerauLevenshteinNorm "lucky" "lucky" (1 % 1)
88 testPair damerauLevenshteinNorm "" "" (1 % 1)
89 describe "hamming" $ do
91 testPair hamming "karolin" "kathrin" (Just 3)
92 testPair hamming "karolin" "kerstin" (Just 3)
93 testPair hamming "1011101" "1001001" (Just 2)
94 testPair hamming "2173896" "2233796" (Just 3)
95 testPair hamming "toned" "roses" (Just 3)
96 testPair hamming "red" "wax" (Just 3)
97 #if __GLASGOW_HASKELL__ >= 710
98 testPair hamming "a😀c" "abc" (Just 1)
100 testPair hamming "lucky" "lucky" (Just 0)
101 testPair hamming "" "" (Just 0)
102 testPair hamming "small" "big" Nothing
103 describe "overlap" $ do
105 testPair overlap "fly" "butterfly" (1 % 1)
106 testPair overlap "night" "nacht" (3 % 5)
107 testPair overlap "context" "contact" (5 % 7)
108 testPair overlap "red" "wax" (0 % 1)
109 #if __GLASGOW_HASKELL__ >= 710
110 testPair overlap "a😀c" "abc" (2 % 3)
112 testPair overlap "lucky" "lucky" (1 % 1)
113 describe "jaccard" $ do
115 testPair jaccard "xxx" "xyx" (1 % 2)
116 testPair jaccard "night" "nacht" (3 % 7)
117 testPair jaccard "context" "contact" (5 % 9)
118 #if __GLASGOW_HASKELL__ >= 710
119 testPair overlap "a😀c" "abc" (2 % 3)
121 testPair jaccard "lucky" "lucky" (1 % 1)
123 -- | Test that given function returns the same results when order of
124 -- arguments is swapped.
126 testSwap :: (Eq a, Show a) => (Text -> Text -> a) -> SpecWith ()
127 testSwap f = context "if we swap the arguments" $
128 it "produces the same result" $
129 property $ \a b -> f a b === f b a
131 -- | Create spec for given metric function applying it to two 'Text' values
132 -- and comparing the result with expected one.
134 testPair :: (Eq a, Show a)
135 => (Text -> Text -> a) -- ^ Function to test
136 -> Text -- ^ First input
137 -> Text -- ^ Second input
138 -> a -- ^ Expected result
140 testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $