2 {-# LANGUAGE OverloadedStrings #-}
3 {-# OPTIONS_GHC -fno-warn-orphans #-}
5 module Ngrams.Metrics (main) where
7 import Data.Text (Text)
8 import qualified Data.Text as T
12 import Test.QuickCheck
14 import Gargantext.Prelude
15 import Gargantext.Ngrams.Metrics
17 #if !MIN_VERSION_base(4,8,0)
18 import Control.Applicative
21 instance Arbitrary Text where
22 arbitrary = T.pack <$> arbitrary
29 describe "levenshtein" $ do
31 context "with concrete examples" $ do
32 testPair levenshtein "kitten" "sitting" 3
33 testPair levenshtein "cake" "drake" 2
34 testPair levenshtein "saturday" "sunday" 3
35 testPair levenshtein "red" "wax" 3
36 #if __GLASGOW_HASKELL__ >= 710
37 testPair levenshtein "a😀c" "abc" 1
39 testPair levenshtein "lucky" "lucky" 0
40 testPair levenshtein "" "" 0
41 describe "levenshteinNorm" $ do
42 testSwap levenshteinNorm
43 testPair levenshteinNorm "kitten" "sitting" (4 % 7)
44 testPair levenshteinNorm "cake" "drake" (3 % 5)
45 testPair levenshteinNorm "saturday" "sunday" (5 % 8)
46 testPair levenshteinNorm "red" "wax" (0 % 1)
47 #if __GLASGOW_HASKELL__ >= 710
48 testPair levenshteinNorm "a😀c" "abc" (2 % 3)
50 testPair levenshteinNorm "lucky" "lucky" (1 % 1)
51 testPair levenshteinNorm "" "" (1 % 1)
52 describe "damerauLevenshtein" $ do
53 testSwap damerauLevenshtein
54 testPair damerauLevenshtein "veryvery long" "very long" 4
55 testPair damerauLevenshtein "thing" "think" 1
56 testPair damerauLevenshtein "nose" "ones" 2
57 testPair damerauLevenshtein "thing" "sign" 3
58 testPair damerauLevenshtein "red" "wax" 3
59 #if __GLASGOW_HASKELL__ >= 710
60 testPair damerauLevenshtein "a😀c" "abc" 1
62 testPair damerauLevenshtein "lucky" "lucky" 0
63 testPair damerauLevenshtein "" "" 0
64 describe "damerauLevenshteinNorm" $ do
65 testSwap damerauLevenshteinNorm
66 testPair damerauLevenshteinNorm "veryvery long" "very long" (9 % 13)
67 testPair damerauLevenshteinNorm "thing" "think" (4 % 5)
68 testPair damerauLevenshteinNorm "nose" "ones" (1 % 2)
69 testPair damerauLevenshteinNorm "thing" "sign" (2 % 5)
70 testPair damerauLevenshteinNorm "red" "wax" (0 % 1)
71 #if __GLASGOW_HASKELL__ >= 710
72 testPair damerauLevenshteinNorm "a😀c" "abc" (2 % 3)
74 testPair damerauLevenshteinNorm "lucky" "lucky" (1 % 1)
75 testPair damerauLevenshteinNorm "" "" (1 % 1)
76 describe "hamming" $ do
78 testPair hamming "karolin" "kathrin" (Just 3)
79 testPair hamming "karolin" "kerstin" (Just 3)
80 testPair hamming "1011101" "1001001" (Just 2)
81 testPair hamming "2173896" "2233796" (Just 3)
82 testPair hamming "toned" "roses" (Just 3)
83 testPair hamming "red" "wax" (Just 3)
84 #if __GLASGOW_HASKELL__ >= 710
85 testPair hamming "a😀c" "abc" (Just 1)
87 testPair hamming "lucky" "lucky" (Just 0)
88 testPair hamming "" "" (Just 0)
89 testPair hamming "small" "big" Nothing
90 describe "overlap" $ do
92 testPair overlap "fly" "butterfly" (1 % 1)
93 testPair overlap "night" "nacht" (3 % 5)
94 testPair overlap "context" "contact" (5 % 7)
95 testPair overlap "red" "wax" (0 % 1)
96 #if __GLASGOW_HASKELL__ >= 710
97 testPair overlap "a😀c" "abc" (2 % 3)
99 testPair overlap "lucky" "lucky" (1 % 1)
100 describe "jaccard" $ do
102 testPair jaccard "xxx" "xyx" (1 % 2)
103 testPair jaccard "night" "nacht" (3 % 7)
104 testPair jaccard "context" "contact" (5 % 9)
105 #if __GLASGOW_HASKELL__ >= 710
106 testPair overlap "a😀c" "abc" (2 % 3)
108 testPair jaccard "lucky" "lucky" (1 % 1)
110 -- | Test that given function returns the same results when order of
111 -- arguments is swapped.
113 testSwap :: (Eq a, Show a) => (Text -> Text -> a) -> SpecWith ()
114 testSwap f = context "if we swap the arguments" $
115 it "produces the same result" $
116 property $ \a b -> f a b === f b a
118 -- | Create spec for given metric function applying it to two 'Text' values
119 -- and comparing the result with expected one.
121 testPair :: (Eq a, Show a)
122 => (Text -> Text -> a) -- ^ Function to test
123 -> Text -- ^ First input
124 -> Text -- ^ Second input
125 -> a -- ^ Expected result
127 testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $