]> Git — Sourcephile - gargantext.git/blob - src-test/Ngrams/Metrics.hs
[List] filename for content-disposition
[gargantext.git] / src-test / Ngrams / Metrics.hs
1 {-|
2 Module : Ngrams.Metrics
3 Description :
4 Copyright : Ngrams.Metrics (c)
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Here is a longer description of this module, containing some
11 commentary with @some markup@.
12 -}
13
14 {-# OPTIONS_GHC -fno-warn-orphans #-}
15
16 {-# LANGUAGE CPP #-}
17 {-# LANGUAGE NoImplicitPrelude #-}
18 {-# LANGUAGE OverloadedStrings #-}
19
20 module Ngrams.Metrics (main) where
21
22 import Data.Text (Text)
23 import qualified Data.Text as T
24 import Data.Ratio
25
26 import Test.Hspec
27 import Test.QuickCheck
28
29 import Gargantext.Prelude
30 import Gargantext.Text.Metrics
31
32 #if !MIN_VERSION_base(4,8,0)
33 import Control.Applicative
34 #endif
35
36 instance Arbitrary Text where
37 arbitrary = T.pack <$> arbitrary
38
39 main :: IO ()
40 main = hspec spec
41
42 spec :: Spec
43 spec = do
44 describe "levenshtein" $ do
45 testSwap levenshtein
46 context "with concrete examples" $ do
47 testPair levenshtein "kitten" "sitting" 3
48 testPair levenshtein "cake" "drake" 2
49 testPair levenshtein "saturday" "sunday" 3
50 testPair levenshtein "red" "wax" 3
51 #if __GLASGOW_HASKELL__ >= 710
52 testPair levenshtein "a😀c" "abc" 1
53 #endif
54 testPair levenshtein "lucky" "lucky" 0
55 testPair levenshtein "" "" 0
56 describe "levenshteinNorm" $ do
57 testSwap levenshteinNorm
58 testPair levenshteinNorm "kitten" "sitting" (4 % 7)
59 testPair levenshteinNorm "cake" "drake" (3 % 5)
60 testPair levenshteinNorm "saturday" "sunday" (5 % 8)
61 testPair levenshteinNorm "red" "wax" (0 % 1)
62 #if __GLASGOW_HASKELL__ >= 710
63 testPair levenshteinNorm "a😀c" "abc" (2 % 3)
64 #endif
65 testPair levenshteinNorm "lucky" "lucky" (1 % 1)
66 testPair levenshteinNorm "" "" (1 % 1)
67 describe "damerauLevenshtein" $ do
68 testSwap damerauLevenshtein
69 testPair damerauLevenshtein "veryvery long" "very long" 4
70 testPair damerauLevenshtein "thing" "think" 1
71 testPair damerauLevenshtein "nose" "ones" 2
72 testPair damerauLevenshtein "thing" "sign" 3
73 testPair damerauLevenshtein "red" "wax" 3
74 #if __GLASGOW_HASKELL__ >= 710
75 testPair damerauLevenshtein "a😀c" "abc" 1
76 #endif
77 testPair damerauLevenshtein "lucky" "lucky" 0
78 testPair damerauLevenshtein "" "" 0
79 describe "damerauLevenshteinNorm" $ do
80 testSwap damerauLevenshteinNorm
81 testPair damerauLevenshteinNorm "veryvery long" "very long" (9 % 13)
82 testPair damerauLevenshteinNorm "thing" "think" (4 % 5)
83 testPair damerauLevenshteinNorm "nose" "ones" (1 % 2)
84 testPair damerauLevenshteinNorm "thing" "sign" (2 % 5)
85 testPair damerauLevenshteinNorm "red" "wax" (0 % 1)
86 #if __GLASGOW_HASKELL__ >= 710
87 testPair damerauLevenshteinNorm "a😀c" "abc" (2 % 3)
88 #endif
89 testPair damerauLevenshteinNorm "lucky" "lucky" (1 % 1)
90 testPair damerauLevenshteinNorm "" "" (1 % 1)
91 describe "hamming" $ do
92 testSwap hamming
93 testPair hamming "karolin" "kathrin" (Just 3)
94 testPair hamming "karolin" "kerstin" (Just 3)
95 testPair hamming "1011101" "1001001" (Just 2)
96 testPair hamming "2173896" "2233796" (Just 3)
97 testPair hamming "toned" "roses" (Just 3)
98 testPair hamming "red" "wax" (Just 3)
99 #if __GLASGOW_HASKELL__ >= 710
100 testPair hamming "a😀c" "abc" (Just 1)
101 #endif
102 testPair hamming "lucky" "lucky" (Just 0)
103 testPair hamming "" "" (Just 0)
104 testPair hamming "small" "big" Nothing
105 describe "overlap" $ do
106 testSwap overlap
107 testPair overlap "fly" "butterfly" (1 % 1)
108 testPair overlap "night" "nacht" (3 % 5)
109 testPair overlap "context" "contact" (5 % 7)
110 testPair overlap "red" "wax" (0 % 1)
111 #if __GLASGOW_HASKELL__ >= 710
112 testPair overlap "a😀c" "abc" (2 % 3)
113 #endif
114 testPair overlap "lucky" "lucky" (1 % 1)
115 describe "jaccard" $ do
116 testSwap jaccard
117 testPair jaccard "xxx" "xyx" (1 % 2)
118 testPair jaccard "night" "nacht" (3 % 7)
119 testPair jaccard "context" "contact" (5 % 9)
120 #if __GLASGOW_HASKELL__ >= 710
121 testPair overlap "a😀c" "abc" (2 % 3)
122 #endif
123 testPair jaccard "lucky" "lucky" (1 % 1)
124
125 -- | Test that given function returns the same results when order of
126 -- arguments is swapped.
127
128 testSwap :: (Eq a, Show a) => (Text -> Text -> a) -> SpecWith ()
129 testSwap f = context "if we swap the arguments" $
130 it "produces the same result" $
131 property $ \a b -> f a b === f b a
132
133 -- | Create spec for given metric function applying it to two 'Text' values
134 -- and comparing the result with expected one.
135
136 testPair :: (Eq a, Show a)
137 => (Text -> Text -> a) -- ^ Function to test
138 -> Text -- ^ First input
139 -> Text -- ^ Second input
140 -> a -- ^ Expected result
141 -> SpecWith ()
142 testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $
143 f a b `shouldBe` r