]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics.hs
[FIX] fix cooc behavior.
[gargantext.git] / src / Gargantext / Text / Metrics.hs
{-|
Module      : Gargantext.Text.Metrics
Description : Text metrics (occurrences, cooccurrences) with worked examples.
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Mainly meant to re-export functions from @Data.Text.Metrics@; that import is
currently commented out, so for now this module only holds small worked
examples built on 'occurrences' and 'cooc'.
-}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14
15 module Gargantext.Text.Metrics where
16
17 import Data.Text (Text, pack)
18 import Data.List (concat)
19
20 --import GHC.Real (Ratio)
21 --import qualified Data.Text.Metrics as DTM
22
23 import Gargantext.Prelude
24
25 import Gargantext.Text.Metrics.Occurrences (occurrences, cooc)
26 import Gargantext.Text.Terms (TermType(Multi), terms)
27 import Gargantext.Core (Lang(EN))
28 import Gargantext.Core.Types (Terms(..))
29 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
30
31 --noApax :: Ord a => Map a Occ -> Map a Occ
32 --noApax m = M.filter (>1) m
33
34
-- | Sample text shared by the examples of this module
-- ('metrics_sentences_Test', 'metrics_occ' and 'metrics_cooc').
--
-- The spelling "glas" is part of the fixture and must stay as is: the
-- expected values in 'metrics_sentences' and 'metrics_terms' use it too.
metrics_text :: Text
metrics_text =
  "A table is an object. A glas is an object. The glas is on the table. The spoon is an object. The spoon is on the table."
37
-- | The sentences of 'metrics_text', one list element per sentence and in
-- the order they appear in the text.
--
-- 'metrics_sentences_Test' compares this list with the output of 'splitBy'.
metrics_sentences :: [Text]
metrics_sentences =
  [ "A table is an object."
  , "A glas is an object."
  , "The glas is on the table."
  , "The spoon is an object."
  , "The spoon is on the table."
  ]
46
47
-- | Check that splitting 'metrics_text' with @Sentences 0@ gives back
-- exactly 'metrics_sentences'.
metrics_sentences_Test = splitSentences == metrics_sentences
  where
    -- The sentences actually produced by the splitter.
    splitSentences = splitBy (Sentences 0) metrics_text
49
-- | Terms of each sentence of 'metrics_text', one inner list per sentence.
--
-- Within a sentence the terms are reordered and laid out so that every term
-- keeps its own column (table, object, glas, spoon); reading a column top to
-- bottom gives the number of sentences the term occurs in.
metrics_terms :: [[[Text]]]
metrics_terms =
  [ [ ["table"], ["object"]                     ]
  , [            ["object"], ["glas"]           ]
  , [ ["table"],             ["glas"]           ]
  , [            ["object"],          ["spoon"] ]
  , [ ["table"],                      ["spoon"] ]
  ]
--metrics_terms_Test = (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text) == metrics_terms
59
{-
Result recorded by the original author (not re-checked here):

fromList [ (fromList ["table"] , fromList [(["table"] , 3)])
         , (fromList ["object"], fromList [(["object"], 3)])
         , (fromList ["glas"]  , fromList [(["glas"]  , 2)])
         , (fromList ["spoon"] , fromList [(["spoon"] , 2)])
         ]
-}
-- | Occurrences of the terms of 'metrics_text'.
--
-- Each sentence given by @splitBy (Sentences 0)@ goes through
-- @terms Multi EN@; the per-sentence results are concatenated and the flat
-- list is handed to 'occurrences'.
metrics_occ = countAll <$> mapM (terms Multi EN) (splitBy (Sentences 0) metrics_text)
  where
    -- Flatten the per-sentence term lists before counting.
    countAll termsBySentence = occurrences (concat termsBySentence)
68
{-
Result recorded by the original author (not re-checked here):

fromList [ ((["glas"]  , ["object"]), 6)
         , ((["glas"]  , ["spoon"] ), 4)
         , ((["glas"]  , ["table"] ), 6)
         , ((["object"], ["spoon"] ), 6)
         , ((["object"], ["table"] ), 9)
         , ((["spoon"] , ["table"] ), 6)
         ]

NOTE(review): every count above equals the product of the two terms'
occurrence counts (table 3, object 3, glas 2, spoon 2), and "glas" and
"spoon" never share a sentence in 'metrics_text'. Confirm that this
transcript still matches what 'cooc' currently returns.
-}
-- | Cooccurrences of the terms of 'metrics_text'.
--
-- Unlike 'metrics_occ', the per-sentence term lists are passed to 'cooc'
-- without being concatenated first.
metrics_cooc = cooc <$> mapM (terms Multi EN) sentences
  where
    -- 'metrics_text' cut into sentences.
    sentences = splitBy (Sentences 0) metrics_text
76
-- | Terms extracted, sentence by sentence, from a small sample made of the
-- same sentence written twice.
--
-- NOTE(review): despite the name, 'cooc' is not applied here; the value is
-- only the output of @terms Multi EN@ for each sentence. Presumably kept as
-- an input for trying 'cooc' by hand -- confirm.
metrics_cooc' =
  mapM (terms Multi EN) (splitBy (Sentences 0) "The table object. The table object.")
78
79
80
81
82