{-|
Module      : Gargantext.Text.Metrics
Description : Text metrics (occurrences, cooccurrences) checked on a sample text.
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental

Originally meant to mainly re-export functions from @Data.Text.Metrics@
(that import is currently commented out below).
-}
13 {-# LANGUAGE NoImplicitPrelude #-}
15 module Gargantext.Text.Metrics where
17 import Data.Text (Text, pack)
18 import Data.List (concat)
20 --import GHC.Real (Ratio)
21 --import qualified Data.Text.Metrics as DTM
23 import Gargantext.Prelude
25 import Gargantext.Text.Metrics.Occurrences (occurrences, cooc)
26 import Gargantext.Text.Terms (TermType(Multi), terms)
27 import Gargantext.Core (Lang(EN))
28 import Gargantext.Core.Types (Terms(..))
29 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
31 --noApax :: Ord a => Map a Occ -> Map a Occ
32 --noApax m = M.filter (>1) m
-- | Sample English text used as the input fixture for the checks in this
-- module: five short sentences about a table, a glas, a spoon and objects.
--
-- The explicit signature pins the string literal to 'Text' instead of
-- leaving its type to be inferred from the use sites.
metrics_text :: Text
metrics_text = "A table is an object. A glas is an object. The glas is on the table. The spoon is an object. The spoon is on the table."
-- | The five sentences of 'metrics_text', written out by hand.
--
-- Used as the expected result when the sample text is split into sentences.
metrics_sentences :: [Text]
metrics_sentences =
  [ "A table is an object."
  , "A glas is an object."
  , "The glas is on the table."
  , "The spoon is an object."
  , "The spoon is on the table."
  ]
-- | Check that splitting 'metrics_text' with @splitBy (Sentences 0)@ yields
-- exactly 'metrics_sentences'.
--
-- 'True' when both lists are equal.
metrics_sentences_Test :: Bool
metrics_sentences_Test = splitBy (Sentences 0) metrics_text == metrics_sentences
-- | Terms reordered to visually check occurrences.
--
-- Five entries, presumably one per sentence of 'metrics_text' (to be
-- confirmed against the output of the term extractor). Columns are aligned
-- so that every occurrence of the same term sits in the same column.
metrics_terms :: [[[Text]]]
metrics_terms =
  [ [["table"], ["object"]                       ]
  , [           ["object"], ["glas"]             ]
  , [["table"],             ["glas"]             ]
  , [           ["object"],           ["spoon"]  ]
  , [["table"],                       ["spoon"]  ]
  ]
58 --metrics_terms_Test = (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text) == metrics_terms
-- Expected occurrences (presumably the value computed by 'metrics_occ'):
--
-- fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])
--          , (fromList ["object"],fromList [(["object"], 3 )])
--          , (fromList ["glas"]  ,fromList [(["glas"]  , 2 )])
--          , (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
--          ]
-- | Occurrences of the terms found in 'metrics_text'.
--
-- The sample text is split with @splitBy (Sentences 0)@, 'terms' is run on
-- every piece with @Multi@ and @EN@, the per-piece results are concatenated
-- and the flat list is handed to 'occurrences'. The result stays wrapped in
-- whatever monad 'terms' runs in.
--
-- NOTE(review): no signature is given here because the types of 'terms' and
-- 'occurrences' are not visible from this module alone.
metrics_occ = occurrences <$> concat <$> mapM (terms Multi EN) pieces
  where
    pieces = splitBy (Sentences 0) metrics_text
-- Expected cooccurrences (presumably the value computed by 'metrics_cooc'):
--
-- fromList [((["glas"],["object"]),6)
--          ,((["glas"],["spoon"]),4)
--          ,((["glas"],["table"]),6)
--          ,((["object"],["spoon"]),6)
--          ,((["object"],["table"]),9)
--          ,((["spoon"],["table"]),6)]
-- | Cooccurrences of the terms found in 'metrics_text'.
--
-- The sample text is split with @splitBy (Sentences 0)@, 'terms' is run on
-- every piece with @Multi@ and @EN@, and the resulting list of per-piece
-- term lists is handed to 'cooc' (no concatenation, unlike the occurrence
-- count). The result stays wrapped in whatever monad 'terms' runs in.
metrics_cooc = cooc <$> mapM (terms Multi EN) pieces
  where
    pieces = splitBy (Sentences 0) metrics_text
-- | Per-piece terms of a tiny two-sentence sample in which the same
-- sentence appears twice.
--
-- The sample is split with @splitBy (Sentences 0)@ and 'terms' is run on
-- every piece with @Multi@ and @EN@.
--
-- NOTE(review): despite the name, 'cooc' is not applied here; this only
-- yields the extracted terms. Confirm whether that is intended.
metrics_cooc' = mapM (terms Multi EN) (splitBy (Sentences 0) sample)
  where
    sample = "The table object. The table object."