{-|
Module      : Gargantext.Text.Metrics
Description : All parsers of Gargantext in one file.
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental

Mainly re-exports functions from @Data.Text.Metrics@.
-}
13 {-# LANGUAGE NoImplicitPrelude #-}
15 module Gargantext.Text.Metrics where
17 import Data.Text (Text, pack)
18 import qualified Data.Text as T
19 import Data.List (concat)
21 --import GHC.Real (Ratio)
22 --import qualified Data.Text.Metrics as DTM
24 import Gargantext.Prelude
26 import Gargantext.Text.Metrics.Count (occurrences, cooc)
27 import Gargantext.Text.Terms (TermType(Multi), terms)
28 import Gargantext.Core (Lang(EN))
29 import Gargantext.Core.Types (Terms(..))
30 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
32 --noApax :: Ord a => Map a Occ -> Map a Occ
33 --noApax m = M.filter (>1) m
-- | Sample corpus for the examples in this module: five short English
-- sentences joined with 'T.concat', so no separator is inserted between the
-- final period of one sentence and the first letter of the next.
--
-- The spellings \"glas\" and \"dring\" are kept verbatim; the sample outputs
-- further down in this file use @glas@ as a term.
--
-- NOTE(review): the list literal was left unterminated; it is closed here
-- right after the last visible sentence. Confirm no further sentences are
-- missing.
metrics_text :: Text
metrics_text = T.concat
  [ "A table is an object."
  , "A glas is an object too."
  , "Using a glas to dring is a function."
  , "Using a spoon to eat is a function."
  , "The spoon is an object to eat."
  ]
-- | The sample corpus 'metrics_text' cut into pieces by 'splitBy' with the
-- @Sentences 0@ context.
metrics_sentences' :: [Text]
metrics_sentences' = Sentences 0 `splitBy` metrics_text
-- | Hand-written list of five sentences, compared against
-- @metrics_sentences'@ by @metrics_sentences_Test@.
--
-- NOTE(review): only the first two sentences are the same as the ones that
-- make up @metrics_text@; confirm which of the two fixtures is intended.
metrics_sentences :: [Text]
metrics_sentences =
  [ "A table is an object."
  , "A glas is an object too."
  , "The glas and the spoon are on the table."
  , "The spoon is an object to eat."
  , "The spoon is on the table and the plate and the glas."
  ]
-- | Equality check between the hand-written @metrics_sentences@ and the
-- computed @metrics_sentences'@.
--
-- NOTE(review): @metrics_sentences@ contains words (e.g. \"plate\") that do
-- not appear anywhere in @metrics_text@, so this presumably cannot hold with
-- the current fixtures — verify.
metrics_sentences_Test = (==) metrics_sentences metrics_sentences'
-- | Terms of the sample corpus, reordered so that occurrences can be checked
-- by eye.
--
-- Still a placeholder: the value is 'undefined', so forcing it fails at
-- runtime.
metrics_terms :: [[Text]]
metrics_terms = undefined
-- | Runs @terms Multi EN@ over every piece that @splitBy (Sentences 0)@
-- produces from @metrics_text@, yielding one @[Terms]@ per piece.
metrics_terms' :: IO [[Terms]]
metrics_terms' = mapM (terms Multi EN) (splitBy (Sentences 0) metrics_text)
65 --metrics_terms_Test = metrics_terms == ((map _terms_label) <$> metrics_terms')
-- fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])
--          , (fromList ["object"],fromList [(["object"], 3 )])
--          , (fromList ["glas"]  ,fromList [(["glas"]  , 2 )])
--          , (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
--          ]
-- | Extracts terms from every piece of @metrics_text@ (same pipeline as
-- @metrics_terms'@), flattens the per-piece lists with 'concat' and hands the
-- flat list to 'occurrences'.
metrics_occ =
  occurrences
    <$> (concat <$> mapM (terms Multi EN) (splitBy (Sentences 0) metrics_text))
-- fromList [((["glas"],["object"]),6)
--          ,((["glas"],["spoon"]),4)
--          ,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-- | Extracts terms from every piece of @metrics_text@ (same pipeline as
-- @metrics_terms'@) and applies 'cooc' to the resulting list of per-piece
-- term lists.
metrics_cooc = cooc <$> mapM (terms Multi EN) (splitBy (Sentences 0) metrics_text)
-- | Term extraction (@terms Multi EN@) over the pieces of a tiny literal
-- corpus in which the same sentence appears twice.
--
-- NOTE(review): despite the name, 'cooc' is not applied here; the value is
-- only the extracted terms. Confirm whether that is intended.
metrics_cooc' =
  mapM (terms Multi EN) (splitBy (Sentences 0) "The table object. The table object.")