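{-|
Module      : Gargantext.Text.Metrics
Description : Sample sentences and the term metrics (occurrences, cooccurrences) computed on them.
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental

Intended mainly to re-export functions from @Data.Text.Metrics@ (that import is
currently commented out); for now the module holds sample sentences and the
metrics computed on them.
-}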
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
16 module Gargantext.Text.Metrics where
18 import Data.Text (Text, pack)
19 import qualified Data.Text as T
20 import Data.List (concat)
22 --import GHC.Real (Ratio)
23 --import qualified Data.Text.Metrics as DTM
25 import Gargantext.Prelude
27 import Gargantext.Text.Metrics.Count (occurrences, cooc)
28 import Gargantext.Text.Terms (TermType(Multi), terms)
29 import Gargantext.Core (Lang(EN))
30 import Gargantext.Core.Types (Terms(..))
31 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
33 --noApax :: Ord a => Map a Occ -> Map a Occ
34 --noApax m = M.filter (>1) m
-- | Sample corpus: five short English sentences glued together with
-- 'T.concat', i.e. with nothing inserted between the final period of one
-- sentence and the first letter of the next.
--
-- The misspellings ("glas", "dring") are part of the fixture data and are
-- reproduced as they were written.
metrics_text :: Text
metrics_text = T.concat
  [ "A table is an object."
  , "A glas is an object too."
  , "Using a glas to dring is a function."
  , "Using a spoon to eat is a function."
  , "The spoon is an object to eat."
  ]
-- | 'metrics_text' cut into pieces by 'splitBy' using the @Sentences 0@
-- context.
metrics_sentences' :: [Text]
metrics_sentences' = Sentences 0 `splitBy` metrics_text
-- | Hand-written list of five sentences; 'metrics_sentences_Test' compares
-- it with the computed 'metrics_sentences''.
metrics_sentences :: [Text]
metrics_sentences =
  [ "A table is an object."
  , "A glas is an object too."
  , "The glas and the spoon are on the table."
  , "The spoon is an object to eat."
  , "The spoon is on the table and the plate and the glas."
  ]
-- | 'True' exactly when the hand-written 'metrics_sentences' equal the
-- computed 'metrics_sentences''.
--
-- NOTE(review): 'metrics_sentences' lists "The glas and the spoon are on the
-- table." and "The spoon is on the table and the plate and the glas.", neither
-- of which appears in 'metrics_text', while 'metrics_text' contains two
-- "Using a ..." sentences that are missing from 'metrics_sentences'. The two
-- fixtures look out of sync, so this presumably evaluates to 'False' --
-- confirm which of the two is authoritative.
metrics_sentences_Test :: Bool
metrics_sentences_Test = metrics_sentences == metrics_sentences'
-- | Placeholder for the expected terms, reordered so that occurrences can be
-- checked by eye. It is not filled in yet: the value is 'undefined', so
-- forcing it raises an exception.
metrics_terms :: [[Text]]
metrics_terms = undefined
-- | Runs @terms Multi EN@ on every element of 'metrics_sentences'' (the
-- 'splitBy' output for 'metrics_text') and collects one list of 'Terms' per
-- element.
metrics_terms' :: IO [[Terms]]
metrics_terms' = mapM (terms Multi EN) metrics_sentences'
66 --metrics_terms_Test = metrics_terms == ((map _terms_label) <$> metrics_terms')
-- | Occurrences: 'occurrences' applied to the terms of 'metrics_text' (as
-- produced by 'metrics_terms''), flattened into a single list with 'concat'.
--
-- Result recorded by the original author (not re-checked here):
--
-- > fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])
-- >          , (fromList ["object"],fromList [(["object"], 3 )])
-- >          , (fromList ["glas"]  ,fromList [(["glas"]  , 2 )])
-- >          , (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-- >          ]
--
-- NOTE(review): no type signature is given; the result type is whatever
-- 'occurrences' (from "Gargantext.Text.Metrics.Count") returns, wrapped in 'IO'.
metrics_occ = occurrences <$> (concat <$> metrics_terms')
-- | Cooccurrences: 'cooc' applied to the terms of 'metrics_text' as produced
-- by 'metrics_terms''.
--
-- Result recorded by the original author (not re-checked here):
--
-- > fromList [((["glas"],["object"]),6)
-- >          ,((["glas"],["spoon"]),4)
-- >          ,((["glas"],["table"]),6)
-- >          ,((["object"],["spoon"]),6)
-- >          ,((["object"],["table"]),9)
-- >          ,((["spoon"],["table"]),6)]
--
-- NOTE(review): no type signature is given; the result type is whatever
-- 'cooc' (from "Gargantext.Text.Metrics.Count") returns, wrapped in 'IO'.
metrics_cooc = cooc <$> metrics_terms'
-- | Terms extracted with @terms Multi EN@ from the literal
-- @"The table object. The table object."@ after it has gone through
-- @splitBy (Sentences 0)@.
--
-- NOTE(review): despite the name, 'cooc' is never applied here; the value is
-- only the extracted terms. Confirm whether a @cooc \<$\>@ is missing.
metrics_cooc' :: IO [[Terms]]
metrics_cooc' =
  mapM (terms Multi EN) (splitBy (Sentences 0) "The table object. The table object.")