2 Module : Gargantext.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Mainly reexport functions in @Data.Text.Metrics@
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
16 module Gargantext.Text.Metrics
19 import Data.Text (Text, pack)
22 import qualified Data.List as L
23 import qualified Data.Map as M
24 import qualified Data.Text as T
25 import Data.Tuple.Extra (both)
26 --import GHC.Real (Ratio)
27 --import qualified Data.Text.Metrics as DTM
28 import Data.Array.Accelerate (toList)
31 import Gargantext.Prelude
33 import Gargantext.Text.Metrics.Count (occurrences, cooc)
34 import Gargantext.Text.Terms (TermType(MonoMulti), terms)
35 import Gargantext.Core (Lang(EN))
36 import Gargantext.Core.Types (Terms(..))
37 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
39 import Gargantext.Viz.Graph.Distances.Matrice
40 import Gargantext.Viz.Graph.Index
42 --noApax :: Ord a => Map a Occ -> Map a Occ
43 --noApax m = M.filter (>1) m
47 metrics_text = T.intercalate " " metrics_sentences
49 metrics_sentences' :: [Text]
50 metrics_sentences' = splitBy (Sentences 0) metrics_text
53 metrics_sentences :: [Text]
54 metrics_sentences = [ "There is a table with a glass of wine and a spoon."
55 , "I can see the glass on the table."
56 , "There was only a spoon on that table."
57 , "The glass just fall from the table, pouring wine on the floor."
58 , "I wish the glass did not contain wine."
61 metrics_sentences_Test = metrics_sentences == metrics_sentences'
63 -- | Terms reordered to visually check occurrences
64 metrics_terms :: IO [[Terms]]
65 metrics_terms = mapM (terms MonoMulti EN) $ splitBy (Sentences 0) metrics_text
69 fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
70 , (fromList ["object"],fromList [(["object"], 3 )])
71 , (fromList ["glas"] ,fromList [(["glas"] , 2 )])
72 , (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
74 metrics_occ = occurrences <$> L.concat <$> metrics_terms
77 -- fromList [((["glas"],["object"]),6)
78 ,((["glas"],["spoon"]),4)
79 ,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
82 metrics_cooc = cooc <$> metrics_terms
85 let (ti,_) = createIndices m
86 let mat_cooc = cooc2mat ti m
89 , incExcSpeGen_proba mat_cooc
90 , incExcSpeGen' mat_cooc
94 metrics_incExcSpeGen = incExcSpeGen <$> metrics_cooc
96 incExcSpeGen :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
97 incExcSpeGen m = both (\x -> L.reverse $ L.sortOn snd $ zip (map snd $ M.toList fi) (toList x) )
98 (incExcSpeGen' $ cooc2mat ti m )
100 (ti,fi) = createIndices m