2 Module : Gargantext.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Mainly reexport functions in @Data.Text.Metrics@
14 noApax :: Ord a => Map a Occ -> Map a Occ
15 noApax m = M.filter (>1) m
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
22 module Gargantext.Text.Metrics
25 import Data.Text (Text, pack)
28 import qualified Data.List as L
29 import qualified Data.Map as M
30 import qualified Data.Set as S
31 import qualified Data.Text as T
32 import Data.Tuple.Extra (both)
33 --import GHC.Real (Ratio)
34 --import qualified Data.Text.Metrics as DTM
35 import Data.Array.Accelerate (toList)
38 import Gargantext.Prelude
40 import Gargantext.Text.Metrics.Count (occurrences, cooc)
41 import Gargantext.Text.Terms (TermType(MonoMulti), terms)
42 import Gargantext.Core (Lang(EN))
43 import Gargantext.Core.Types (Terms(..))
44 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
46 import Gargantext.Viz.Graph.Distances.Matrice
47 import Gargantext.Viz.Graph.Index
50 -- ord relevance: top n plus inclus
51 -- échantillonnage de généricity
53 --filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
55 ---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
56 ----(ti, fi) = createIndices m
57 -- . fromIndex fi $ filterMat $ cooc2mat ti m
60 import Data.Array.Accelerate (Matrix)
62 filterMat :: Matrix Int -> [(Index, Index)]
63 filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
65 (incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (conditional' m)
72 incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
73 incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
75 (ti,fi) = createIndices m
76 ordonne x = L.reverse $ L.sortOn snd $ zip (map snd $ M.toList fi) (toList x)
82 metrics_text = T.intercalate " " metrics_sentences
84 metrics_sentences' :: [Text]
85 metrics_sentences' = splitBy (Sentences 0) metrics_text
88 metrics_sentences :: [Text]
89 metrics_sentences = [ "There is a table with a glass of wine and a spoon."
90 , "I can see the glass on the table."
91 , "There was only a spoon on that table."
92 , "The glass just fall from the table, pouring wine everywhere."
93 , "I wish the glass did not contain wine."
96 metrics_sentences_Test = metrics_sentences == metrics_sentences'
98 -- | Terms reordered to visually check occurrences
100 {- [ [["table"],["glass"],["wine"],["spoon"]]
101 , [["glass"],["table"]]
102 , [["spoon"],["table"]]
103 , [["glass"],["table"],["wine"]]
104 , [["glass"],["wine"]]
108 metrics_terms :: IO [[Terms]]
109 metrics_terms = mapM (terms MonoMulti EN) $ splitBy (Sentences 0) metrics_text
113 fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
114 , (fromList ["object"],fromList [(["object"], 3 )])
115 , (fromList ["glas"] ,fromList [(["glas"] , 2 )])
116 , (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
118 metrics_occ = occurrences <$> L.concat <$> metrics_terms
121 -- fromList [((["glas"],["object"]),6)
122 ,((["glas"],["spoon"]),4)
123 ,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
126 metrics_cooc = cooc <$> metrics_terms
128 metrics_cooc_mat = do
130 let (ti,_) = createIndices m
131 let mat_cooc = cooc2mat ti m
134 , incExcSpeGen_proba mat_cooc
135 , incExcSpeGen mat_cooc
138 metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc