]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics.hs
[Scores] Documentation.
[gargantext.git] / src / Gargantext / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11
12
13 TODO
14 noApax :: Ord a => Map a Occ -> Map a Occ
15 noApax m = M.filter (>1) m
16
17 -}
18
19 {-# LANGUAGE NoImplicitPrelude #-}
20 {-# LANGUAGE OverloadedStrings #-}
21
22 module Gargantext.Text.Metrics
23 where
24
25 import Data.Text (Text, pack)
26 import Data.Map (Map)
27
28 import qualified Data.List as L
29 import qualified Data.Map as M
30 import qualified Data.Set as S
31 import qualified Data.Text as T
32 import Data.Tuple.Extra (both)
33 --import GHC.Real (Ratio)
34 --import qualified Data.Text.Metrics as DTM
35 import Data.Array.Accelerate (toList)
36
37
38 import Gargantext.Prelude
39
40 import Gargantext.Text.Metrics.Count (occurrences, cooc)
41 import Gargantext.Text.Terms (TermType(MonoMulti), terms)
42 import Gargantext.Core (Lang(EN))
43 import Gargantext.Core.Types (Terms(..))
44 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
45
46 import Gargantext.Viz.Graph.Distances.Matrice
47 import Gargantext.Viz.Graph.Index
48
49
50 -- ord relevance: top n plus inclus
51 -- échantillonnage de généricity
52 --
53 --filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
54 --filterCooc m =
55 ---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
56 ----(ti, fi) = createIndices m
57 -- . fromIndex fi $ filterMat $ cooc2mat ti m
58
59
60 import Data.Array.Accelerate (Matrix)
61
62 filterMat :: Matrix Int -> [(Index, Index)]
63 filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
64 where
65 (incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (conditional' m)
66 n = nIe + nSg
67 nIe = 30
68 nSg = 70
69
70
71
72 incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
73 incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
74 where
75 (ti,fi) = createIndices m
76 ordonne x = L.reverse $ L.sortOn snd $ zip (map snd $ M.toList fi) (toList x)
77
78
79
80
81 metrics_text :: Text
82 metrics_text = T.intercalate " " metrics_sentences
83
84 metrics_sentences' :: [Text]
85 metrics_sentences' = splitBy (Sentences 0) metrics_text
86
87 -- | Sentences
88 metrics_sentences :: [Text]
89 metrics_sentences = [ "There is a table with a glass of wine and a spoon."
90 , "I can see the glass on the table."
91 , "There was only a spoon on that table."
92 , "The glass just fall from the table, pouring wine everywhere."
93 , "I wish the glass did not contain wine."
94 ]
95
96 metrics_sentences_Test = metrics_sentences == metrics_sentences'
97
98 -- | Terms reordered to visually check occurrences
99 -- >>>
100 {- [ [["table"],["glass"],["wine"],["spoon"]]
101 , [["glass"],["table"]]
102 , [["spoon"],["table"]]
103 , [["glass"],["table"],["wine"]]
104 , [["glass"],["wine"]]
105 ]
106 -}
107
108 metrics_terms :: IO [[Terms]]
109 metrics_terms = mapM (terms MonoMulti EN) $ splitBy (Sentences 0) metrics_text
110
111 -- | Occurrences
112 {-
113 fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
114 , (fromList ["object"],fromList [(["object"], 3 )])
115 , (fromList ["glas"] ,fromList [(["glas"] , 2 )])
116 , (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
117 -}
118 metrics_occ = occurrences <$> L.concat <$> metrics_terms
119
120 {-
121 -- fromList [((["glas"],["object"]),6)
122 ,((["glas"],["spoon"]),4)
123 ,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
124
125 -}
126 metrics_cooc = cooc <$> metrics_terms
127
128 metrics_cooc_mat = do
129 m <- metrics_cooc
130 let (ti,_) = createIndices m
131 let mat_cooc = cooc2mat ti m
132 pure ( ti
133 , mat_cooc
134 , incExcSpeGen_proba mat_cooc
135 , incExcSpeGen mat_cooc
136 )
137
138 metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
139