]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics.hs
[Scores] Inclusion/Exclusion and Specificity/Genericity ok with my tests.
[gargantext.git] / src / Gargantext / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15
16 module Gargantext.Text.Metrics
17 where
18
19 import Data.Text (Text, pack)
20 import Data.Map (Map)
21
22 import qualified Data.List as L
23 import qualified Data.Map as M
24 import qualified Data.Text as T
25 import Data.Tuple.Extra (both)
26 --import GHC.Real (Ratio)
27 --import qualified Data.Text.Metrics as DTM
28 import Data.Array.Accelerate (toList)
29
30
31 import Gargantext.Prelude
32
33 import Gargantext.Text.Metrics.Count (occurrences, cooc)
34 import Gargantext.Text.Terms (TermType(MonoMulti), terms)
35 import Gargantext.Core (Lang(EN))
36 import Gargantext.Core.Types (Terms(..))
37 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
38
39 import Gargantext.Viz.Graph.Distances.Matrice
40 import Gargantext.Viz.Graph.Index
41
42 --noApax :: Ord a => Map a Occ -> Map a Occ
43 --noApax m = M.filter (>1) m
44
45
46 metrics_text :: Text
47 metrics_text = T.intercalate " " metrics_sentences
48
49 metrics_sentences' :: [Text]
50 metrics_sentences' = splitBy (Sentences 0) metrics_text
51
52 -- | Sentences
53 metrics_sentences :: [Text]
54 metrics_sentences = [ "There is a table with a glass of wine and a spoon."
55 , "I can see the glass on the table."
56 , "There was only a spoon on that table."
57 , "The glass just fall from the table, pouring wine elsewhere."
58 , "I wish the glass did not contain wine."
59 ]
60
61 metrics_sentences_Test = metrics_sentences == metrics_sentences'
62
63 -- | Terms reordered to visually check occurrences
64 -- >>>
65 {- [ [["table"],["glass"],["wine"],["spoon"]]
66 , [["glass"],["table"]]
67 , [["spoon"],["table"]]
68 , [["glass"],["table"],["wine"]]
69 , [["glass"],["wine"]]
70 ]
71 -}
72
73 metrics_terms :: IO [[Terms]]
74 metrics_terms = mapM (terms MonoMulti EN) $ splitBy (Sentences 0) metrics_text
75
76 -- | Occurrences
77 {-
78 fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
79 , (fromList ["object"],fromList [(["object"], 3 )])
80 , (fromList ["glas"] ,fromList [(["glas"] , 2 )])
81 , (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
82 -}
83 metrics_occ = occurrences <$> L.concat <$> metrics_terms
84
85 {-
86 -- fromList [((["glas"],["object"]),6)
87 ,((["glas"],["spoon"]),4)
88 ,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
89
90 -}
91 metrics_cooc = cooc <$> metrics_terms
92 metrics_cooc_mat = do
93 m <- metrics_cooc
94 let (ti,_) = createIndices m
95 let mat_cooc = cooc2mat ti m
96 pure ( ti
97 , mat_cooc
98 , incExcSpeGen_proba mat_cooc
99 , incExcSpeGen' mat_cooc
100 )
101
102
103 metrics_incExcSpeGen = incExcSpeGen <$> metrics_cooc
104
105 incExcSpeGen :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
106 incExcSpeGen m = both (\x -> L.reverse $ L.sortOn snd $ zip (map snd $ M.toList fi) (toList x) )
107 (incExcSpeGen' $ cooc2mat ti m )
108 where
109 (ti,fi) = createIndices m
110
111