]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics.hs
[SCORE] spegen quality tested, need to add test in comments.
[gargantext.git] / src / Gargantext / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15
16 module Gargantext.Text.Metrics
17 where
18
19 import Data.Text (Text, pack)
20 import Data.Map (Map)
21
22 import qualified Data.List as L
23 import qualified Data.Map as M
24 import qualified Data.Text as T
25 import Data.Tuple.Extra (both)
26 --import GHC.Real (Ratio)
27 --import qualified Data.Text.Metrics as DTM
28 import Data.Array.Accelerate (toList)
29
30
31 import Gargantext.Prelude
32
33 import Gargantext.Text.Metrics.Count (occurrences, cooc)
34 import Gargantext.Text.Terms (TermType(MonoMulti), terms)
35 import Gargantext.Core (Lang(EN))
36 import Gargantext.Core.Types (Terms(..))
37 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
38
39 import Gargantext.Viz.Graph.Distances.Matrice
40 import Gargantext.Viz.Graph.Index
41
42 --noApax :: Ord a => Map a Occ -> Map a Occ
43 --noApax m = M.filter (>1) m
44
45
46 metrics_text :: Text
47 metrics_text = T.intercalate " " metrics_sentences
48
49 metrics_sentences' :: [Text]
50 metrics_sentences' = splitBy (Sentences 0) metrics_text
51
52 -- | Sentences
53 metrics_sentences :: [Text]
54 metrics_sentences = [ "There is a table with a glass of wine and a spoon."
55 , "I can see the glass on the table."
56 , "There was a spoon on that table."
57 , "The glass just fall from the table, pouring wine elsewhere."
58 , "I wish the glass did not contain wine."
59 ]
60
61 metrics_sentences_Test = metrics_sentences == metrics_sentences'
62
63 -- | Terms reordered to visually check occurrences
64 metrics_terms :: IO [[Terms]]
65 metrics_terms = mapM (terms MonoMulti EN) $ splitBy (Sentences 0) metrics_text
66
67 -- | Occurrences
68 {-
69 fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
70 , (fromList ["object"],fromList [(["object"], 3 )])
71 , (fromList ["glas"] ,fromList [(["glas"] , 2 )])
72 , (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
73 -}
74 metrics_occ = occurrences <$> L.concat <$> metrics_terms
75
76 {-
77 -- fromList [((["glas"],["object"]),6)
78 ,((["glas"],["spoon"]),4)
79 ,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
80
81 -}
82 metrics_cooc = cooc <$> metrics_terms
83 metrics_cooc_mat = do
84 m <- metrics_cooc
85 let (ti,_) = createIndices m
86 let mat = cooc2mat ti m
87 pure ( ti
88 , mat
89 , incExcSpeGen_proba mat
90 , incExcSpeGen' mat
91 )
92
93
94 metrics_incExcSpeGen = incExcSpeGen <$> metrics_cooc
95
96 incExcSpeGen :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
97 incExcSpeGen m = both (\x -> L.reverse $ L.sortOn snd $ zip (map snd $ M.toList fi) (toList x) )
98 (incExcSpeGen' $ cooc2mat ti m )
99 where
100 (ti,fi) = createIndices m
101
102