]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics/Examples.hs
[VERSION + DOC] starting the versionning from 0. Adding API modules to the doc.
[gargantext.git] / src / Gargantext / Text / Metrics / Examples.hs
1 {-|
2 Module : Gargantext.Text.Metrics.Examples
3 Description : Minimal Examples to test behavior of the functions.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This file is intended for these purposes:
11
12 - documentation for teaching and research
13 - behavioral tests (that should be completed with uni-tests and scale-tests
14
15 -}
16
17 {-# LANGUAGE BangPatterns #-}
18 {-# LANGUAGE NoImplicitPrelude #-}
19 {-# LANGUAGE OverloadedStrings #-}
20
21 module Gargantext.Text.Metrics.Examples
22 where
23
24 import Data.Ord (Down(..))
25 import qualified Data.List as L
26
27 import Data.Map (Map)
28 import qualified Data.Map as M
29
30 import Data.Text (Text)
31 import qualified Data.Text as T
32
33 import Data.Tuple.Extra (both)
34 import Data.Array.Accelerate (toList, Matrix)
35
36 import Gargantext.Prelude
37 import Gargantext.Text.Metrics.Count (occurrences, cooc)
38 import Gargantext.Text.Terms (TermType(MonoMulti), terms)
39 import Gargantext.Core (Lang(EN))
40 import Gargantext.Core.Types (Terms(..), Label)
41 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
42 import Gargantext.Text.Metrics.Count (Grouped)
43 import Gargantext.Viz.Graph.Distances.Matrice
44 import Gargantext.Viz.Graph.Index
45
46 import qualified Data.Array.Accelerate as DAA
47
48
49 -- | From list to simple text
50 --
51 -- >>> metrics_text
52 -- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
53 metrics_text :: Text
54 metrics_text = T.intercalate " " metrics_sentences
55
56
57 -- | Sentences
58 --
59 -- >>> metrics_sentences
60 -- ["There is a table with a glass of wine and a spoon.","I can see the glass on the table.","There was only a spoon on that table.","The glass just fall from the table, pouring wine everywhere.","I wish the glass did not contain wine."]
61 metrics_sentences :: [Text]
62 metrics_sentences = [ "There is a table with a glass of wine and a spoon."
63 , "I can see the glass on the table."
64 , "There was only a spoon on that table."
65 , "The glass just fall from the table, pouring wine everywhere."
66 , "I wish the glass did not contain wine."
67 ]
68
69 metrics_sentences_Test :: Bool
70 metrics_sentences_Test = metrics_sentences == splitBy (Sentences 0) metrics_text
71
72 -- | Terms reordered to visually check occurrences
73 -- Split text by sentence and then extract ngrams.
74 --
75 -- >>> metrics_terms
76 -- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
77 metrics_terms :: IO [[Terms]]
78 metrics_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) metrics_text
79
80 -- | Test the Occurrences
81 --
82 -- >>> metrics_occ
83 -- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
84 metrics_occ :: IO (Map Grouped (Map Terms Int))
85 metrics_occ = occurrences <$> L.concat <$> metrics_terms
86
87 -- | Test the cooccurrences
88 --
89 -- >>> metrics_cooc
90 -- fromList [((["glass"],["glass"]),4),((["spoon"],["glass"]),1),((["spoon"],["spoon"]),2),((["table"],["glass"]),3),((["table"],["spoon"]),2),((["table"],["table"]),4),((["wine"],["glass"]),3),((["wine"],["spoon"]),1),((["wine"],["table"]),2),((["wine"],["wine"]),3)]
91 metrics_cooc :: IO (Map (Label, Label) Int)
92 metrics_cooc = cooc <$> metrics_terms
93
94 -- | Tests
95 metrics_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
96 metrics_cooc_mat = do
97 m <- metrics_cooc
98 let (ti,_) = createIndices m
99 let mat_cooc = cooc2mat ti m
100 pure ( ti
101 , mat_cooc
102 , incExcSpeGen_proba mat_cooc
103 , incExcSpeGen mat_cooc
104 )
105
106 metrics_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
107 metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
108
109 incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
110 incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
111 where
112 (ti,fi) = createIndices m
113 ordonne x = sortWith (Down . snd) $ zip (map snd $ M.toList fi) (toList x)
114
115