]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Examples.hs
Merge branch 'dev-textflow' into dev
[gargantext.git] / src / Gargantext / Core / Text / Examples.hs
1 {-|
2 Module : Gargantext.Core.Text.Examples
3 Description : Minimal Examples to test behavior of the functions.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This file is intended for these purposes:
11
12 - documentation for teaching and research
13 - learn basics of Haskell which is a scientific programming language
14 - behavioral tests (that should be completed with uni-tests and scale-tests)
15
16 This document defines basic of Text definitions according to Gargantext..
17
18 - What is a term ?
19 - What is a sentence ?
20 - What is a paragraph ?
21
22 -}
23
24 {-# LANGUAGE BangPatterns #-}
25
26 module Gargantext.Core.Text.Examples
27 where
28
29 import Data.Ord (Down(..))
30 import qualified Data.List as L
31
32 import Data.Map (Map)
33 import qualified Data.Map as M
34
35 import Data.Text (Text)
36 import qualified Data.Text as T
37
38 import Data.Tuple.Extra (both)
39 import Data.Array.Accelerate (toList, Matrix)
40
41 import Gargantext.Prelude
42 import Gargantext.Core.Text.Metrics.Count (occurrences, cooc)
43 import Gargantext.Core.Text.Terms (TermType(MonoMulti), terms)
44 import Gargantext.Core (Lang(EN))
45 import Gargantext.Core.Types (Terms(..), Label)
46 import Gargantext.Core.Text.Context (splitBy, SplitContext(Sentences))
47 import Gargantext.Core.Text.Metrics.Count (Grouped)
48 import Gargantext.Core.Viz.Graph.Distances.Matrice
49 import Gargantext.Core.Viz.Graph.Index
50
51 import qualified Data.Array.Accelerate as DAA
52
53 -- | Sentences
54 -- Let be a list of Texts: ['Data.Text.Text']. Each text in this example is a sentence.
55 --
56 -- >>> ex_sentences
57 -- ["There is a table with a glass of wine and a spoon.","I can see the glass on the table.","There was only a spoon on that table.","The glass just fall from the table, pouring wine everywhere.","I wish the glass did not contain wine."]
58 ex_sentences :: [Text]
59 ex_sentences = [ "There is a table with a glass of wine and a spoon."
60 , "I can see the glass on the table."
61 , "There was only a spoon on that table."
62 , "The glass just fall from the table, pouring wine everywhere."
63 , "I wish the glass did not contain wine."
64 ]
65
66
67 -- | From list to simple text as paragraph.
68 -- Let 'Data.Text.intercalate' each sentence with a space. Result is a paragraph.
69 --
70 -- >>> T.intercalate (T.pack " ") ex_sentences
71 -- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
72 ex_paragraph :: Text
73 ex_paragraph = T.intercalate " " ex_sentences
74
75 -- | Let split sentences by Contexts of text.
76 -- More about 'Gargantext.Core.Text.Context'
77 --
78 -- >>> ex_sentences == splitBy (Sentences 0) ex_paragraph
79 -- True
80
81 -- | Terms reordered to visually check occurrences
82 -- Split text by sentence and then extract ngrams.
83 --
84 -- >>> mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
85 -- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
86 ex_terms :: IO [[Terms]]
87 ex_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
88
89 -- | Test the Occurrences
90 --
91 -- >>> occurrences <$> L.concat <$> ex_terms
92 -- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
93 ex_occ :: IO (Map Grouped (Map Terms Int))
94 ex_occ = occurrences <$> L.concat <$> ex_terms
95
96 -- | Test the cooccurrences
97 -- Use the 'Gargantext.Core.Text.Metrics.Count.cooc' function.
98 --
99 -- >>> cooc <$> ex_terms
100 -- fromList [((["glass"],["glass"]),4),((["spoon"],["glass"]),1),((["spoon"],["spoon"]),2),((["table"],["glass"]),3),((["table"],["spoon"]),2),((["table"],["table"]),4),((["wine"],["glass"]),3),((["wine"],["spoon"]),1),((["wine"],["table"]),2),((["wine"],["wine"]),3)]
101 ex_cooc :: IO (Map (Label, Label) Int)
102 ex_cooc = cooc <$> ex_terms
103
104 -- | Tests the specificity and genericity
105 --
106 -- >>> ex_cooc_mat
107 -- (fromList [(["glass"],0),(["spoon"],1),(["table"],2),(["wine"],3)],Matrix (Z :. 4 :. 4)
108 -- [ 4, 0, 0, 0,
109 -- 1, 2, 0, 0,
110 -- 3, 2, 4, 0,
111 -- 3, 1, 2, 3],Matrix (Z :. 4 :. 4)
112 -- [ 1.0, 0.25, 0.75, 0.75,
113 -- 0.0, 1.0, 1.0, 0.5,
114 -- 0.0, 0.0, 1.0, 0.5,
115 -- 0.0, 0.0, 0.0, 1.0],(Vector (Z :. 4) [0.5833333333333334,0.5833333333333334,0.75,0.5833333333333334],Vector (Z :. 4) [-0.5833333333333334,-0.4166666666666667,0.41666666666666674,0.5833333333333334]))
116 ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
117 ex_cooc_mat = do
118 m <- ex_cooc
119 let (ti,_) = createIndices m
120 let mat_cooc = cooc2mat ti m
121 pure ( ti
122 , mat_cooc
123 , incExcSpeGen_proba mat_cooc
124 , incExcSpeGen mat_cooc
125 )
126
127 ex_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
128 ex_incExcSpeGen = incExcSpeGen_sorted <$> ex_cooc
129
130 incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
131 incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
132 where
133 (ti,fi) = createIndices m
134 ordonne x = sortWith (Down . snd)
135 $ zip (map snd $ M.toList fi) (toList x)
136
137