]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Examples.hs
Merge branch 'dev-phylo' into dev-merge
[gargantext.git] / src / Gargantext / Text / Examples.hs
1 {-|
2 Module : Gargantext.Text.Examples
3 Description : Minimal Examples to test behavior of the functions.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This file is intended for these purposes:
11
12 - documentation for teaching and research
13 - learn basics of Haskell which is a scientific programming language
14 - behavioral tests (that should be completed with uni-tests and scale-tests)
15
16 This document defines basic of Text definitions according to Gargantext..
17
18 - What is a term ?
19 - What is a sentence ?
20 - What is a paragraph ?
21
22 -}
23
24 {-# LANGUAGE BangPatterns #-}
25 {-# LANGUAGE NoImplicitPrelude #-}
26 {-# LANGUAGE OverloadedStrings #-}
27
28 module Gargantext.Text.Examples
29 where
30
31 import Data.Ord (Down(..))
32 import qualified Data.List as L
33
34 import Data.Map (Map)
35 import qualified Data.Map as M
36
37 import Data.Text (Text)
38 import qualified Data.Text as T
39
40 import Data.Tuple.Extra (both)
41 import Data.Array.Accelerate (toList, Matrix)
42
43 import Gargantext.Prelude
44 import Gargantext.Text.Metrics.Count (occurrences, cooc)
45 import Gargantext.Text.Terms (TermType(MonoMulti), terms)
46 import Gargantext.Core (Lang(EN))
47 import Gargantext.Core.Types (Terms(..), Label)
48 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
49 import Gargantext.Text.Metrics.Count (Grouped)
50 import Gargantext.Viz.Graph.Distances.Matrice
51 import Gargantext.Viz.Graph.Index
52
53 import qualified Data.Array.Accelerate as DAA
54
55 -- | Sentences
56 -- Let be a list of Texts: ['Data.Text.Text']. Each text in this example is a sentence.
57 --
58 -- >>> ex_sentences
59 -- ["There is a table with a glass of wine and a spoon.","I can see the glass on the table.","There was only a spoon on that table.","The glass just fall from the table, pouring wine everywhere.","I wish the glass did not contain wine."]
60 ex_sentences :: [Text]
61 ex_sentences = [ "There is a table with a glass of wine and a spoon."
62 , "I can see the glass on the table."
63 , "There was only a spoon on that table."
64 , "The glass just fall from the table, pouring wine everywhere."
65 , "I wish the glass did not contain wine."
66 ]
67
68
69 -- | From list to simple text as paragraph.
70 -- Let 'Data.Text.intercalate' each sentence with a space. Result is a paragraph.
71 --
72 -- >>> T.intercalate (T.pack " ") ex_sentences
73 -- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
74 ex_paragraph :: Text
75 ex_paragraph = T.intercalate " " ex_sentences
76
77 -- | Let split sentences by Contexts of text.
78 -- More about 'Gargantext.Text.Context'
79 --
80 -- >>> ex_sentences == splitBy (Sentences 0) ex_paragraph
81 -- True
82
83 -- | Terms reordered to visually check occurrences
84 -- Split text by sentence and then extract ngrams.
85 --
86 -- >>> mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
87 -- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
88 ex_terms :: IO [[Terms]]
89 ex_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
90
91 -- | Test the Occurrences
92 --
93 -- >>> occurrences <$> L.concat <$> ex_terms
94 -- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
95 ex_occ :: IO (Map Grouped (Map Terms Int))
96 ex_occ = occurrences <$> L.concat <$> ex_terms
97
98 -- | Test the cooccurrences
99 -- Use the 'Gargantext.Text.Metrics.Count.cooc' function.
100 --
101 -- >>> cooc <$> ex_terms
102 -- fromList [((["glass"],["glass"]),4),((["spoon"],["glass"]),1),((["spoon"],["spoon"]),2),((["table"],["glass"]),3),((["table"],["spoon"]),2),((["table"],["table"]),4),((["wine"],["glass"]),3),((["wine"],["spoon"]),1),((["wine"],["table"]),2),((["wine"],["wine"]),3)]
103 ex_cooc :: IO (Map (Label, Label) Int)
104 ex_cooc = cooc <$> ex_terms
105
106 -- | Tests the specificity and genericity
107 --
108 -- >>> ex_cooc_mat
109 -- (fromList [(["glass"],0),(["spoon"],1),(["table"],2),(["wine"],3)],Matrix (Z :. 4 :. 4)
110 -- [ 4, 0, 0, 0,
111 -- 1, 2, 0, 0,
112 -- 3, 2, 4, 0,
113 -- 3, 1, 2, 3],Matrix (Z :. 4 :. 4)
114 -- [ 1.0, 0.25, 0.75, 0.75,
115 -- 0.0, 1.0, 1.0, 0.5,
116 -- 0.0, 0.0, 1.0, 0.5,
117 -- 0.0, 0.0, 0.0, 1.0],(Vector (Z :. 4) [0.5833333333333334,0.5833333333333334,0.75,0.5833333333333334],Vector (Z :. 4) [-0.5833333333333334,-0.4166666666666667,0.41666666666666674,0.5833333333333334]))
118 ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
119 ex_cooc_mat = do
120 m <- ex_cooc
121 let (ti,_) = createIndices m
122 let mat_cooc = cooc2mat ti m
123 pure ( ti
124 , mat_cooc
125 , incExcSpeGen_proba mat_cooc
126 , incExcSpeGen mat_cooc
127 )
128
129 ex_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
130 ex_incExcSpeGen = incExcSpeGen_sorted <$> ex_cooc
131
132 incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
133 incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
134 where
135 (ti,fi) = createIndices m
136 ordonne x = sortWith (Down . snd)
137 $ zip (map snd $ M.toList fi) (toList x)
138
139